From 2f6ebacaea7851be5c17a970514769dd3e9735e9 Mon Sep 17 00:00:00 2001 From: Shulhan Date: Thu, 8 Jan 2026 04:29:18 +0700 Subject: all: implement conversion for SPDX-License-Identifier If the file already contains an "SPDX-License-Identifier" line, its value is not modified; the program only moves that line to the top of the file, after the shebang. If spdxconv.cfg contains a match-license section and its pattern matches one of the lines in the file, the program uses that section's license_identifier instead of the default one and inserts it at the top, after the shebang. If the file does not contain the identifier, the program inserts a new one based on the default value in the spdxconv.cfg file. --- config.go | 8 +- config_match_license.go | 27 ++++++ file.go | 215 +++++++++++++++++++++++++++++++++++++++++++++ file_test.go | 107 ++++++++++++++++++++++ spdxconv.go | 25 +++++- spdxconv_test.go | 8 +- testdata/file/.gitignore | 2 + testdata/file_test.txt | 108 +++++++++++++++++++++++ testdata/scan/spdxconv.cfg | 4 + 9 files changed, 499 insertions(+), 5 deletions(-) create mode 100644 file.go create mode 100644 file_test.go create mode 100644 testdata/file/.gitignore create mode 100644 testdata/file_test.txt create mode 100644 testdata/scan/spdxconv.cfg diff --git a/config.go b/config.go index 962b0ad..1268cb8 100644 --- a/config.go +++ b/config.go @@ -9,7 +9,7 @@ type config struct { LicenseIdentifier string `ini:"default::license_identifier"` FileCopyrightText string `ini:"default::file_copyright_text"` - MatchLicense []configMatchLicense `ini:"match-license"` + MatchLicense []*configMatchLicense `ini:"match-license"` MaxLineMatch int `ini:"default::max_line_match"` } @@ -25,5 +25,11 @@ func (cfg *config) init() (err error) { if cfg.MaxLineMatch <= 0 { cfg.MaxLineMatch = defMaxLineMatch } + for _, cml := range cfg.MatchLicense { + err = cml.init() + if err != nil { + return err + } + } return nil } diff --git a/config_match_license.go b/config_match_license.go index 11e09fe..c027019 100644 --- a/config_match_license.go +++ 
b/config_match_license.go @@ -3,7 +3,15 @@ package spdxconv +import ( + "fmt" + "regexp" +) + type configMatchLicense struct { + rePattern *regexp.Regexp + reDeleteLine []*regexp.Regexp + // Pattern to be searched in file. Pattern string `ini:"match-license::pattern"` @@ -21,3 +29,22 @@ type configMatchLicense struct { // be deleted. DeleteMatch bool `ini:"match-license::delete_match"` } + +func (cml *configMatchLicense) init() (err error) { + var logp = `match-license` + if cml.Pattern != `` { + cml.rePattern, err = regexp.Compile(cml.Pattern) + if err != nil { + return fmt.Errorf(`%s: pattern %q: %w`, logp, cml.Pattern, err) + } + } + cml.reDeleteLine = make([]*regexp.Regexp, len(cml.DeleteLinePattern)) + for x, pattern := range cml.DeleteLinePattern { + re, err := regexp.Compile(pattern) + if err != nil { + return fmt.Errorf(`%s: delete_line_pattern %q: %w`, logp, pattern, err) + } + cml.reDeleteLine[x] = re + } + return nil +} diff --git a/file.go b/file.go new file mode 100644 index 0000000..b8a5630 --- /dev/null +++ b/file.go @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. Shulhan + +package spdxconv + +import ( + "bytes" + "fmt" + "os" + "regexp" + "slices" +) + +// reLicenseID regex to detect SPDX license identifier with or without +// comment prefix. +var reLicenseID = regexp.MustCompile(`^(//+|#+|/\*+|` + return + } + } +} + +// applyLicenseID check and insert the SPDX-License-Identifier. +// +// Its detect if SPDX-License-Identifer exist at the top or bottom of +// the file. +// If one found at the top, but not at the first line, or at the +// bottom, move it to the first line, after shebang. 
+func (f *file) applyLicenseID(conv *SPDXConv) { + var licenseID string + + for _, cml := range conv.cfg.MatchLicense { + for x, line := range f.topLines { + if reLicenseID.Match(line) { + f.idxLicenseID = x + if f.hasSheBang && x == 1 { + return + } + if x == 0 { + return + } + f.topLines = slices.Delete(f.topLines, x, x+1) + f.insertLicenseID(line) + return + } + if cml.rePattern.Match(line) { + licenseID = cml.LicenseIdentifier + if cml.DeleteMatch { + f.topLines = slices.Delete(f.topLines, x, x+1) + } + f.deleteLinePattern(f.topLines[x:], cml.reDeleteLine) + } + } + if licenseID != `` { + break + } + for x, line := range f.bottomLines { + if reLicenseID.Match(line) { + f.bottomLines = slices.Delete(f.bottomLines, x, x+1) + f.insertLicenseID(line) + return + } + if cml.rePattern.Match(line) { + licenseID = cml.LicenseIdentifier + if cml.DeleteMatch { + f.bottomLines = slices.Delete(f.bottomLines, x, x+1) + } + f.deleteLinePattern(f.bottomLines[x:], cml.reDeleteLine) + } + } + if licenseID != `` { + break + } + } + if licenseID == `` { + licenseID = conv.cfg.LicenseIdentifier + } + line := fmt.Sprintf("%sSPDX-License-Identifier: %s%s", + f.commentPrefix, licenseID, f.commentSuffix) + f.insertLicenseID([]byte(line)) +} + +// insertEmptyLine insert empty line after SPDX identifiers or any comments after it. +func (f *file) insertEmptyLine() { + if f.idxLicenseID < 0 || f.commentPrefix == `` { + // No license ID inserted. + return + } + comment := []byte(f.commentPrefix) + comment = comment[:len(comment)-1] // Remove space. + for x, line := range f.topLines[f.idxLicenseID:] { + if bytes.HasPrefix(line, comment) { + continue + } + line = bytes.TrimSpace(line) + if len(line) == 0 { + // There is already empty line. + return + } + f.topLines = slices.Insert(f.topLines, x, []byte{}) + return + } +} + +// insertLicenseID insert the license identifier `line` at the top of the +// file and below the shebang "#!" if its exists. 
+func (f *file) insertLicenseID(line []byte) { + if f.hasSheBang { + f.topLines = slices.Insert(f.topLines, 1, line) + f.idxLicenseID = 1 + } else { + f.topLines = slices.Insert(f.topLines, 0, line) + f.idxLicenseID = 0 + } +} + +func (f *file) deleteLinePattern(lines [][]byte, reDeleteLine []*regexp.Regexp) { + for _, re := range reDeleteLine { + for x, line := range lines { + if re.Match(line) { + lines = slices.Delete(lines, x, x+1) + break + } + } + } +} + +func (f *file) write() (err error) { + var finfo os.FileInfo + finfo, err = os.Stat(f.path) + if err != nil { + return fmt.Errorf(`write: %w`, err) + } + + lines := slices.Concat(f.topLines, f.lines, f.bottomLines) + content := bytes.Join(lines, []byte{'\n'}) + content = bytes.TrimRight(content, "\n") + err = os.WriteFile(f.path, content, finfo.Mode()) + if err != nil { + return fmt.Errorf(`write: %w`, err) + } + return nil +} diff --git a/file_test.go b/file_test.go new file mode 100644 index 0000000..31e2a3d --- /dev/null +++ b/file_test.go @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. Shulhan + +package spdxconv + +import ( + "os" + "testing" + + "git.sr.ht/~shulhan/pakakeh.go/lib/test" +) + +func TestFile_all(t *testing.T) { + // Populate the test files from `testdata/file_test.txt`. + + var testData *test.Data + var err error + testData, err = test.LoadData(`testdata/file_test.txt`) + if err != nil { + t.Fatal(err) + } + var tempDir = `testdata/file/` + testData.ExtractInput(tempDir) + t.Chdir(tempDir) + + // Inititalize the SPDXConv instance. 
+ + conv, err := New(`.`) + if err != nil { + t.Fatal(err) + } + + for input, _ := range testData.Input { + if input == `spdxconv.cfg` { + continue + } + + f, err := newFile(input, conv.cfg.MaxLineMatch) + if err != nil { + t.Fatal(err) + } + f.apply(conv) + err = f.write() + if err != nil { + t.Fatal(err) + } + got, err := os.ReadFile(input) + if err != nil { + t.Fatal(err) + } + test.Assert(t, input+`: after`, + string(testData.Output[input]), string(got)) + } +} + +func TestFile_detectComment(t *testing.T) { + type testCase struct { + topLines [][]byte + expFile file + } + listCase := []testCase{{ + topLines: [][]byte{ + []byte(`#!/bin/sh`), + }, + expFile: file{ + commentPrefix: `# `, + hasSheBang: true, + }, + }, { + topLines: [][]byte{ + []byte(`# comment`), + }, + expFile: file{ + commentPrefix: `# `, + }, + }, { + topLines: [][]byte{ + []byte(`// comment`), + }, + expFile: file{ + commentPrefix: `// `, + }, + }, { + topLines: [][]byte{ + []byte(`/*`), + }, + expFile: file{ + commentPrefix: `// `, + }, + }, { + topLines: [][]byte{ + []byte(``, + }, + }} + for _, tc := range listCase { + f := file{ + topLines: tc.topLines, + } + f.detectComment() + f.topLines = nil + test.Assert(t, string(tc.topLines[0]), tc.expFile, f) + } +} diff --git a/spdxconv.go b/spdxconv.go index b303070..3aaabf3 100644 --- a/spdxconv.go +++ b/spdxconv.go @@ -56,7 +56,10 @@ func Apply(path string) (err error) { var pathFile string for _, pathFile = range listFile { - conv.convert(pathFile) + err = conv.apply(pathFile) + if err != nil { + return fmt.Errorf(`%s: %w`, logp, err) + } } return nil } @@ -179,6 +182,7 @@ func (conv *SPDXConv) scanFiles(listDir []string) (listFile []string, err error) var commonIgnore = map[string]struct{}{ `.git`: struct{}{}, `node_modules`: struct{}{}, + `spdxconv.cfg`: struct{}{}, `vendor`: struct{}{}, } var suffixLicense = `.license` @@ -232,7 +236,22 @@ func (conv *SPDXConv) scanFiles(listDir []string) (listFile []string, err error) return listFile, nil 
} -// convert the given pathFile to SPDX license format. -func (conv *SPDXConv) convert(pathFile string) { +// apply check and insert the SPDX identifier to file `pathFile`. +func (conv *SPDXConv) apply(pathFile string) (err error) { + var logp = `apply` + var f *file + + f, err = newFile(pathFile, conv.cfg.MaxLineMatch) + if err != nil { + return fmt.Errorf(`%s: %w`, logp, err) + } + + f.apply(conv) + + err = f.write() + if err != nil { + return fmt.Errorf(`%s: %w`, logp, err) + } + return nil } diff --git a/spdxconv_test.go b/spdxconv_test.go index 40bcc16..d2c7494 100644 --- a/spdxconv_test.go +++ b/spdxconv_test.go @@ -4,6 +4,7 @@ package spdxconv import ( + "regexp" "testing" "git.sr.ht/~shulhan/pakakeh.go/lib/git" @@ -71,7 +72,7 @@ func TestSPDXConv_loadConfig(t *testing.T) { LicenseIdentifier: `GPL-3.0-only`, FileCopyrightText: `Author `, MaxLineMatch: 10, - MatchLicense: []configMatchLicense{{ + MatchLicense: []*configMatchLicense{{ Pattern: `^(//+|#+)*\s+(.*)governed by a BSD-style(.*)$`, LicenseIdentifier: `BSD-3-Clause`, DeleteLinePattern: []string{ @@ -79,6 +80,11 @@ func TestSPDXConv_loadConfig(t *testing.T) { `^(//+|#+)*\s+license that(.*)$`, }, DeleteMatch: true, + rePattern: regexp.MustCompile(`^(//+|#+)*\s+(.*)governed by a BSD-style(.*)$`), + reDeleteLine: []*regexp.Regexp{ + regexp.MustCompile(`^(//+|#+)*\s*$`), + regexp.MustCompile(`^(//+|#+)*\s+license that(.*)$`), + }, }}, }, }} diff --git a/testdata/file/.gitignore b/testdata/file/.gitignore new file mode 100644 index 0000000..1e37400 --- /dev/null +++ b/testdata/file/.gitignore @@ -0,0 +1,2 @@ +/** +!/.gitignore diff --git a/testdata/file_test.txt b/testdata/file_test.txt new file mode 100644 index 0000000..dad7b85 --- /dev/null +++ b/testdata/file_test.txt @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. Shulhan + +>>> spdxconv.cfg +# SPDX-License-Identifier: BSD-3-Clause +# SPDX-FileCopyrightText: 2025 M. 
Shulhan + +[default] +license_identifier = GPL-3.0-only +file_copyright_text = M. Shulhan + +[match-license] +pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$" +license_identifier = BSD-3-Clause +delete_match = true +delete_line_pattern = "^(//+|#+)\\s*$" +delete_line_pattern = "^(//+|#+)\\s+license that(.*)$" + +[match-copyright] +pattern = "^(//+|#+)\\s+Copyright\\s+(?\\d{4}),?\\s+(?.*)\\s+<*(?.*)>.*$" + +>>> without_spdx_license_id.txt +1 +2 +3 + +<<< without_spdx_license_id.txt +SPDX-License-Identifier: GPL-3.0-only +1 +2 +3 + +>>> with_spdx_at_bottom.txt +1 +2 +3 +// SPDX-License-Identifier: GPL-3.0-only + +<<< with_spdx_at_bottom.txt +// SPDX-License-Identifier: GPL-3.0-only + +1 +2 +3 + +>>> with_spdx_license_id_only.txt +// SPDX-License-Identifier: GPL-3.0-only +1 +2 +3 + +<<< with_spdx_license_id_only.txt +// SPDX-License-Identifier: GPL-3.0-only + +1 +2 +3 + +>>> with_no_order.txt +// SPDX-FileCopyrightText: 2025 M. Shulhan +// SPDX-License-Identifier: BSD-3-Clause + +1 +2 +3 + +<<< with_no_order.txt +// SPDX-License-Identifier: BSD-3-Clause +// SPDX-FileCopyrightText: 2025 M. Shulhan + +1 +2 +3 + +>>> with_match_license.txt +// Copyright 2018, Shulhan . All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +1 +2 +3 + +<<< with_match_license.txt +// SPDX-License-Identifier: BSD-3-Clause +// Copyright 2018, Shulhan . All rights reserved. + +1 +2 +3 + +>>> with_match_license_bottom.txt +1 +2 +3 + +// Copyright 2018, Shulhan . All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +<<< with_match_license_bottom.txt +// SPDX-License-Identifier: BSD-3-Clause + +1 +2 +3 + +// Copyright 2018, Shulhan . All rights reserved. 
diff --git a/testdata/scan/spdxconv.cfg b/testdata/scan/spdxconv.cfg new file mode 100644 index 0000000..b4e39c6 --- /dev/null +++ b/testdata/scan/spdxconv.cfg @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. Shulhan + +[default] -- cgit v1.3