diff options
| -rw-r--r-- | config.go | 8 | ||||
| -rw-r--r-- | config_match_license.go | 27 | ||||
| -rw-r--r-- | file.go | 215 | ||||
| -rw-r--r-- | file_test.go | 107 | ||||
| -rw-r--r-- | spdxconv.go | 25 | ||||
| -rw-r--r-- | spdxconv_test.go | 8 | ||||
| -rw-r--r-- | testdata/file/.gitignore | 2 | ||||
| -rw-r--r-- | testdata/file_test.txt | 108 | ||||
| -rw-r--r-- | testdata/scan/spdxconv.cfg | 4 |
9 files changed, 499 insertions, 5 deletions
@@ -9,7 +9,7 @@ type config struct { LicenseIdentifier string `ini:"default::license_identifier"` FileCopyrightText string `ini:"default::file_copyright_text"` - MatchLicense []configMatchLicense `ini:"match-license"` + MatchLicense []*configMatchLicense `ini:"match-license"` MaxLineMatch int `ini:"default::max_line_match"` } @@ -25,5 +25,11 @@ func (cfg *config) init() (err error) { if cfg.MaxLineMatch <= 0 { cfg.MaxLineMatch = defMaxLineMatch } + for _, cml := range cfg.MatchLicense { + err = cml.init() + if err != nil { + return err + } + } return nil } diff --git a/config_match_license.go b/config_match_license.go index 11e09fe..c027019 100644 --- a/config_match_license.go +++ b/config_match_license.go @@ -3,7 +3,15 @@ package spdxconv +import ( + "fmt" + "regexp" +) + type configMatchLicense struct { + rePattern *regexp.Regexp + reDeleteLine []*regexp.Regexp + // Pattern to be searched in file. Pattern string `ini:"match-license::pattern"` @@ -21,3 +29,22 @@ type configMatchLicense struct { // be deleted. DeleteMatch bool `ini:"match-license::delete_match"` } + +func (cml *configMatchLicense) init() (err error) { + var logp = `match-license` + if cml.Pattern != `` { + cml.rePattern, err = regexp.Compile(cml.Pattern) + if err != nil { + return fmt.Errorf(`%s: pattern %q: %w`, logp, cml.Pattern, err) + } + } + cml.reDeleteLine = make([]*regexp.Regexp, len(cml.DeleteLinePattern)) + for x, pattern := range cml.DeleteLinePattern { + re, err := regexp.Compile(pattern) + if err != nil { + return fmt.Errorf(`%s: delete_line_pattern %q: %w`, logp, pattern, err) + } + cml.reDeleteLine[x] = re + } + return nil +} @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +package spdxconv + +import ( + "bytes" + "fmt" + "os" + "regexp" + "slices" +) + +// reLicenseID regex to detect SPDX license identifier with or without +// comment prefix. +var reLicenseID = regexp.MustCompile(`^(//+|#+|/\*+|<!--+)?\s?SPDX-License-Identifier:.*$`) + +type file struct { + path string + + // commentPrefix used as prefix to SPDX identifier. + // The comment prefix is detected automatically from the first N + // lines of file. + commentPrefix string + commentSuffix string + + lines [][]byte + topLines [][]byte + bottomLines [][]byte + + // idxLicenseID index of License-Identifier in the topLines. + idxLicenseID int + + hasSheBang bool +} + +func newFile(path string, maxLine int) (f *file, err error) { + var content []byte + content, err = os.ReadFile(path) + if err != nil { + return nil, err + } + + f = &file{ + path: path, + lines: bytes.Split(content, []byte{'\n'}), + idxLicenseID: -1, + } + nline := len(f.lines) + if nline < maxLine*2 { + f.topLines = f.lines + f.lines = f.lines[nline:] + } else { + f.topLines = f.lines[:maxLine] + f.bottomLines = f.lines[nline-maxLine:] + f.lines = f.lines[maxLine : nline-maxLine] + } + return f, nil +} + +// apply the SPDX identifier to file. +func (f *file) apply(conv *SPDXConv) { + f.detectComment() + f.applyLicenseID(conv) + f.insertEmptyLine() +} + +func (f *file) detectComment() { + if bytes.HasPrefix(f.topLines[0], []byte(`#!`)) { + f.hasSheBang = true + f.commentPrefix = `# ` + return + } + for _, line := range f.topLines { + if bytes.HasPrefix(line, []byte(`#`)) { + f.commentPrefix = `# ` + return + } + if bytes.HasPrefix(line, []byte(`//`)) { + f.commentPrefix = `// ` + return + } + if bytes.HasPrefix(line, []byte(`/*`)) { + f.commentPrefix = `// ` + return + } + if bytes.HasPrefix(line, []byte(`<!--`)) { + f.commentPrefix = `<!-- ` + f.commentSuffix = ` -->` + return + } + } +} + +// applyLicenseID check and insert the SPDX-License-Identifier. +// +// Its detect if SPDX-License-Identifer exist at the top or bottom of +// the file. +// If one found at the top, but not at the first line, or at the +// bottom, move it to the first line, after shebang. +func (f *file) applyLicenseID(conv *SPDXConv) { + var licenseID string + + for _, cml := range conv.cfg.MatchLicense { + for x, line := range f.topLines { + if reLicenseID.Match(line) { + f.idxLicenseID = x + if f.hasSheBang && x == 1 { + return + } + if x == 0 { + return + } + f.topLines = slices.Delete(f.topLines, x, x+1) + f.insertLicenseID(line) + return + } + if cml.rePattern.Match(line) { + licenseID = cml.LicenseIdentifier + if cml.DeleteMatch { + f.topLines = slices.Delete(f.topLines, x, x+1) + } + f.deleteLinePattern(f.topLines[x:], cml.reDeleteLine) + } + } + if licenseID != `` { + break + } + for x, line := range f.bottomLines { + if reLicenseID.Match(line) { + f.bottomLines = slices.Delete(f.bottomLines, x, x+1) + f.insertLicenseID(line) + return + } + if cml.rePattern.Match(line) { + licenseID = cml.LicenseIdentifier + if cml.DeleteMatch { + f.bottomLines = slices.Delete(f.bottomLines, x, x+1) + } + f.deleteLinePattern(f.bottomLines[x:], cml.reDeleteLine) + } + } + if licenseID != `` { + break + } + } + if licenseID == `` { + licenseID = conv.cfg.LicenseIdentifier + } + line := fmt.Sprintf("%sSPDX-License-Identifier: %s%s", + f.commentPrefix, licenseID, f.commentSuffix) + f.insertLicenseID([]byte(line)) +} + +// insertEmptyLine insert empty line after SPDX identifiers or any comments after it. +func (f *file) insertEmptyLine() { + if f.idxLicenseID < 0 || f.commentPrefix == `` { + // No license ID inserted. + return + } + comment := []byte(f.commentPrefix) + comment = comment[:len(comment)-1] // Remove space. + for x, line := range f.topLines[f.idxLicenseID:] { + if bytes.HasPrefix(line, comment) { + continue + } + line = bytes.TrimSpace(line) + if len(line) == 0 { + // There is already empty line. + return + } + f.topLines = slices.Insert(f.topLines, x, []byte{}) + return + } +} + +// insertLicenseID insert the license identifier `line` at the top of the +// file and below the shebang "#!" if its exists. +func (f *file) insertLicenseID(line []byte) { + if f.hasSheBang { + f.topLines = slices.Insert(f.topLines, 1, line) + f.idxLicenseID = 1 + } else { + f.topLines = slices.Insert(f.topLines, 0, line) + f.idxLicenseID = 0 + } +} + +func (f *file) deleteLinePattern(lines [][]byte, reDeleteLine []*regexp.Regexp) { + for _, re := range reDeleteLine { + for x, line := range lines { + if re.Match(line) { + lines = slices.Delete(lines, x, x+1) + break + } + } + } +} + +func (f *file) write() (err error) { + var finfo os.FileInfo + finfo, err = os.Stat(f.path) + if err != nil { + return fmt.Errorf(`write: %w`, err) + } + + lines := slices.Concat(f.topLines, f.lines, f.bottomLines) + content := bytes.Join(lines, []byte{'\n'}) + content = bytes.TrimRight(content, "\n") + err = os.WriteFile(f.path, content, finfo.Mode()) + if err != nil { + return fmt.Errorf(`write: %w`, err) + } + return nil +} diff --git a/file_test.go b/file_test.go new file mode 100644 index 0000000..31e2a3d --- /dev/null +++ b/file_test.go @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +package spdxconv + +import ( + "os" + "testing" + + "git.sr.ht/~shulhan/pakakeh.go/lib/test" +) + +func TestFile_all(t *testing.T) { + // Populate the test files from `testdata/file_test.txt`. + + var testData *test.Data + var err error + testData, err = test.LoadData(`testdata/file_test.txt`) + if err != nil { + t.Fatal(err) + } + var tempDir = `testdata/file/` + testData.ExtractInput(tempDir) + t.Chdir(tempDir) + + // Inititalize the SPDXConv instance. + + conv, err := New(`.`) + if err != nil { + t.Fatal(err) + } + + for input, _ := range testData.Input { + if input == `spdxconv.cfg` { + continue + } + + f, err := newFile(input, conv.cfg.MaxLineMatch) + if err != nil { + t.Fatal(err) + } + f.apply(conv) + err = f.write() + if err != nil { + t.Fatal(err) + } + got, err := os.ReadFile(input) + if err != nil { + t.Fatal(err) + } + test.Assert(t, input+`: after`, + string(testData.Output[input]), string(got)) + } +} + +func TestFile_detectComment(t *testing.T) { + type testCase struct { + topLines [][]byte + expFile file + } + listCase := []testCase{{ + topLines: [][]byte{ + []byte(`#!/bin/sh`), + }, + expFile: file{ + commentPrefix: `# `, + hasSheBang: true, + }, + }, { + topLines: [][]byte{ + []byte(`# comment`), + }, + expFile: file{ + commentPrefix: `# `, + }, + }, { + topLines: [][]byte{ + []byte(`// comment`), + }, + expFile: file{ + commentPrefix: `// `, + }, + }, { + topLines: [][]byte{ + []byte(`/*`), + }, + expFile: file{ + commentPrefix: `// `, + }, + }, { + topLines: [][]byte{ + []byte(`<!--`), + }, + expFile: file{ + commentPrefix: `<!-- `, + commentSuffix: ` -->`, + }, + }} + for _, tc := range listCase { + f := file{ + topLines: tc.topLines, + } + f.detectComment() + f.topLines = nil + test.Assert(t, string(tc.topLines[0]), tc.expFile, f) + } +} diff --git a/spdxconv.go b/spdxconv.go index b303070..3aaabf3 100644 --- a/spdxconv.go +++ b/spdxconv.go @@ -56,7 +56,10 @@ func Apply(path string) (err error) { var pathFile string for _, pathFile = range listFile { - conv.convert(pathFile) + err = conv.apply(pathFile) + if err != nil { + return fmt.Errorf(`%s: %w`, logp, err) + } } return nil } @@ -179,6 +182,7 @@ func (conv *SPDXConv) scanFiles(listDir []string) (listFile []string, err error) var commonIgnore = map[string]struct{}{ `.git`: struct{}{}, `node_modules`: struct{}{}, + `spdxconv.cfg`: struct{}{}, `vendor`: struct{}{}, } var suffixLicense = `.license` @@ -232,7 +236,22 @@ func (conv *SPDXConv) scanFiles(listDir []string) (listFile []string, err error) return listFile, nil } -// convert the given pathFile to SPDX license format. -func (conv *SPDXConv) convert(pathFile string) { +// apply check and insert the SPDX identifier to file `pathFile`. +func (conv *SPDXConv) apply(pathFile string) (err error) { + var logp = `apply` + var f *file + + f, err = newFile(pathFile, conv.cfg.MaxLineMatch) + if err != nil { + return fmt.Errorf(`%s: %w`, logp, err) + } + + f.apply(conv) + + err = f.write() + if err != nil { + return fmt.Errorf(`%s: %w`, logp, err) + } + return nil } diff --git a/spdxconv_test.go b/spdxconv_test.go index 40bcc16..d2c7494 100644 --- a/spdxconv_test.go +++ b/spdxconv_test.go @@ -4,6 +4,7 @@ package spdxconv import ( + "regexp" "testing" "git.sr.ht/~shulhan/pakakeh.go/lib/git" @@ -71,7 +72,7 @@ func TestSPDXConv_loadConfig(t *testing.T) { LicenseIdentifier: `GPL-3.0-only`, FileCopyrightText: `Author <author@email.info>`, MaxLineMatch: 10, - MatchLicense: []configMatchLicense{{ + MatchLicense: []*configMatchLicense{{ Pattern: `^(//+|#+)*\s+(.*)governed by a BSD-style(.*)$`, LicenseIdentifier: `BSD-3-Clause`, DeleteLinePattern: []string{ @@ -79,6 +80,11 @@ func TestSPDXConv_loadConfig(t *testing.T) { `^(//+|#+)*\s+license that(.*)$`, }, DeleteMatch: true, + rePattern: regexp.MustCompile(`^(//+|#+)*\s+(.*)governed by a BSD-style(.*)$`), + reDeleteLine: []*regexp.Regexp{ + regexp.MustCompile(`^(//+|#+)*\s*$`), + regexp.MustCompile(`^(//+|#+)*\s+license that(.*)$`), + }, }}, }, }} diff --git a/testdata/file/.gitignore b/testdata/file/.gitignore new file mode 100644 index 0000000..1e37400 --- /dev/null +++ b/testdata/file/.gitignore @@ -0,0 +1,2 @@ +/** +!/.gitignore diff --git a/testdata/file_test.txt b/testdata/file_test.txt new file mode 100644 index 0000000..dad7b85 --- /dev/null +++ b/testdata/file_test.txt @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +>>> spdxconv.cfg +# SPDX-License-Identifier: BSD-3-Clause +# SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +[default] +license_identifier = GPL-3.0-only +file_copyright_text = M. Shulhan <ms@kilabit.info> + +[match-license] +pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$" +license_identifier = BSD-3-Clause +delete_match = true +delete_line_pattern = "^(//+|#+)\\s*$" +delete_line_pattern = "^(//+|#+)\\s+license that(.*)$" + +[match-copyright] +pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<holder>.*)\\s+<*(?<email>.*)>.*$" + +>>> without_spdx_license_id.txt +1 +2 +3 + +<<< without_spdx_license_id.txt +SPDX-License-Identifier: GPL-3.0-only +1 +2 +3 + +>>> with_spdx_at_bottom.txt +1 +2 +3 +// SPDX-License-Identifier: GPL-3.0-only + +<<< with_spdx_at_bottom.txt +// SPDX-License-Identifier: GPL-3.0-only + +1 +2 +3 + +>>> with_spdx_license_id_only.txt +// SPDX-License-Identifier: GPL-3.0-only +1 +2 +3 + +<<< with_spdx_license_id_only.txt +// SPDX-License-Identifier: GPL-3.0-only + +1 +2 +3 + +>>> with_no_order.txt +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> +// SPDX-License-Identifier: BSD-3-Clause + +1 +2 +3 + +<<< with_no_order.txt +// SPDX-License-Identifier: BSD-3-Clause +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +1 +2 +3 + +>>> with_match_license.txt +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +1 +2 +3 + +<<< with_match_license.txt +// SPDX-License-Identifier: BSD-3-Clause +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. + +1 +2 +3 + +>>> with_match_license_bottom.txt +1 +2 +3 + +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +<<< with_match_license_bottom.txt +// SPDX-License-Identifier: BSD-3-Clause + +1 +2 +3 + +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. diff --git a/testdata/scan/spdxconv.cfg b/testdata/scan/spdxconv.cfg new file mode 100644 index 0000000..b4e39c6 --- /dev/null +++ b/testdata/scan/spdxconv.cfg @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +[default] |
