diff options
| author | Shulhan <ms@kilabit.info> | 2026-01-08 04:29:18 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2026-01-08 04:29:18 +0700 |
| commit | 2f6ebacaea7851be5c17a970514769dd3e9735e9 (patch) | |
| tree | d34bcd794ae3946d1a5446e7554ec3cfeaa7c40b | |
| parent | 161049a3996c574d521d6d3df55998028eb111c0 (diff) | |
| download | spdxconv-2f6ebacaea7851be5c17a970514769dd3e9735e9.tar.xz | |
all: implement conversion for SPDX-License-Identifier
If the file already contains "SPDX-License-Identifier", the program does
not modify the identifier itself; it only moves it to the top of the file,
after the shebang.
If spdxconv.cfg contains a match-license section and its pattern matches
one of the lines in the file, the program uses that section's
license_identifier instead of the default one and inserts it at the top,
after the shebang.
If the file does not contain the identifier, the program inserts a new one
based on the default value in the spdxconv.cfg file.
| -rw-r--r-- | config.go | 8 | ||||
| -rw-r--r-- | config_match_license.go | 27 | ||||
| -rw-r--r-- | file.go | 215 | ||||
| -rw-r--r-- | file_test.go | 107 | ||||
| -rw-r--r-- | spdxconv.go | 25 | ||||
| -rw-r--r-- | spdxconv_test.go | 8 | ||||
| -rw-r--r-- | testdata/file/.gitignore | 2 | ||||
| -rw-r--r-- | testdata/file_test.txt | 108 | ||||
| -rw-r--r-- | testdata/scan/spdxconv.cfg | 4 |
9 files changed, 499 insertions, 5 deletions
@@ -9,7 +9,7 @@ type config struct { LicenseIdentifier string `ini:"default::license_identifier"` FileCopyrightText string `ini:"default::file_copyright_text"` - MatchLicense []configMatchLicense `ini:"match-license"` + MatchLicense []*configMatchLicense `ini:"match-license"` MaxLineMatch int `ini:"default::max_line_match"` } @@ -25,5 +25,11 @@ func (cfg *config) init() (err error) { if cfg.MaxLineMatch <= 0 { cfg.MaxLineMatch = defMaxLineMatch } + for _, cml := range cfg.MatchLicense { + err = cml.init() + if err != nil { + return err + } + } return nil } diff --git a/config_match_license.go b/config_match_license.go index 11e09fe..c027019 100644 --- a/config_match_license.go +++ b/config_match_license.go @@ -3,7 +3,15 @@ package spdxconv +import ( + "fmt" + "regexp" +) + type configMatchLicense struct { + rePattern *regexp.Regexp + reDeleteLine []*regexp.Regexp + // Pattern to be searched in file. Pattern string `ini:"match-license::pattern"` @@ -21,3 +29,22 @@ type configMatchLicense struct { // be deleted. DeleteMatch bool `ini:"match-license::delete_match"` } + +func (cml *configMatchLicense) init() (err error) { + var logp = `match-license` + if cml.Pattern != `` { + cml.rePattern, err = regexp.Compile(cml.Pattern) + if err != nil { + return fmt.Errorf(`%s: pattern %q: %w`, logp, cml.Pattern, err) + } + } + cml.reDeleteLine = make([]*regexp.Regexp, len(cml.DeleteLinePattern)) + for x, pattern := range cml.DeleteLinePattern { + re, err := regexp.Compile(pattern) + if err != nil { + return fmt.Errorf(`%s: delete_line_pattern %q: %w`, logp, pattern, err) + } + cml.reDeleteLine[x] = re + } + return nil +} @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +package spdxconv + +import ( + "bytes" + "fmt" + "os" + "regexp" + "slices" +) + +// reLicenseID regex to detect SPDX license identifier with or without +// comment prefix. 
+var reLicenseID = regexp.MustCompile(`^(//+|#+|/\*+|<!--+)?\s?SPDX-License-Identifier:.*$`) + +type file struct { + path string + + // commentPrefix used as prefix to SPDX identifier. + // The comment prefix is detected automatically from the first N + // lines of file. + commentPrefix string + commentSuffix string + + lines [][]byte + topLines [][]byte + bottomLines [][]byte + + // idxLicenseID index of License-Identifier in the topLines. + idxLicenseID int + + hasSheBang bool +} + +func newFile(path string, maxLine int) (f *file, err error) { + var content []byte + content, err = os.ReadFile(path) + if err != nil { + return nil, err + } + + f = &file{ + path: path, + lines: bytes.Split(content, []byte{'\n'}), + idxLicenseID: -1, + } + nline := len(f.lines) + if nline < maxLine*2 { + f.topLines = f.lines + f.lines = f.lines[nline:] + } else { + f.topLines = f.lines[:maxLine] + f.bottomLines = f.lines[nline-maxLine:] + f.lines = f.lines[maxLine : nline-maxLine] + } + return f, nil +} + +// apply the SPDX identifier to file. +func (f *file) apply(conv *SPDXConv) { + f.detectComment() + f.applyLicenseID(conv) + f.insertEmptyLine() +} + +func (f *file) detectComment() { + if bytes.HasPrefix(f.topLines[0], []byte(`#!`)) { + f.hasSheBang = true + f.commentPrefix = `# ` + return + } + for _, line := range f.topLines { + if bytes.HasPrefix(line, []byte(`#`)) { + f.commentPrefix = `# ` + return + } + if bytes.HasPrefix(line, []byte(`//`)) { + f.commentPrefix = `// ` + return + } + if bytes.HasPrefix(line, []byte(`/*`)) { + f.commentPrefix = `// ` + return + } + if bytes.HasPrefix(line, []byte(`<!--`)) { + f.commentPrefix = `<!-- ` + f.commentSuffix = ` -->` + return + } + } +} + +// applyLicenseID check and insert the SPDX-License-Identifier. +// +// Its detect if SPDX-License-Identifer exist at the top or bottom of +// the file. +// If one found at the top, but not at the first line, or at the +// bottom, move it to the first line, after shebang. 
+func (f *file) applyLicenseID(conv *SPDXConv) { + var licenseID string + + for _, cml := range conv.cfg.MatchLicense { + for x, line := range f.topLines { + if reLicenseID.Match(line) { + f.idxLicenseID = x + if f.hasSheBang && x == 1 { + return + } + if x == 0 { + return + } + f.topLines = slices.Delete(f.topLines, x, x+1) + f.insertLicenseID(line) + return + } + if cml.rePattern.Match(line) { + licenseID = cml.LicenseIdentifier + if cml.DeleteMatch { + f.topLines = slices.Delete(f.topLines, x, x+1) + } + f.deleteLinePattern(f.topLines[x:], cml.reDeleteLine) + } + } + if licenseID != `` { + break + } + for x, line := range f.bottomLines { + if reLicenseID.Match(line) { + f.bottomLines = slices.Delete(f.bottomLines, x, x+1) + f.insertLicenseID(line) + return + } + if cml.rePattern.Match(line) { + licenseID = cml.LicenseIdentifier + if cml.DeleteMatch { + f.bottomLines = slices.Delete(f.bottomLines, x, x+1) + } + f.deleteLinePattern(f.bottomLines[x:], cml.reDeleteLine) + } + } + if licenseID != `` { + break + } + } + if licenseID == `` { + licenseID = conv.cfg.LicenseIdentifier + } + line := fmt.Sprintf("%sSPDX-License-Identifier: %s%s", + f.commentPrefix, licenseID, f.commentSuffix) + f.insertLicenseID([]byte(line)) +} + +// insertEmptyLine insert empty line after SPDX identifiers or any comments after it. +func (f *file) insertEmptyLine() { + if f.idxLicenseID < 0 || f.commentPrefix == `` { + // No license ID inserted. + return + } + comment := []byte(f.commentPrefix) + comment = comment[:len(comment)-1] // Remove space. + for x, line := range f.topLines[f.idxLicenseID:] { + if bytes.HasPrefix(line, comment) { + continue + } + line = bytes.TrimSpace(line) + if len(line) == 0 { + // There is already empty line. + return + } + f.topLines = slices.Insert(f.topLines, x, []byte{}) + return + } +} + +// insertLicenseID insert the license identifier `line` at the top of the +// file and below the shebang "#!" if its exists. 
+func (f *file) insertLicenseID(line []byte) { + if f.hasSheBang { + f.topLines = slices.Insert(f.topLines, 1, line) + f.idxLicenseID = 1 + } else { + f.topLines = slices.Insert(f.topLines, 0, line) + f.idxLicenseID = 0 + } +} + +func (f *file) deleteLinePattern(lines [][]byte, reDeleteLine []*regexp.Regexp) { + for _, re := range reDeleteLine { + for x, line := range lines { + if re.Match(line) { + lines = slices.Delete(lines, x, x+1) + break + } + } + } +} + +func (f *file) write() (err error) { + var finfo os.FileInfo + finfo, err = os.Stat(f.path) + if err != nil { + return fmt.Errorf(`write: %w`, err) + } + + lines := slices.Concat(f.topLines, f.lines, f.bottomLines) + content := bytes.Join(lines, []byte{'\n'}) + content = bytes.TrimRight(content, "\n") + err = os.WriteFile(f.path, content, finfo.Mode()) + if err != nil { + return fmt.Errorf(`write: %w`, err) + } + return nil +} diff --git a/file_test.go b/file_test.go new file mode 100644 index 0000000..31e2a3d --- /dev/null +++ b/file_test.go @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +package spdxconv + +import ( + "os" + "testing" + + "git.sr.ht/~shulhan/pakakeh.go/lib/test" +) + +func TestFile_all(t *testing.T) { + // Populate the test files from `testdata/file_test.txt`. + + var testData *test.Data + var err error + testData, err = test.LoadData(`testdata/file_test.txt`) + if err != nil { + t.Fatal(err) + } + var tempDir = `testdata/file/` + testData.ExtractInput(tempDir) + t.Chdir(tempDir) + + // Inititalize the SPDXConv instance. 
+ + conv, err := New(`.`) + if err != nil { + t.Fatal(err) + } + + for input, _ := range testData.Input { + if input == `spdxconv.cfg` { + continue + } + + f, err := newFile(input, conv.cfg.MaxLineMatch) + if err != nil { + t.Fatal(err) + } + f.apply(conv) + err = f.write() + if err != nil { + t.Fatal(err) + } + got, err := os.ReadFile(input) + if err != nil { + t.Fatal(err) + } + test.Assert(t, input+`: after`, + string(testData.Output[input]), string(got)) + } +} + +func TestFile_detectComment(t *testing.T) { + type testCase struct { + topLines [][]byte + expFile file + } + listCase := []testCase{{ + topLines: [][]byte{ + []byte(`#!/bin/sh`), + }, + expFile: file{ + commentPrefix: `# `, + hasSheBang: true, + }, + }, { + topLines: [][]byte{ + []byte(`# comment`), + }, + expFile: file{ + commentPrefix: `# `, + }, + }, { + topLines: [][]byte{ + []byte(`// comment`), + }, + expFile: file{ + commentPrefix: `// `, + }, + }, { + topLines: [][]byte{ + []byte(`/*`), + }, + expFile: file{ + commentPrefix: `// `, + }, + }, { + topLines: [][]byte{ + []byte(`<!--`), + }, + expFile: file{ + commentPrefix: `<!-- `, + commentSuffix: ` -->`, + }, + }} + for _, tc := range listCase { + f := file{ + topLines: tc.topLines, + } + f.detectComment() + f.topLines = nil + test.Assert(t, string(tc.topLines[0]), tc.expFile, f) + } +} diff --git a/spdxconv.go b/spdxconv.go index b303070..3aaabf3 100644 --- a/spdxconv.go +++ b/spdxconv.go @@ -56,7 +56,10 @@ func Apply(path string) (err error) { var pathFile string for _, pathFile = range listFile { - conv.convert(pathFile) + err = conv.apply(pathFile) + if err != nil { + return fmt.Errorf(`%s: %w`, logp, err) + } } return nil } @@ -179,6 +182,7 @@ func (conv *SPDXConv) scanFiles(listDir []string) (listFile []string, err error) var commonIgnore = map[string]struct{}{ `.git`: struct{}{}, `node_modules`: struct{}{}, + `spdxconv.cfg`: struct{}{}, `vendor`: struct{}{}, } var suffixLicense = `.license` @@ -232,7 +236,22 @@ func (conv *SPDXConv) 
scanFiles(listDir []string) (listFile []string, err error) return listFile, nil } -// convert the given pathFile to SPDX license format. -func (conv *SPDXConv) convert(pathFile string) { +// apply check and insert the SPDX identifier to file `pathFile`. +func (conv *SPDXConv) apply(pathFile string) (err error) { + var logp = `apply` + var f *file + + f, err = newFile(pathFile, conv.cfg.MaxLineMatch) + if err != nil { + return fmt.Errorf(`%s: %w`, logp, err) + } + + f.apply(conv) + + err = f.write() + if err != nil { + return fmt.Errorf(`%s: %w`, logp, err) + } + return nil } diff --git a/spdxconv_test.go b/spdxconv_test.go index 40bcc16..d2c7494 100644 --- a/spdxconv_test.go +++ b/spdxconv_test.go @@ -4,6 +4,7 @@ package spdxconv import ( + "regexp" "testing" "git.sr.ht/~shulhan/pakakeh.go/lib/git" @@ -71,7 +72,7 @@ func TestSPDXConv_loadConfig(t *testing.T) { LicenseIdentifier: `GPL-3.0-only`, FileCopyrightText: `Author <author@email.info>`, MaxLineMatch: 10, - MatchLicense: []configMatchLicense{{ + MatchLicense: []*configMatchLicense{{ Pattern: `^(//+|#+)*\s+(.*)governed by a BSD-style(.*)$`, LicenseIdentifier: `BSD-3-Clause`, DeleteLinePattern: []string{ @@ -79,6 +80,11 @@ func TestSPDXConv_loadConfig(t *testing.T) { `^(//+|#+)*\s+license that(.*)$`, }, DeleteMatch: true, + rePattern: regexp.MustCompile(`^(//+|#+)*\s+(.*)governed by a BSD-style(.*)$`), + reDeleteLine: []*regexp.Regexp{ + regexp.MustCompile(`^(//+|#+)*\s*$`), + regexp.MustCompile(`^(//+|#+)*\s+license that(.*)$`), + }, }}, }, }} diff --git a/testdata/file/.gitignore b/testdata/file/.gitignore new file mode 100644 index 0000000..1e37400 --- /dev/null +++ b/testdata/file/.gitignore @@ -0,0 +1,2 @@ +/** +!/.gitignore diff --git a/testdata/file_test.txt b/testdata/file_test.txt new file mode 100644 index 0000000..dad7b85 --- /dev/null +++ b/testdata/file_test.txt @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. 
Shulhan <ms@kilabit.info> + +>>> spdxconv.cfg +# SPDX-License-Identifier: BSD-3-Clause +# SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +[default] +license_identifier = GPL-3.0-only +file_copyright_text = M. Shulhan <ms@kilabit.info> + +[match-license] +pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$" +license_identifier = BSD-3-Clause +delete_match = true +delete_line_pattern = "^(//+|#+)\\s*$" +delete_line_pattern = "^(//+|#+)\\s+license that(.*)$" + +[match-copyright] +pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<holder>.*)\\s+<*(?<email>.*)>.*$" + +>>> without_spdx_license_id.txt +1 +2 +3 + +<<< without_spdx_license_id.txt +SPDX-License-Identifier: GPL-3.0-only +1 +2 +3 + +>>> with_spdx_at_bottom.txt +1 +2 +3 +// SPDX-License-Identifier: GPL-3.0-only + +<<< with_spdx_at_bottom.txt +// SPDX-License-Identifier: GPL-3.0-only + +1 +2 +3 + +>>> with_spdx_license_id_only.txt +// SPDX-License-Identifier: GPL-3.0-only +1 +2 +3 + +<<< with_spdx_license_id_only.txt +// SPDX-License-Identifier: GPL-3.0-only + +1 +2 +3 + +>>> with_no_order.txt +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> +// SPDX-License-Identifier: BSD-3-Clause + +1 +2 +3 + +<<< with_no_order.txt +// SPDX-License-Identifier: BSD-3-Clause +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +1 +2 +3 + +>>> with_match_license.txt +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +1 +2 +3 + +<<< with_match_license.txt +// SPDX-License-Identifier: BSD-3-Clause +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. + +1 +2 +3 + +>>> with_match_license_bottom.txt +1 +2 +3 + +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +<<< with_match_license_bottom.txt +// SPDX-License-Identifier: BSD-3-Clause + +1 +2 +3 + +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. diff --git a/testdata/scan/spdxconv.cfg b/testdata/scan/spdxconv.cfg new file mode 100644 index 0000000..b4e39c6 --- /dev/null +++ b/testdata/scan/spdxconv.cfg @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +[default] |
