aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--config.go8
-rw-r--r--config_match_license.go27
-rw-r--r--file.go215
-rw-r--r--file_test.go107
-rw-r--r--spdxconv.go25
-rw-r--r--spdxconv_test.go8
-rw-r--r--testdata/file/.gitignore2
-rw-r--r--testdata/file_test.txt108
-rw-r--r--testdata/scan/spdxconv.cfg4
9 files changed, 499 insertions, 5 deletions
diff --git a/config.go b/config.go
index 962b0ad..1268cb8 100644
--- a/config.go
+++ b/config.go
@@ -9,7 +9,7 @@ type config struct {
LicenseIdentifier string `ini:"default::license_identifier"`
FileCopyrightText string `ini:"default::file_copyright_text"`
- MatchLicense []configMatchLicense `ini:"match-license"`
+ MatchLicense []*configMatchLicense `ini:"match-license"`
MaxLineMatch int `ini:"default::max_line_match"`
}
@@ -25,5 +25,11 @@ func (cfg *config) init() (err error) {
if cfg.MaxLineMatch <= 0 {
cfg.MaxLineMatch = defMaxLineMatch
}
+ for _, cml := range cfg.MatchLicense {
+ err = cml.init()
+ if err != nil {
+ return err
+ }
+ }
return nil
}
diff --git a/config_match_license.go b/config_match_license.go
index 11e09fe..c027019 100644
--- a/config_match_license.go
+++ b/config_match_license.go
@@ -3,7 +3,15 @@
package spdxconv
+import (
+ "fmt"
+ "regexp"
+)
+
type configMatchLicense struct {
+ rePattern *regexp.Regexp
+ reDeleteLine []*regexp.Regexp
+
// Pattern to be searched in file.
Pattern string `ini:"match-license::pattern"`
@@ -21,3 +29,22 @@ type configMatchLicense struct {
// be deleted.
DeleteMatch bool `ini:"match-license::delete_match"`
}
+
+// init compiles the regular expressions of this match-license section.
+//
+// A non-empty Pattern is compiled into rePattern, and each entry of
+// DeleteLinePattern is compiled, in the same order, into reDeleteLine.
+// It returns an error that names the offending pattern as soon as one of
+// them fails to compile.
+func (cml *configMatchLicense) init() (err error) {
+	const logp = `match-license`
+
+	if len(cml.Pattern) != 0 {
+		cml.rePattern, err = regexp.Compile(cml.Pattern)
+		if err != nil {
+			return fmt.Errorf(`%s: pattern %q: %w`, logp, cml.Pattern, err)
+		}
+	}
+
+	cml.reDeleteLine = make([]*regexp.Regexp, len(cml.DeleteLinePattern))
+	for idx := range cml.DeleteLinePattern {
+		expr := cml.DeleteLinePattern[idx]
+		cml.reDeleteLine[idx], err = regexp.Compile(expr)
+		if err != nil {
+			return fmt.Errorf(`%s: delete_line_pattern %q: %w`, logp, expr, err)
+		}
+	}
+	return nil
+}
diff --git a/file.go b/file.go
new file mode 100644
index 0000000..b8a5630
--- /dev/null
+++ b/file.go
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-3.0-only
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+
+package spdxconv
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "regexp"
+ "slices"
+)
+
+// reLicenseID matches a whole line that holds the SPDX-License-Identifier
+// tag.
+// The tag either starts the line or follows one comment opener ("//",
+// "#", "/*", or "<!--", with the last character optionally repeated) and
+// at most one white space.
+var reLicenseID = regexp.MustCompile(`^(//+|#+|/\*+|<!--+)?\s?SPDX-License-Identifier:.*$`)
+
+// file holds the lines of a single file that is being converted.
+//
+// The content is kept as three consecutive windows: topLines, lines, and
+// bottomLines.
+// Writing them back in that order reproduces the file.
+type file struct {
+	path string
+
+	// commentPrefix used as prefix to SPDX identifier.
+	// The comment prefix is detected automatically from the first N
+	// lines of file.
+	commentPrefix string
+
+	// commentSuffix is appended after the SPDX identifier.
+	commentSuffix string
+
+	// lines contains the middle of the file, the lines between topLines
+	// and bottomLines.
+	lines [][]byte
+
+	// topLines contains the first lines of the file.
+	topLines [][]byte
+
+	// bottomLines contains the last lines of the file.
+	bottomLines [][]byte
+
+	// idxLicenseID index of License-Identifier in the topLines.
+	// newFile sets it to -1.
+	idxLicenseID int
+
+	hasSheBang bool
+}
+
+// newFile reads the file at path and splits its content by new line.
+//
+// If the file has at least maxLine*2 lines, the first maxLine lines are
+// stored in topLines, the last maxLine lines in bottomLines, and the
+// rest in lines.
+// Otherwise, or if maxLine is not positive, every line is stored in
+// topLines and the other two windows are empty.
+//
+// It returns the error from os.ReadFile if the file cannot be read.
+func newFile(path string, maxLine int) (f *file, err error) {
+	var content []byte
+	content, err = os.ReadFile(path)
+	if err != nil {
+		return nil, err
+	}
+
+	all := bytes.Split(content, []byte{'\n'})
+	nline := len(all)
+
+	f = &file{
+		path:         path,
+		idxLicenseID: -1,
+	}
+	if maxLine <= 0 || nline < maxLine*2 {
+		f.topLines = all
+		f.lines = all[nline:]
+		return f, nil
+	}
+
+	// The windows share one backing array.
+	// Cap topLines and lines at their own length, so that growing one
+	// of them (append, slices.Insert) allocates a new array instead of
+	// overwriting the first element of the window that follows it.
+	endMiddle := nline - maxLine
+	f.topLines = all[:maxLine:maxLine]
+	f.lines = all[maxLine:endMiddle:endMiddle]
+	f.bottomLines = all[endMiddle:]
+	return f, nil
+}
+
+// apply the SPDX identifier to the in-memory lines of the file.
+//
+// It runs three steps in order: detect the comment syntax, check and
+// insert the license identifier using the configuration in conv, and
+// insert an empty line after the identifier.
+func (f *file) apply(conv *SPDXConv) {
+	f.detectComment()
+	f.applyLicenseID(conv)
+	f.insertEmptyLine()
+}
+
+// detectComment sets commentPrefix, commentSuffix, and hasSheBang based
+// on the content of topLines.
+//
+// If the first line starts with "#!", the file is marked as having a
+// shebang and the prefix is "# ".
+// Otherwise the first line in topLines that starts with a known comment
+// opener decides the result: "#" selects "# ", "//" and "/*" select
+// "// ", and "<!--" selects "<!-- " with suffix " -->".
+// If topLines is empty or no line starts with one of those openers, the
+// fields are left unchanged.
+func (f *file) detectComment() {
+	if len(f.topLines) == 0 {
+		// Nothing to inspect; indexing the first line below would
+		// panic.
+		return
+	}
+	if bytes.HasPrefix(f.topLines[0], []byte(`#!`)) {
+		f.hasSheBang = true
+		f.commentPrefix = `# `
+		return
+	}
+	for _, line := range f.topLines {
+		switch {
+		case bytes.HasPrefix(line, []byte(`#`)):
+			f.commentPrefix = `# `
+		case bytes.HasPrefix(line, []byte(`//`)),
+			bytes.HasPrefix(line, []byte(`/*`)):
+			f.commentPrefix = `// `
+		case bytes.HasPrefix(line, []byte(`<!--`)):
+			f.commentPrefix = `<!-- `
+			f.commentSuffix = ` -->`
+		default:
+			// Not a comment line, try the next one.
+			continue
+		}
+		return
+	}
+}
+
+// applyLicenseID checks and inserts the SPDX-License-Identifier.
+//
+// It first detects whether an SPDX-License-Identifier line exists at the
+// top or at the bottom of the file.
+// If one is found at the top but not at the first line, or at the
+// bottom, it is moved to the first line, after the shebang.
+//
+// If none exists, the match-license rules in the configuration are
+// tried in order, against the top lines and then the bottom lines.
+// The first rule that matches a line provides the license identifier,
+// and the lines selected by that rule are deleted.
+// If no rule matches, or the matching rule has no license identifier,
+// the default license identifier from the configuration is used.
+//
+// This is independent of the number of match-license rules: an existing
+// identifier is detected even when no rule is configured.
+func (f *file) applyLicenseID(conv *SPDXConv) {
+	if f.moveExistingLicenseID() {
+		return
+	}
+
+	licenseID := f.matchLicense(conv.cfg.MatchLicense)
+	if licenseID == `` {
+		licenseID = conv.cfg.LicenseIdentifier
+	}
+	line := fmt.Sprintf("%sSPDX-License-Identifier: %s%s",
+		f.commentPrefix, licenseID, f.commentSuffix)
+	f.insertLicenseID([]byte(line))
+}
+
+// moveExistingLicenseID looks for an SPDX-License-Identifier line in
+// topLines and then in bottomLines, and moves the first one found to
+// the top of the file.
+// It returns false if the file has no such line.
+func (f *file) moveExistingLicenseID() bool {
+	for x, line := range f.topLines {
+		if !reLicenseID.Match(line) {
+			continue
+		}
+		f.idxLicenseID = x
+		if x == 0 || (f.hasSheBang && x == 1) {
+			// Already in place.
+			return true
+		}
+		f.topLines = slices.Delete(f.topLines, x, x+1)
+		f.insertLicenseID(line)
+		return true
+	}
+	for x, line := range f.bottomLines {
+		if !reLicenseID.Match(line) {
+			continue
+		}
+		f.bottomLines = slices.Delete(f.bottomLines, x, x+1)
+		f.insertLicenseID(line)
+		return true
+	}
+	return false
+}
+
+// matchLicense applies the first rule in list whose pattern matches a
+// line in topLines or bottomLines, and returns the license identifier
+// of that rule.
+// Rules without a compiled pattern are skipped.
+// It returns an empty string if no rule matches.
+func (f *file) matchLicense(list []*configMatchLicense) string {
+	var ok bool
+	for _, cml := range list {
+		if cml == nil || cml.rePattern == nil {
+			continue
+		}
+		f.topLines, ok = f.deleteMatchedLicense(f.topLines, cml)
+		if ok {
+			return cml.LicenseIdentifier
+		}
+		f.bottomLines, ok = f.deleteMatchedLicense(f.bottomLines, cml)
+		if ok {
+			return cml.LicenseIdentifier
+		}
+	}
+	return ``
+}
+
+// deleteMatchedLicense finds the first line that matches the pattern of
+// cml.
+// If DeleteMatch is set that line is deleted, and then, for each
+// delete-line pattern, the first matching line at or after that position
+// is deleted too.
+// It returns the resulting lines and whether the pattern matched.
+func (f *file) deleteMatchedLicense(lines [][]byte, cml *configMatchLicense) ([][]byte, bool) {
+	x := slices.IndexFunc(lines, cml.rePattern.Match)
+	if x < 0 {
+		return lines, false
+	}
+	if cml.DeleteMatch {
+		lines = slices.Delete(lines, x, x+1)
+	}
+	for _, re := range cml.reDeleteLine {
+		if re == nil {
+			continue
+		}
+		y := slices.IndexFunc(lines[x:], re.Match)
+		if y >= 0 {
+			lines = slices.Delete(lines, x+y, x+y+1)
+		}
+	}
+	return lines, true
+}
+
+// insertEmptyLine inserts an empty line after the SPDX identifier and
+// the comment lines that directly follow it.
+//
+// Starting at idxLicenseID, lines in topLines that begin with the
+// comment prefix are skipped.
+// If the first line after them is not blank, an empty line is inserted
+// before it.
+// Nothing is inserted if the license identifier index is unknown, no
+// comment prefix was detected, or topLines ends before a non-comment
+// line is found.
+func (f *file) insertEmptyLine() {
+	if f.idxLicenseID < 0 || f.idxLicenseID >= len(f.topLines) || f.commentPrefix == `` {
+		// No license ID inserted.
+		return
+	}
+	comment := []byte(f.commentPrefix)
+	comment = comment[:len(comment)-1] // Remove space.
+
+	// Iterate with the absolute index, since that is what slices.Insert
+	// needs; an index relative to idxLicenseID would put the empty line
+	// before the identifier in a file with a shebang.
+	for x := f.idxLicenseID; x < len(f.topLines); x++ {
+		line := f.topLines[x]
+		if bytes.HasPrefix(line, comment) {
+			continue
+		}
+		if len(bytes.TrimSpace(line)) != 0 {
+			// Clip first, so that the insert cannot grow into
+			// spare capacity shared with another slice.
+			f.topLines = slices.Insert(slices.Clip(f.topLines), x, []byte{})
+		}
+		// Otherwise there is already an empty line.
+		return
+	}
+}
+
+// insertLicenseID inserts the license identifier `line` at the top of
+// the file, below the shebang "#!" if it exists, and records its index
+// in idxLicenseID.
+func (f *file) insertLicenseID(line []byte) {
+	idx := 0
+	if f.hasSheBang {
+		// Keep the index valid even if topLines is empty.
+		idx = min(1, len(f.topLines))
+	}
+	// Clip first, so that the insert allocates a new array instead of
+	// growing into spare capacity that may be shared with the lines
+	// that follow topLines in the file.
+	f.topLines = slices.Insert(slices.Clip(f.topLines), idx, line)
+	f.idxLicenseID = idx
+}
+
+// deleteLinePattern deletes, for each regular expression in
+// reDeleteLine, the first line in lines that matches it.
+//
+// The deletion shifts the remaining elements in place, so the backing
+// array of lines is modified.
+// It returns the shortened slice; a caller that keeps its own, longer
+// slice over the same array still sees the vacated element at the end.
+func (f *file) deleteLinePattern(lines [][]byte, reDeleteLine []*regexp.Regexp) [][]byte {
+	for _, re := range reDeleteLine {
+		if re == nil {
+			continue
+		}
+		x := slices.IndexFunc(lines, re.Match)
+		if x >= 0 {
+			lines = slices.Delete(lines, x, x+1)
+		}
+	}
+	return lines
+}
+
+// write joins topLines, lines, and bottomLines with new lines, in that
+// order, and writes the result to the file at f.path.
+//
+// All trailing new lines are trimmed from the content before writing.
+// The permission passed to os.WriteFile is the current mode of the
+// file as reported by os.Stat.
+func (f *file) write() (err error) {
+	info, err := os.Stat(f.path)
+	if err != nil {
+		return fmt.Errorf(`write: %w`, err)
+	}
+
+	var all [][]byte
+	all = append(all, f.topLines...)
+	all = append(all, f.lines...)
+	all = append(all, f.bottomLines...)
+
+	content := bytes.TrimRight(bytes.Join(all, []byte{'\n'}), "\n")
+	if err = os.WriteFile(f.path, content, info.Mode()); err != nil {
+		return fmt.Errorf(`write: %w`, err)
+	}
+	return nil
+}
diff --git a/file_test.go b/file_test.go
new file mode 100644
index 0000000..31e2a3d
--- /dev/null
+++ b/file_test.go
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-3.0-only
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+
+package spdxconv
+
+import (
+ "os"
+ "testing"
+
+ "git.sr.ht/~shulhan/pakakeh.go/lib/test"
+)
+
+// TestFile_all runs newFile, apply, and write on each input in
+// `testdata/file_test.txt`, except `spdxconv.cfg`, and compares the
+// resulting file content with the output of the same name.
+func TestFile_all(t *testing.T) {
+	// Populate the test files from `testdata/file_test.txt`.
+	testData, err := test.LoadData(`testdata/file_test.txt`)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	const tempDir = `testdata/file/`
+	testData.ExtractInput(tempDir)
+	t.Chdir(tempDir)
+
+	// Initialize the SPDXConv instance.
+	conv, err := New(`.`)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	for name := range testData.Input {
+		if name == `spdxconv.cfg` {
+			continue
+		}
+
+		f, err := newFile(name, conv.cfg.MaxLineMatch)
+		if err != nil {
+			t.Fatal(err)
+		}
+		f.apply(conv)
+		if err = f.write(); err != nil {
+			t.Fatal(err)
+		}
+
+		got, err := os.ReadFile(name)
+		if err != nil {
+			t.Fatal(err)
+		}
+		exp := string(testData.Output[name])
+		test.Assert(t, name+`: after`, exp, string(got))
+	}
+}
+
+// TestFile_detectComment checks the comment prefix, comment suffix, and
+// shebang flag that detectComment sets for a single top line.
+func TestFile_detectComment(t *testing.T) {
+	type testCase struct {
+		topLines [][]byte
+		expFile  file
+	}
+
+	// oneLine wraps a single line of text as the top lines of a file.
+	oneLine := func(text string) [][]byte {
+		return [][]byte{[]byte(text)}
+	}
+
+	listCase := []testCase{
+		{
+			topLines: oneLine(`#!/bin/sh`),
+			expFile:  file{commentPrefix: `# `, hasSheBang: true},
+		},
+		{
+			topLines: oneLine(`# comment`),
+			expFile:  file{commentPrefix: `# `},
+		},
+		{
+			topLines: oneLine(`// comment`),
+			expFile:  file{commentPrefix: `// `},
+		},
+		{
+			topLines: oneLine(`/*`),
+			expFile:  file{commentPrefix: `// `},
+		},
+		{
+			topLines: oneLine(`<!--`),
+			expFile:  file{commentPrefix: `<!-- `, commentSuffix: ` -->`},
+		},
+	}
+
+	for _, tc := range listCase {
+		got := file{topLines: tc.topLines}
+		got.detectComment()
+		// The input lines are not part of the expectation.
+		got.topLines = nil
+		test.Assert(t, string(tc.topLines[0]), tc.expFile, got)
+	}
+}
diff --git a/spdxconv.go b/spdxconv.go
index b303070..3aaabf3 100644
--- a/spdxconv.go
+++ b/spdxconv.go
@@ -56,7 +56,10 @@ func Apply(path string) (err error) {
var pathFile string
for _, pathFile = range listFile {
- conv.convert(pathFile)
+ err = conv.apply(pathFile)
+ if err != nil {
+ return fmt.Errorf(`%s: %w`, logp, err)
+ }
}
return nil
}
@@ -179,6 +182,7 @@ func (conv *SPDXConv) scanFiles(listDir []string) (listFile []string, err error)
var commonIgnore = map[string]struct{}{
`.git`: struct{}{},
`node_modules`: struct{}{},
+ `spdxconv.cfg`: struct{}{},
`vendor`: struct{}{},
}
var suffixLicense = `.license`
@@ -232,7 +236,22 @@ func (conv *SPDXConv) scanFiles(listDir []string) (listFile []string, err error)
return listFile, nil
}
-// convert the given pathFile to SPDX license format.
-func (conv *SPDXConv) convert(pathFile string) {
+// apply loads the file `pathFile`, checks and inserts the SPDX
+// identifier into it, and writes the result back to the same path.
+// The number of lines passed to newFile comes from MaxLineMatch in the
+// configuration.
+func (conv *SPDXConv) apply(pathFile string) (err error) {
+	const logp = `apply`
+
+	f, err := newFile(pathFile, conv.cfg.MaxLineMatch)
+	if err != nil {
+		return fmt.Errorf(`%s: %w`, logp, err)
+	}
+
+	f.apply(conv)
+
+	if err = f.write(); err != nil {
+		return fmt.Errorf(`%s: %w`, logp, err)
+	}
+	return nil
}
diff --git a/spdxconv_test.go b/spdxconv_test.go
index 40bcc16..d2c7494 100644
--- a/spdxconv_test.go
+++ b/spdxconv_test.go
@@ -4,6 +4,7 @@
package spdxconv
import (
+ "regexp"
"testing"
"git.sr.ht/~shulhan/pakakeh.go/lib/git"
@@ -71,7 +72,7 @@ func TestSPDXConv_loadConfig(t *testing.T) {
LicenseIdentifier: `GPL-3.0-only`,
FileCopyrightText: `Author <author@email.info>`,
MaxLineMatch: 10,
- MatchLicense: []configMatchLicense{{
+ MatchLicense: []*configMatchLicense{{
Pattern: `^(//+|#+)*\s+(.*)governed by a BSD-style(.*)$`,
LicenseIdentifier: `BSD-3-Clause`,
DeleteLinePattern: []string{
@@ -79,6 +80,11 @@ func TestSPDXConv_loadConfig(t *testing.T) {
`^(//+|#+)*\s+license that(.*)$`,
},
DeleteMatch: true,
+ rePattern: regexp.MustCompile(`^(//+|#+)*\s+(.*)governed by a BSD-style(.*)$`),
+ reDeleteLine: []*regexp.Regexp{
+ regexp.MustCompile(`^(//+|#+)*\s*$`),
+ regexp.MustCompile(`^(//+|#+)*\s+license that(.*)$`),
+ },
}},
},
}}
diff --git a/testdata/file/.gitignore b/testdata/file/.gitignore
new file mode 100644
index 0000000..1e37400
--- /dev/null
+++ b/testdata/file/.gitignore
@@ -0,0 +1,2 @@
+/**
+!/.gitignore
diff --git a/testdata/file_test.txt b/testdata/file_test.txt
new file mode 100644
index 0000000..dad7b85
--- /dev/null
+++ b/testdata/file_test.txt
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-3.0-only
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+
+>>> spdxconv.cfg
+# SPDX-License-Identifier: BSD-3-Clause
+# SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+
+[default]
+license_identifier = GPL-3.0-only
+file_copyright_text = M. Shulhan <ms@kilabit.info>
+
+[match-license]
+pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$"
+license_identifier = BSD-3-Clause
+delete_match = true
+delete_line_pattern = "^(//+|#+)\\s*$"
+delete_line_pattern = "^(//+|#+)\\s+license that(.*)$"
+
+[match-copyright]
+pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<holder>.*)\\s+<*(?<email>.*)>.*$"
+
+>>> without_spdx_license_id.txt
+1
+2
+3
+
+<<< without_spdx_license_id.txt
+SPDX-License-Identifier: GPL-3.0-only
+1
+2
+3
+
+>>> with_spdx_at_bottom.txt
+1
+2
+3
+// SPDX-License-Identifier: GPL-3.0-only
+
+<<< with_spdx_at_bottom.txt
+// SPDX-License-Identifier: GPL-3.0-only
+
+1
+2
+3
+
+>>> with_spdx_license_id_only.txt
+// SPDX-License-Identifier: GPL-3.0-only
+1
+2
+3
+
+<<< with_spdx_license_id_only.txt
+// SPDX-License-Identifier: GPL-3.0-only
+
+1
+2
+3
+
+>>> with_no_order.txt
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: BSD-3-Clause
+
+1
+2
+3
+
+<<< with_no_order.txt
+// SPDX-License-Identifier: BSD-3-Clause
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+
+1
+2
+3
+
+>>> with_match_license.txt
+// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+1
+2
+3
+
+<<< with_match_license.txt
+// SPDX-License-Identifier: BSD-3-Clause
+// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved.
+
+1
+2
+3
+
+>>> with_match_license_bottom.txt
+1
+2
+3
+
+// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+<<< with_match_license_bottom.txt
+// SPDX-License-Identifier: BSD-3-Clause
+
+1
+2
+3
+
+// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved.
diff --git a/testdata/scan/spdxconv.cfg b/testdata/scan/spdxconv.cfg
new file mode 100644
index 0000000..b4e39c6
--- /dev/null
+++ b/testdata/scan/spdxconv.cfg
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-3.0-only
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+
+[default]