aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2026-01-12 23:01:35 +0700
committerShulhan <ms@kilabit.info>2026-01-12 23:01:35 +0700
commite16e2a4ec74443aa8f4c21a73ee837cb72ed46fb (patch)
tree8458e4f7bc3803310f1e4071a28a70bb8f0fd939
parent9817757f8dca159aa261315a91fceff1d9a13566 (diff)
downloadspdxconv-e16e2a4ec74443aa8f4c21a73ee837cb72ed46fb.tar.xz
all: fix default regex match license and copyright to ignore comment
Instead of assuming that the comment prefix and the space after it always exist ("^(//)\s+..."), make them optional so that the patterns also work inside multi-line comments. For example, in old HTML headers written as a multi-line comment, <!-- Copyright ... -->, the copyright line itself has no comment prefix and no leading space.
-rw-r--r--config.go6
-rw-r--r--file.go4
-rw-r--r--spdxconv_test.go12
-rw-r--r--testdata/Apply_test.txt30
-rw-r--r--testdata/scan/spdxconv.cfg6
5 files changed, 41 insertions, 17 deletions
diff --git a/config.go b/config.go
index e968fd4..ed6ce10 100644
--- a/config.go
+++ b/config.go
@@ -54,12 +54,12 @@ prefix = "%"
pattern = "^.*\\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt|pptx|svg|svgz|tar|tgz|xls|xlsx|zip)$"
[match-license]
-pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$"
+pattern = "^(//+|#+|/\\*+|<!--+)?\\s*(.*)governed by a BSD-style(.*)$"
license_identifier = BSD-3-Clause
-delete_line_pattern = "^(//+|#+)\\s+license that(.*)$"
+delete_line_pattern = "^(//+|#+|/\\*+|<!--+)?\\s*license that can(.*)$"
[match-copyright]
-pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<contact>.*)>.*$"
+pattern = "^(//+|#+|/\\*+|<!--+)?\\s*Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<(?<contact>.*)>.*$"
`
type config struct {
diff --git a/file.go b/file.go
index 7849d9e..798790c 100644
--- a/file.go
+++ b/file.go
@@ -187,18 +187,20 @@ func (f *file) scanCopyrightText(conv *SPDXConv) {
}
if cmc.match(string(line)) {
f.idxCopyrightText = x
+ f.copyrightYear = cmc.year
f.copyrightText = valMatch
return
}
}
for x, line := range f.bottomLines {
- if reLicenseID.Match(line) {
+ if reCopyrightText.Match(line) {
f.idxCopyrightText = x - conv.cfg.MaxLineMatch
f.copyrightText = valExist
return
}
if cmc.match(string(line)) {
f.idxCopyrightText = x - conv.cfg.MaxLineMatch
+ f.copyrightYear = cmc.year
f.copyrightText = valMatch
return
}
diff --git a/spdxconv_test.go b/spdxconv_test.go
index 194276e..77929e8 100644
--- a/spdxconv_test.go
+++ b/spdxconv_test.go
@@ -64,14 +64,14 @@ func TestInit(t *testing.T) {
Pattern: `^.*\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt|pptx|svg|svgz|tar|tgz|xls|xlsx|zip)$`,
}},
MatchLicense: []*matchLicense{{
- Pattern: `^(//+|#+)\s+(.*)governed by a BSD-style(.*)$`,
+ Pattern: `^(//+|#+|/\*+|<!--+)?\s*(.*)governed by a BSD-style(.*)$`,
LicenseIdentifier: `BSD-3-Clause`,
DeleteLinePattern: []string{
- `^(//+|#+)\s+license that(.*)$`,
+ `^(//+|#+|/\*+|<!--+)?\s*license that can(.*)$`,
},
}},
MatchCopyright: []*matchCopyright{{
- Pattern: `^(//+|#+)\s+Copyright\s+(?<year>\d{4}),?\s+(?<author>.*)\s+<*(?<contact>.*)>.*$`,
+ Pattern: `^(//+|#+|/\*+|<!--+)?\s*Copyright\s+(?<year>\d{4}),?\s+(?<author>.*)\s+<(?<contact>.*)>.*$`,
}},
}
for _, mfc := range exp.MatchFileComment {
@@ -110,9 +110,9 @@ func TestScan(t *testing.T) {
//spdxconv:version:v1
//spdxconv:header:path,license_id,idx_license_id,year,copyright_id,idx_copyright_id
//spdxconv:regular
-test.go,match,1,unknown,match,0,// ,
-test.html,default,0,unknown,default,0,<!-- ," -->"
-test.sh,match,1,unknown,match,0,# ,
+test.go,match,1,2022,match,0,// ,
+test.html,match,2,2022,match,1,<!-- ," -->"
+test.sh,match,1,2022,match,0,# ,
//spdxconv:binary
//spdxconv:unknown`
diff --git a/testdata/Apply_test.txt b/testdata/Apply_test.txt
index 611f654..123dc28 100644
--- a/testdata/Apply_test.txt
+++ b/testdata/Apply_test.txt
@@ -17,7 +17,7 @@ pattern = "^.*\\.(adoc|asciidoc|c|cc|cpp|cs|dart|go|h|hh|hpp|java|js|jsx|jsonc|k
prefix = "//"
[match-file-comment]
-pattern = "^.*\\.(bash|csh|dockerfile|env|gitignore|hcl|ipynb|make|pl|pm|py|ps1|rb|sh|tf|yaml|yml|zsh)$"
+pattern = "^.*\\.(aff|bash|csh|dockerfile|env|gitignore|hcl|ipynb|make|pl|pm|py|ps1|rb|sh|tf|yaml|yml|zsh)$"
prefix = "#"
[match-file-comment]
@@ -48,12 +48,12 @@ prefix = "%"
pattern = "^.*\\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt|pptx|svg|svgz|tar|tgz|xls|xlsx|zip)$"
[match-license]
-pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$"
+pattern = "^(//+|#+|/\\*+|<!--+)?\\s*(.*)governed by a BSD-style(.*)$"
license_identifier = BSD-3-Clause
-delete_line_pattern = "^(//+|#+)\\s+license that(.*)$"
+delete_line_pattern = "^(//+|#+|/\\*+|<!--+)?\\s*license that can(.*)$"
[match-copyright]
-pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<contact>.*)>.*$"
+pattern = "^(//+|#+|/\\*+|<!--+)?\\s*Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<(?<contact>.*)>.*$"
>>> without_spdx_license_id.go
1
@@ -162,5 +162,27 @@ pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<
2
3
+>>> with.html
+<!--
+Copyright 2022, Shulhan <ms@kilabit.info>. All rights reserved.
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file.
+-->
+
+1
+2
+3
+
+<<< with.html
+<!-- SPDX-License-Identifier: BSD-3-Clause -->
+<!-- SPDX-FileCopyrightText: 2022 Shulhan <ms@kilabit.info> -->
+<!--
+
+-->
+
+1
+2
+3
+
<<< END
// REUSE-IgnoreEnd
diff --git a/testdata/scan/spdxconv.cfg b/testdata/scan/spdxconv.cfg
index 59ae864..3b137b6 100644
--- a/testdata/scan/spdxconv.cfg
+++ b/testdata/scan/spdxconv.cfg
@@ -43,9 +43,9 @@ prefix = "%"
pattern = "^.*\\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt|pptx|svg|svgz|tar|tgz|xls|xlsx|zip)$"
[match-license]
-pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$"
+pattern = "^(//+|#+|/\\*+|<!--+)?\\s*(.*)governed by a BSD-style(.*)$"
license_identifier = BSD-3-Clause
-delete_line_pattern = "^(//+|#+)\\s+license that(.*)$"
+delete_line_pattern = "^(//+|#+|/\\*+|<!--+)?\\s*license that can(.*)$"
[match-copyright]
-pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<contact>.*)>.*$"
+pattern = "^(//+|#+|/\\*+|<!--+)?\\s*Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<(?<contact>.*)>.*$"