From e16e2a4ec74443aa8f4c21a73ee837cb72ed46fb Mon Sep 17 00:00:00 2001 From: Shulhan Date: Mon, 12 Jan 2026 23:01:35 +0700 Subject: all: fix default regex match license and copyright to ignore comment Instead of assuming that the comment prefix and space always exist, "^(//)\s+...", make them optional so that the pattern also works on multi-line comments. For example, in an old header inside an HTML comment, <!-- Copyright ... -->, there is no comment prefix and space. --- config.go | 6 +++--- file.go | 4 +++- spdxconv_test.go | 12 ++++++------ testdata/Apply_test.txt | 30 ++++++++++++++++++++++++++---- testdata/scan/spdxconv.cfg | 6 +++--- 5 files changed, 41 insertions(+), 17 deletions(-) diff --git a/config.go b/config.go index e968fd4..ed6ce10 100644 --- a/config.go +++ b/config.go @@ -54,12 +54,12 @@ prefix = "%" pattern = "^.*\\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt|pptx|svg|svgz|tar|tgz|xls|xlsx|zip)$" [match-license] -pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$" +pattern = "^(//+|#+|/\\*+|" -test.sh,match,1,unknown,match,0,# , +test.go,match,1,2022,match,0,// , +test.html,match,2,2022,match,1," +test.sh,match,1,2022,match,0,# , //spdxconv:binary //spdxconv:unknown` diff --git a/testdata/Apply_test.txt b/testdata/Apply_test.txt index 611f654..123dc28 100644 --- a/testdata/Apply_test.txt +++ b/testdata/Apply_test.txt @@ -17,7 +17,7 @@ pattern = "^.*\\.(adoc|asciidoc|c|cc|cpp|cs|dart|go|h|hh|hpp|java|js|jsx|jsonc|k prefix = "//" [match-file-comment] -pattern = "^.*\\.(bash|csh|dockerfile|env|gitignore|hcl|ipynb|make|pl|pm|py|ps1|rb|sh|tf|yaml|yml|zsh)$" +pattern = "^.*\\.(aff|bash|csh|dockerfile|env|gitignore|hcl|ipynb|make|pl|pm|py|ps1|rb|sh|tf|yaml|yml|zsh)$" prefix = "#" [match-file-comment] @@ -48,12 +48,12 @@ prefix = "%" pattern = "^.*\\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt|pptx|svg|svgz|tar|tgz|xls|xlsx|zip)$" [match-license] -pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$" +pattern = 
"^(//+|#+|/\\*+| + +1 +2 +3 + +<<< with.html + + + + +1 +2 +3 + <<< END // REUSE-IgnoreEnd diff --git a/testdata/scan/spdxconv.cfg b/testdata/scan/spdxconv.cfg index 59ae864..3b137b6 100644 --- a/testdata/scan/spdxconv.cfg +++ b/testdata/scan/spdxconv.cfg @@ -43,9 +43,9 @@ prefix = "%" pattern = "^.*\\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt|pptx|svg|svgz|tar|tgz|xls|xlsx|zip)$" [match-license] -pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$" +pattern = "^(//+|#+|/\\*+|