diff options
| author | Shulhan <ms@kilabit.info> | 2026-01-13 01:44:23 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2026-01-13 01:45:18 +0700 |
| commit | 434139fe3918fdb56704558301ad9275f1da06ad (patch) | |
| tree | 76f4a942f9f47398ab153a8319b2c4d2e442d167 | |
| parent | e16e2a4ec74443aa8f4c21a73ee837cb72ed46fb (diff) | |
| download | spdxconv-434139fe3918fdb56704558301ad9275f1da06ad.tar.xz | |
all: split the delete_line_pattern into before and after
While at it, also add the delete_line_before and delete_line_after
options to the match-copyright section.
| -rw-r--r-- | README.md | 42 | ||||
| -rw-r--r-- | config.go | 6 | ||||
| -rw-r--r-- | file.go | 165 | ||||
| -rw-r--r-- | match_copyright.go | 22 | ||||
| -rw-r--r-- | match_license.go | 36 | ||||
| -rw-r--r-- | spdxconv_test.go | 30 | ||||
| -rw-r--r-- | testdata/Apply_test.txt | 7 | ||||
| -rw-r--r-- | testdata/loadConfig/config_exists/spdxconv.cfg | 46 | ||||
| -rw-r--r-- | testdata/scan/spdxconv.cfg | 11 | ||||
| -rw-r--r-- | testdata/scan/test.html | 8 |
10 files changed, 301 insertions, 72 deletions
@@ -109,12 +109,16 @@ prefix = "%" pattern = "^.*\\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt|pptx|svg|svgz|tar|tgz|xls|xlsx|zip)$" [match-license] -pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$" +pattern = "^(//+|#+|/\\*+|<!--+)?\\s*(.*)governed by a BSD-style(.*)$" license_identifier = BSD-3-Clause -delete_line_pattern = "^(//+|#+)\\s+license that(.*)$" +delete_line_before = "^(//+|#+|/\\*+|<!--+)$" +delete_line_after = "^(//+|#+|/\\*+|<!--+)?\\s*license that can(.*)$" +delete_line_after = "^(//+|#+|\\*+/|--+>)$" [match-copyright] -pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<contact>.*)>.*$" +pattern = "^(//+|#+|/\\*+|<!--+)?\\s*Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<(?<contact>.*)>.*$" +delete_line_before = "^(//+|#+|/\\*+|<!--+)$" +delete_line_after = "^(//+|#+|\\*+/|--+>)$" ``` The configuration use the `ini` file format. @@ -194,9 +198,35 @@ regular expression. If there is a line that match with it, the value in "match-license::license_identifier" will replace the "default::license_identifier" value. -If there is "delete_line_pattern" defined, it will search for line that match -with that pattern and delete it. -The "delete_line_pattern" can be defined zero or multiple times. + +If there is "delete_line_before" or "delete_line_after" defined, it will +search for the pattern before and after the matched line and delete it. +The "delete_line_before" and "delete_line_after" can be defined zero or +multiple times. + +### match-copyright section + +The match-copyright section define the pattern to match with old copyright +text. +The regex must contains named group to capture copyright year, author, and +contact. +For example, given the following old copyright text, + +``` +Copyright 2022, John Doe <john.doe@email>. All rights reserved. 
+``` + +we can capture the year, author, and contact using the following regex, + +``` +^(//+|#+|/\\*+|<!--+)?\\s*Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<(?<contact>.*)>.*$" +``` + +The `match-copyright` section can also contains zero or more +`delete_line_before` and `delete_line_after` pattern. +The `delete_line_before` delete lines before matched line pattern, and +`delete_line_after` contains regex to delete lines after matched line +pattern. ## scan command @@ -56,10 +56,14 @@ pattern = "^.*\\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt| [match-license] pattern = "^(//+|#+|/\\*+|<!--+)?\\s*(.*)governed by a BSD-style(.*)$" license_identifier = BSD-3-Clause -delete_line_pattern = "^(//+|#+|/\\*+|<!--+)?\\s*license that can(.*)$" +delete_line_before = "^(//+|#+|/\\*+|<!--+)$" +delete_line_after = "^(//+|#+|/\\*+|<!--+)?\\s*license that can(.*)$" +delete_line_after = "^(//+|#+|\\*+/|--+>)$" [match-copyright] pattern = "^(//+|#+|/\\*+|<!--+)?\\s*Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<(?<contact>.*)>.*$" +delete_line_before = "^(//+|#+|/\\*+|<!--+)$" +delete_line_after = "^(//+|#+|\\*+/|--+>)$" ` type config struct { @@ -35,7 +35,8 @@ var reCopyrightText = regexp.MustCompile(`^(//+|#+|/\*+|<!--+)?\s?SPDX-FileCopyr // REUSE-IgnoreEnd type file struct { - matchLicense *matchLicense + matchCopyright *matchCopyright + matchLicense *matchLicense path string @@ -226,7 +227,7 @@ func (f *file) apply(conv *SPDXConv) (err error) { return err } - f.applyDelete() + f.applyDelete(&conv.cfg) // REUSE-IgnoreStart line := fmt.Sprintf("%sSPDX-License-Identifier: %s%s", @@ -360,19 +361,18 @@ func (f *file) applyCopyrightText(conv *SPDXConv) (err error) { } // Verify that the line actually match with one of // match-copyright pattern. 
- var cmc *matchCopyright - for _, cmc = range conv.cfg.MatchCopyright { - if cmc.match(string(line)) { - f.copyrightYear = cmc.year - f.copyrightText = cmc.String() + for _, f.matchCopyright = range conv.cfg.MatchCopyright { + if f.matchCopyright.match(string(line)) { + f.copyrightYear = f.matchCopyright.year + f.copyrightText = f.matchCopyright.String() break } } - if cmc == nil { - return fmt.Errorf(`%s: line %d does not match with any of match-license pattern`, - f.path, f.idxLicenseID) + if f.matchCopyright == nil { + return fmt.Errorf(`%s: line %d does not match with any of match-copyright pattern`, + f.path, f.idxCopyrightText) } - if f.idxLicenseID >= 0 { + if f.idxCopyrightText >= 0 { f.topLines[x] = nil } else { f.bottomLines[x] = nil @@ -381,48 +381,131 @@ func (f *file) applyCopyrightText(conv *SPDXConv) (err error) { return nil } -func (f *file) applyDelete() { - var reDeleteLine []*regexp.Regexp +func (f *file) applyDelete(cfg *config) { + var startat int + var lines [][]byte if f.matchLicense != nil { - reDeleteLine = f.matchLicense.reDeleteLine + if f.idxLicenseID >= 0 { + startat = f.idxLicenseID + lines = f.topLines + } else { + startat = f.idxLicenseID + cfg.MaxLineMatch + lines = f.bottomLines + } + f.deleteLineBefore(f.matchLicense.reDeleteLineBefore, lines, startat-1) + f.deleteLineAfter(f.matchLicense.reDeleteLineAfter, lines, startat+1) } - var lines [][]byte - for _, line := range f.topLines { - if line == nil { - continue + if f.matchCopyright != nil { + if f.idxCopyrightText >= 0 { + startat = f.idxCopyrightText + lines = f.topLines + } else { + startat = f.idxCopyrightText + cfg.MaxLineMatch + lines = f.bottomLines } - var found bool - for _, redel := range reDeleteLine { - if redel.Match(line) { - found = true - break - } + f.deleteLineBefore(f.matchCopyright.reDeleteLineBefore, lines, startat-1) + f.deleteLineAfter(f.matchCopyright.reDeleteLineAfter, lines, startat+1) + } + f.trimDeletedLines() +} + +func (f *file) 
deleteLineBefore(listre []*regexp.Regexp, lines [][]byte, startat int) { + if len(listre) == 0 { + return + } + var idxre = 0 + var re = listre[idxre] + var line []byte + for x := startat; x >= 0; x-- { + if lines[x] == nil { + return } - if found { - continue + line = lines[x] + if !re.Match(line) { + // Once the regex does not match with line, return + // immediately. + return + } + lines[x] = nil + idxre++ + if idxre == len(listre) { + break } - lines = append(lines, line) + re = listre[idxre] } - f.topLines = lines + // Continue deleting with the next lines. + for x := len(f.lines) - 1; x >= 0; x-- { + line = f.lines[x] + if !re.Match(line) { + return + } + f.lines[x] = nil + idxre++ + if idxre == len(listre) { + break + } + re = listre[idxre] + } +} - lines = nil - for _, line := range f.bottomLines { - if line == nil { - continue +func (f *file) deleteLineAfter(listre []*regexp.Regexp, lines [][]byte, startat int) { + if len(listre) == 0 { + return + } + var idxre = 0 + var re = listre[idxre] + var line []byte + for x := startat; x < len(lines); x++ { + if lines[x] == nil { + return } - var found bool - for _, redel := range reDeleteLine { - if redel.Match(line) { - found = true - break - } + line = lines[x] + if !re.Match(line) { + // Once the regex does not match with line, return + // immediately. + return + } + lines[x] = nil + idxre++ + if idxre == len(listre) { + break } - if found { + re = listre[idxre] + } + // Continue deleting with the next lines. 
+ for x := 0; x < len(f.lines); x++ { + line = f.lines[x] + if !re.Match(line) { + return + } + f.lines[x] = nil + idxre++ + if idxre == len(listre) { + break + } + re = listre[idxre] + } +} + +func (f *file) trimDeletedLines() { + var newlines [][]byte + var lines [][]byte + if f.idxLicenseID >= 0 { + lines = f.topLines + } else { + lines = f.bottomLines + } + for x := 0; x < len(lines); x++ { + if lines[x] == nil { continue } - lines = append(lines, line) + newlines = append(newlines, lines[x]) + } + if f.idxLicenseID >= 0 { + f.topLines = newlines + } else { + f.bottomLines = newlines } - f.bottomLines = lines } // insertEmptyLine insert empty line after SPDX identifiers or any comments after it. diff --git a/match_copyright.go b/match_copyright.go index aa35950..723c63f 100644 --- a/match_copyright.go +++ b/match_copyright.go @@ -11,12 +11,18 @@ import ( type matchCopyright struct { rePattern *regexp.Regexp + reDeleteLineBefore []*regexp.Regexp + reDeleteLineAfter []*regexp.Regexp + // Pattern to be searched in file. 
Pattern string `ini:"match-copyright::pattern"` year string author string contact string + + DeleteLineBefore []string `ini:"match-copyright::delete_line_before"` + DeleteLineAfter []string `ini:"match-copyright::delete_line_after"` } func (cmc *matchCopyright) init() (err error) { @@ -27,6 +33,22 @@ func (cmc *matchCopyright) init() (err error) { return fmt.Errorf(`%s: pattern %q: %w`, logp, cmc.Pattern, err) } } + cmc.reDeleteLineBefore = make([]*regexp.Regexp, len(cmc.DeleteLineBefore)) + for x, pattern := range cmc.DeleteLineBefore { + re, err := regexp.Compile(pattern) + if err != nil { + return fmt.Errorf(`%s: delete_line_before %q: %w`, logp, pattern, err) + } + cmc.reDeleteLineBefore[x] = re + } + cmc.reDeleteLineAfter = make([]*regexp.Regexp, len(cmc.DeleteLineAfter)) + for x, pattern := range cmc.DeleteLineAfter { + re, err := regexp.Compile(pattern) + if err != nil { + return fmt.Errorf(`%s: delete_line_after %q: %w`, logp, pattern, err) + } + cmc.reDeleteLineAfter[x] = re + } return nil } diff --git a/match_license.go b/match_license.go index 1d15d7f..a9abb99 100644 --- a/match_license.go +++ b/match_license.go @@ -9,8 +9,10 @@ import ( ) type matchLicense struct { - rePattern *regexp.Regexp - reDeleteLine []*regexp.Regexp + rePattern *regexp.Regexp + + reDeleteLineBefore []*regexp.Regexp + reDeleteLineAfter []*regexp.Regexp // Pattern to be searched in file. Pattern string `ini:"match-license::pattern"` @@ -19,11 +21,15 @@ type matchLicense struct { // value if Pattern match. LicenseIdentifier string `ini:"match-license::license_identifier"` - // DeleteLinePattern zero or more pattern that will be search after - // Pattern match line. - // A line that match with this pattern will be deleted. - // An empty line stop the search. - DeleteLinePattern []string `ini:"match-license::delete_line_pattern"` + // DeleteLineBefore zero or more pattern that will be search before + // the matched Pattern line, and deleted if its match. 
+ // Each pattern is executed in order until it does not match. + DeleteLineBefore []string `ini:"match-license::delete_line_before"` + + // DeleteLineAfter zero or more pattern that will be search after + // the matched Pattern line, and deleted if its match. + // Each pattern is executed in order until it does not match. + DeleteLineAfter []string `ini:"match-license::delete_line_after"` } func (cml *matchLicense) init() (err error) { @@ -34,13 +40,21 @@ func (cml *matchLicense) init() (err error) { return fmt.Errorf(`%s: pattern %q: %w`, logp, cml.Pattern, err) } } - cml.reDeleteLine = make([]*regexp.Regexp, len(cml.DeleteLinePattern)) - for x, pattern := range cml.DeleteLinePattern { + cml.reDeleteLineBefore = make([]*regexp.Regexp, len(cml.DeleteLineBefore)) + for x, pattern := range cml.DeleteLineBefore { + re, err := regexp.Compile(pattern) + if err != nil { + return fmt.Errorf(`%s: delete_line_before %q: %w`, logp, pattern, err) + } + cml.reDeleteLineBefore[x] = re + } + cml.reDeleteLineAfter = make([]*regexp.Regexp, len(cml.DeleteLineAfter)) + for x, pattern := range cml.DeleteLineAfter { re, err := regexp.Compile(pattern) if err != nil { - return fmt.Errorf(`%s: delete_line_pattern %q: %w`, logp, pattern, err) + return fmt.Errorf(`%s: delete_line_after %q: %w`, logp, pattern, err) } - cml.reDeleteLine[x] = re + cml.reDeleteLineAfter[x] = re } return nil } diff --git a/spdxconv_test.go b/spdxconv_test.go index 77929e8..11a2a72 100644 --- a/spdxconv_test.go +++ b/spdxconv_test.go @@ -66,12 +66,22 @@ func TestInit(t *testing.T) { MatchLicense: []*matchLicense{{ Pattern: `^(//+|#+|/\*+|<!--+)?\s*(.*)governed by a BSD-style(.*)$`, LicenseIdentifier: `BSD-3-Clause`, - DeleteLinePattern: []string{ + DeleteLineBefore: []string{ + `^(//+|#+|/\*+|<!--+)$`, + }, + DeleteLineAfter: []string{ `^(//+|#+|/\*+|<!--+)?\s*license that can(.*)$`, + `^(//+|#+|\*+/|--+>)$`, }, }}, MatchCopyright: []*matchCopyright{{ Pattern: 
`^(//+|#+|/\*+|<!--+)?\s*Copyright\s+(?<year>\d{4}),?\s+(?<author>.*)\s+<(?<contact>.*)>.*$`, + DeleteLineBefore: []string{ + `^(//+|#+|/\*+|<!--+)$`, + }, + DeleteLineAfter: []string{ + `^(//+|#+|\*+/|--+>)$`, + }, }}, } for _, mfc := range exp.MatchFileComment { @@ -79,13 +89,25 @@ func TestInit(t *testing.T) { } for _, ml := range exp.MatchLicense { ml.rePattern = regexp.MustCompile(ml.Pattern) - for _, dlp := range ml.DeleteLinePattern { + for _, dlp := range ml.DeleteLineBefore { + re := regexp.MustCompile(dlp) + ml.reDeleteLineBefore = append(ml.reDeleteLineBefore, re) + } + for _, dlp := range ml.DeleteLineAfter { re := regexp.MustCompile(dlp) - ml.reDeleteLine = append(ml.reDeleteLine, re) + ml.reDeleteLineAfter = append(ml.reDeleteLineAfter, re) } } for _, mc := range exp.MatchCopyright { mc.rePattern = regexp.MustCompile(mc.Pattern) + for _, dlp := range mc.DeleteLineBefore { + re := regexp.MustCompile(dlp) + mc.reDeleteLineBefore = append(mc.reDeleteLineBefore, re) + } + for _, dlp := range mc.DeleteLineAfter { + re := regexp.MustCompile(dlp) + mc.reDeleteLineAfter = append(mc.reDeleteLineAfter, re) + } } test.Assert(t, `Init: loadConfig`, exp, conv.cfg) } @@ -111,7 +133,7 @@ func TestScan(t *testing.T) { //spdxconv:header:path,license_id,idx_license_id,year,copyright_id,idx_copyright_id //spdxconv:regular test.go,match,1,2022,match,0,// , -test.html,match,2,2022,match,1,<!-- ," -->" +test.html,match,4,2022,match,1,<!-- ," -->" test.sh,match,1,2022,match,0,# , //spdxconv:binary //spdxconv:unknown` diff --git a/testdata/Apply_test.txt b/testdata/Apply_test.txt index 123dc28..8107ee8 100644 --- a/testdata/Apply_test.txt +++ b/testdata/Apply_test.txt @@ -50,10 +50,12 @@ pattern = "^.*\\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt| [match-license] pattern = "^(//+|#+|/\\*+|<!--+)?\\s*(.*)governed by a BSD-style(.*)$" license_identifier = BSD-3-Clause -delete_line_pattern = "^(//+|#+|/\\*+|<!--+)?\\s*license that can(.*)$" 
+delete_line_after = "^(//+|#+|/\\*+|<!--+)?\\s*license that can(.*)$" +delete_line_after = "^(//+|#+|\\*+/|--+>)$" [match-copyright] pattern = "^(//+|#+|/\\*+|<!--+)?\\s*Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<(?<contact>.*)>.*$" +delete_line_before = "^(//+|#+|/\\*+|<!--+)$" >>> without_spdx_license_id.go 1 @@ -176,9 +178,6 @@ license that can be found in the LICENSE file. <<< with.html <!-- SPDX-License-Identifier: BSD-3-Clause --> <!-- SPDX-FileCopyrightText: 2022 Shulhan <ms@kilabit.info> --> -<!-- - ---> 1 2 diff --git a/testdata/loadConfig/config_exists/spdxconv.cfg b/testdata/loadConfig/config_exists/spdxconv.cfg index ec401a3..1f7ff68 100644 --- a/testdata/loadConfig/config_exists/spdxconv.cfg +++ b/testdata/loadConfig/config_exists/spdxconv.cfg @@ -3,13 +3,53 @@ [default] license_identifier = GPL-3.0-only +copyright_year = 2026 file_copyright_text = Author <contact@email.local> max_line_match = 10 +[match-file-comment] +pattern = "^.*\\.(adoc|asciidoc|c|cc|cpp|cs|dart|go|h|hh|hpp|java|js|jsx|jsonc|kt|kts|php|rs|sass|scss|swift|ts|tsx)$" +prefix = "//" + +[match-file-comment] +pattern = "^.*\\.(aff|bash|csh|dockerfile|env|gitignore|hcl|ipynb|make|pl|pm|py|ps1|rb|sh|tf|yaml|yml|zsh)$" +prefix = "#" + +[match-file-comment] +pattern = "^.*\\.(css)$" +prefix = "/*" +suffix = "*/" + +[match-file-comment] +pattern = "^.*\\.(fxml|htm|html|html5|kml|markdown|md|xml)$" +prefix = "<!--" +suffix = "-->" + +[match-file-comment] +pattern = "^.*\\.(lua|sql)$" +prefix = "--" + +[match-file-comment] +pattern = "^.*\\.(rst)$" +prefix = ".." + +[match-file-comment] +pattern = "^.*\\.(tex)$" +prefix = "%" + +# File name that match with this pattern will have the ".license" file +# created. 
+[match-file-comment] +pattern = "^.*\\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt|pptx|svg|svgz|tar|tgz|xls|xlsx|zip)$" + [match-license] -pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$" +pattern = "^(//+|#+|/\\*+|<!--+)?\\s*(.*)governed by a BSD-style(.*)$" license_identifier = BSD-3-Clause -delete_line_pattern = "^(//+|#+)\\s+license that(.*)$" +delete_line_before = "^(//+|#+|/\\*+|<!--+)$" +delete_line_after = "^(//+|#+|/\\*+|<!--+)?\\s*license that can(.*)$" +delete_line_after = "^(//+|#+|\\*+/|--+>)$" [match-copyright] -pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<contact>.*)>.*$" +pattern = "^(//+|#+|/\\*+|<!--+)?\\s*Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<(?<contact>.*)>.*$" +delete_line_before = "^(//+|#+|/\\*+|<!--+)$" +delete_line_after = "^(//+|#+|\\*+/|--+>)$" diff --git a/testdata/scan/spdxconv.cfg b/testdata/scan/spdxconv.cfg index 3b137b6..1cdfefb 100644 --- a/testdata/scan/spdxconv.cfg +++ b/testdata/scan/spdxconv.cfg @@ -42,10 +42,19 @@ prefix = "%" [match-file-comment] pattern = "^.*\\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt|pptx|svg|svgz|tar|tgz|xls|xlsx|zip)$" +# File name that match with this pattern will have the ".license" file +# created. 
+[match-file-comment] +pattern = "^.*\\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt|pptx|svg|svgz|tar|tgz|xls|xlsx|zip)$" + [match-license] pattern = "^(//+|#+|/\\*+|<!--+)?\\s*(.*)governed by a BSD-style(.*)$" license_identifier = BSD-3-Clause -delete_line_pattern = "^(//+|#+|/\\*+|<!--+)?\\s*license that can(.*)$" +delete_line_before = "^(//+|#+|/\\*+|<!--+)$" +delete_line_after = "^(//+|#+|/\\*+|<!--+)?\\s*license that can(.*)$" +delete_line_after = "^(//+|#+|\\*+/|--+>)$" [match-copyright] pattern = "^(//+|#+|/\\*+|<!--+)?\\s*Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<(?<contact>.*)>.*$" +delete_line_before = "^(//+|#+|/\\*+|<!--+)$" +delete_line_after = "^(//+|#+|\\*+/|--+>)$" diff --git a/testdata/scan/test.html b/testdata/scan/test.html index da2491e..bcfc0c3 100644 --- a/testdata/scan/test.html +++ b/testdata/scan/test.html @@ -1,5 +1,11 @@ <!-- Copyright 2022, Shulhan <ms@kilabit.info>. All rights reserved. +--> +<!-- Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. --->
\ No newline at end of file +--> + +1 +2 +3 |
