aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2026-01-09 03:44:34 +0700
committerShulhan <ms@kilabit.info>2026-01-09 04:01:38 +0700
commit2094be654563456e33dd9432b2538037fbc18e92 (patch)
treef1569370ec21178f90cc7902733aa7690ccf0fa4
parentbf2b9dd3c9bf0a52b8b102a8cb2763027a27be01 (diff)
downloadspdxconv-2094be654563456e33dd9432b2538037fbc18e92.tar.xz
all: implement the scan command
The scan command scans the files that need to be converted or inserted with SPDX identifiers in the current directory. The result of the scan is stored inside a report file named "spdxconv.report". No other files are modified after the scan completes. The user can then inspect and modify the report to exclude certain files or change the behaviour of the apply command. Deleting a line in the report excludes that file from being processed by the "apply" command.
-rw-r--r--.gitignore1
-rw-r--r--LICENSES/CC0-1.0.txt121
-rw-r--r--README.md110
-rw-r--r--cmd/spdxconv/main.go11
-rw-r--r--config.go9
-rw-r--r--config_match_copyright.go52
-rw-r--r--file.go135
-rw-r--r--go.mod9
-rw-r--r--go.sum12
-rw-r--r--report.go73
-rw-r--r--spdxconv.go45
-rw-r--r--spdxconv_test.go35
-rw-r--r--testdata/loadConfig/config_exists/spdxconv.cfg2
-rw-r--r--testdata/scan/spdxconv.cfg16
14 files changed, 596 insertions, 35 deletions
diff --git a/.gitignore b/.gitignore
index 73f1fa2..cebcb10 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ _doc/index.html
cover.html
cover.out
cover.txt
+testdata/scan/spdxconv.report
diff --git a/LICENSES/CC0-1.0.txt b/LICENSES/CC0-1.0.txt
new file mode 100644
index 0000000..0e259d4
--- /dev/null
+++ b/LICENSES/CC0-1.0.txt
@@ -0,0 +1,121 @@
+Creative Commons Legal Code
+
+CC0 1.0 Universal
+
+ CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
+ LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
+ ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
+ INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
+ REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
+ PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
+ THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
+ HEREUNDER.
+
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator
+and subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for
+the purpose of contributing to a commons of creative, cultural and
+scientific works ("Commons") that the public can reliably and without fear
+of later claims of infringement build upon, modify, incorporate in other
+works, reuse and redistribute as freely as possible in any form whatsoever
+and for any purposes, including without limitation commercial purposes.
+These owners may contribute to the Commons to promote the ideal of a free
+culture and the further production of creative, cultural and scientific
+works, or to gain reputation or greater distribution for their Work in
+part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any
+expectation of additional consideration or compensation, the person
+associating CC0 with a Work (the "Affirmer"), to the extent that he or she
+is an owner of Copyright and Related Rights in the Work, voluntarily
+elects to apply CC0 to the Work and publicly distribute the Work under its
+terms, with knowledge of his or her Copyright and Related Rights in the
+Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not
+limited to, the following:
+
+ i. the right to reproduce, adapt, distribute, perform, display,
+ communicate, and translate a Work;
+ ii. moral rights retained by the original author(s) and/or performer(s);
+iii. publicity and privacy rights pertaining to a person's image or
+ likeness depicted in a Work;
+ iv. rights protecting against unfair competition in regards to a Work,
+ subject to the limitations in paragraph 4(a), below;
+ v. rights protecting the extraction, dissemination, use and reuse of data
+ in a Work;
+ vi. database rights (such as those arising under Directive 96/9/EC of the
+ European Parliament and of the Council of 11 March 1996 on the legal
+ protection of databases, and under any national implementation
+ thereof, including any amended or successor version of such
+ directive); and
+vii. other similar, equivalent or corresponding rights throughout the
+ world based on applicable law or treaty, and any national
+ implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention
+of, applicable law, Affirmer hereby overtly, fully, permanently,
+irrevocably and unconditionally waives, abandons, and surrenders all of
+Affirmer's Copyright and Related Rights and associated claims and causes
+of action, whether now known or unknown (including existing as well as
+future claims and causes of action), in the Work (i) in all territories
+worldwide, (ii) for the maximum duration provided by applicable law or
+treaty (including future time extensions), (iii) in any current or future
+medium and for any number of copies, and (iv) for any purpose whatsoever,
+including without limitation commercial, advertising or promotional
+purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
+member of the public at large and to the detriment of Affirmer's heirs and
+successors, fully intending that such Waiver shall not be subject to
+revocation, rescission, cancellation, termination, or any other legal or
+equitable action to disrupt the quiet enjoyment of the Work by the public
+as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason
+be judged legally invalid or ineffective under applicable law, then the
+Waiver shall be preserved to the maximum extent permitted taking into
+account Affirmer's express Statement of Purpose. In addition, to the
+extent the Waiver is so judged Affirmer hereby grants to each affected
+person a royalty-free, non transferable, non sublicensable, non exclusive,
+irrevocable and unconditional license to exercise Affirmer's Copyright and
+Related Rights in the Work (i) in all territories worldwide, (ii) for the
+maximum duration provided by applicable law or treaty (including future
+time extensions), (iii) in any current or future medium and for any number
+of copies, and (iv) for any purpose whatsoever, including without
+limitation commercial, advertising or promotional purposes (the
+"License"). The License shall be deemed effective as of the date CC0 was
+applied by Affirmer to the Work. Should any part of the License for any
+reason be judged legally invalid or ineffective under applicable law, such
+partial invalidity or ineffectiveness shall not invalidate the remainder
+of the License, and in such case Affirmer hereby affirms that he or she
+will not (i) exercise any of his or her remaining Copyright and Related
+Rights in the Work or (ii) assert any associated claims and causes of
+action with respect to the Work, in either case contrary to Affirmer's
+express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
+ surrendered, licensed or otherwise affected by this document.
+ b. Affirmer offers the Work as-is and makes no representations or
+ warranties of any kind concerning the Work, express, implied,
+ statutory or otherwise, including without limitation warranties of
+ title, merchantability, fitness for a particular purpose, non
+ infringement, or the absence of latent or other defects, accuracy, or
+ the present or absence of errors, whether or not discoverable, all to
+ the greatest extent permissible under applicable law.
+ c. Affirmer disclaims responsibility for clearing rights of other persons
+ that may apply to the Work or any use thereof, including without
+ limitation any person's Copyright and Related Rights in the Work.
+ Further, Affirmer disclaims responsibility for obtaining any necessary
+ consents, permissions or other rights required for any use of the
+ Work.
+ d. Affirmer understands and acknowledges that Creative Commons is not a
+ party to this document and has no duty or obligation with respect to
+ this CC0 or use of the Work.
diff --git a/README.md b/README.md
index 16ec812..f83f73d 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@ copyright as stated.
User then can repeat edit "spdxconv.cfg", "scan" and "apply" command
multiple times, until they satisfied with the result.
-### init command
+## init command
The first thing to do is to generate the configuration file using
@@ -117,19 +117,115 @@ If there is "delete_line_pattern" defined, it will search for line that match
with that pattern and delete it.
The "delete_line_pattern" can be defined zero or multiple times.
-### scan command
+## scan command
The scan command scans the files that need to be converted or inserted with
SPDX identifiers in the current directory.
+The result of scan is stored inside a report file named "spdxconv.report".
+There are no other files modified after scan completed.
-The result of scan is stored inside a file named "spdxconv.report".
-User then can inspect and modify the report to see and edit which files needs
-to proceed or not.
+The user can then inspect and modify the report to exclude certain files or
+change the behaviour of the apply command.
+Deleting a line in the report means excluding the file from being processed
+by "apply" command.
-### apply command
+### spdxconv.report file format
+
+Each line in the report file has several columns separated by space
+
+```
+path license_id year copyright_id
+```
+
+where each column has the following values,
+
+```
+path = `"` { unicode_char } `"`
+
+license_id = "default" | exist_at_index | license_text
+ license_text = 1*{ unicode_char }
+ exist_at_index = "exist:" [ "-" ] 1*decimal_digit
+
+year = "unknown" / 4*decimal_digit
+ / 4*decimal_digit "-" 4*decimal_digit
+
+copyright_id = `"` ("default" | exist_at_index | copyright_text) `"`
+ copyright_text = author contact
+ author = 1*{ unicode_char }
+ contact = "<" 1*{ unicode_char } ">"
+```
+
+The `path` column defines the path to the file that will be processed by
+`apply` command.
+Its value must be enclosed in double quote.
+
+The `license_id` column defines the license identifier to be used.
+The value either,
+
+- default - insert a new identifier using the default license_identifier
+ value from configuration.
+- exist:index - the SPDX-License-Identifier already exists in the file at
+ line `index`.
+ A positive index counts from the top, and a negative one from the bottom.
+- license_text - the value of license_identifier from one of match-license
+ section.
+
+The `year` column defines the copyright year for the work.
+The value is either,
+
+- unknown - program cannot detect year
+- YYYY - single year, for example 2026
+- YYYY-YYYY - range of years, for example 2000-2026
+
+The `copyright_id` column defines the author and contact.
+Its value must be enclosed in double quote.
+The value is either,
+
+- default - insert a new identifier using the default file_copyright_text
+ value from configuration.
+- exist:index - the SPDX-FileCopyrightText already exists in the file at
+ line `index`.
+ A positive index counts from the top, and a negative one from the bottom.
+- copyright_text - the value of author and contact regex match group from
+ one of the match-copyright pattern.
+ Its value must be enclosed in double quote.
+
+### spdxconv.report file groups
+
+Each file in the report is placed into one of three groups: regular,
+binary, and unknown files.
+Each group starts with a line made of the "//" prefix and the group identifier,
+
+```
+//spdxconv:regular
+...
+//spdxconv:binary
+...
+//spdxconv:unknown
+...
+```
+
+The regular group contains the files where the program can detect the
+comment syntax used inside them.
+Program will insert the new SPDX identifiers into the file using the
+detected comment syntax.
+
+The binary group contains non-text files, for example images (like jpg, png)
+or executable files.
+For binary file, program will create new file, in the same directory, with
+the same file name as binary file plus additional suffix ".license".
+Inside those "$name.license" file, the new SPDX identifiers will be inserted
+as defined in the report.
+
+The unknown group contains regular files where the program cannot detect the
+comment syntax used inside them.
+These files will not be processed; they are listed here so the user can modify
+the configuration and rerun the scan command for the next cycle.
+
+## apply command
The apply command read the "spdxconv.report" and apply the license and
-copyright as stated on each file in the report.
+copyright as stated on each line in the report.
## References
diff --git a/cmd/spdxconv/main.go b/cmd/spdxconv/main.go
index f69fd5d..c6bb931 100644
--- a/cmd/spdxconv/main.go
+++ b/cmd/spdxconv/main.go
@@ -28,6 +28,17 @@ func main() {
}
return
+ case `scan`:
+ path := flag.Arg(1)
+ if path == `` {
+ path = `.`
+ }
+ err := spdxconv.Scan(path)
+ if err != nil {
+ log.Fatal(err)
+ }
+ return
+
case `help`:
usage()
os.Exit(1)
diff --git a/config.go b/config.go
index 879ba7e..1de5d7f 100644
--- a/config.go
+++ b/config.go
@@ -29,7 +29,8 @@ type config struct {
LicenseIdentifier string `ini:"default::license_identifier"`
FileCopyrightText string `ini:"default::file_copyright_text"`
- MatchLicense []*configMatchLicense `ini:"match-license"`
+ MatchLicense []*configMatchLicense `ini:"match-license"`
+ MatchCopyright []*configMatchCopyright `ini:"match-copyright"`
MaxLineMatch int `ini:"default::max_line_match"`
}
@@ -51,5 +52,11 @@ func (cfg *config) init() (err error) {
return err
}
}
+ for _, cmc := range cfg.MatchCopyright {
+ err = cmc.init()
+ if err != nil {
+ return err
+ }
+ }
return nil
}
diff --git a/config_match_copyright.go b/config_match_copyright.go
new file mode 100644
index 0000000..1dc9dfd
--- /dev/null
+++ b/config_match_copyright.go
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-3.0-only
+// SPDX-FileCopyrightText: 2026 M. Shulhan <ms@kilabit.info>
+
+package spdxconv
+
+import (
+ "fmt"
+ "regexp"
+)
+
+// configMatchCopyright holds one "match-copyright" section of the
+// configuration: a regular expression used to find a copyright line in a
+// file, plus the values captured from the most recent matching line.
+type configMatchCopyright struct {
+ // rePattern is Pattern compiled by init.
+ // It stays nil when Pattern is empty.
+ rePattern *regexp.Regexp
+
+ // Pattern to be searched in file.
+ Pattern string `ini:"match-copyright::pattern"`
+
+ // year, author, and contact are filled by match from the named capture
+ // groups "year", "author", and "contact" of Pattern.
+ year string
+ author string
+ contact string
+}
+
+// init compiles Pattern into rePattern.
+// An empty Pattern is skipped, leaving rePattern nil.
+// It returns an error, prefixed with "match-copyright", when Pattern is not
+// a valid regular expression.
+func (cmc *configMatchCopyright) init() (err error) {
+	const logp = `match-copyright`
+
+	if cmc.Pattern == `` {
+		return nil
+	}
+	cmc.rePattern, err = regexp.Compile(cmc.Pattern)
+	if err != nil {
+		return fmt.Errorf(`%s: pattern %q: %w`, logp, cmc.Pattern, err)
+	}
+	return nil
+}
+
+// match reports whether line matches the compiled Pattern and the pattern
+// defines at least one named capture group.
+//
+// On a match, the values of the named groups "year", "author", and
+// "contact" are stored in cmc; a group that is not defined in the pattern
+// yields an empty string.
+// When the line does not match, the previously stored values are left
+// untouched.
+func (cmc *configMatchCopyright) match(line string) bool {
+	if cmc.rePattern == nil {
+		// init does not compile an empty Pattern, so there is nothing to
+		// match against; calling a method on the nil regexp would panic.
+		return false
+	}
+	matches := cmc.rePattern.FindStringSubmatch(line)
+	if len(matches) == 0 {
+		return false
+	}
+	namedMatches := map[string]string{}
+	for x, name := range cmc.rePattern.SubexpNames() {
+		// Index 0 is the whole match, and unnamed groups have an empty
+		// name; both are skipped.
+		if x != 0 && name != `` {
+			namedMatches[name] = matches[x]
+		}
+	}
+	cmc.year = namedMatches[`year`]
+	cmc.author = namedMatches[`author`]
+	cmc.contact = namedMatches[`contact`]
+	return len(namedMatches) >= 1
+}
+
+// String returns the captured author and contact formatted as
+// "author <contact>".
+func (cmc *configMatchCopyright) String() string {
+	var text = fmt.Sprintf(`%s <%s>`, cmc.author, cmc.contact)
+	return text
+}
diff --git a/file.go b/file.go
index 9ba8d5b..c10cdb7 100644
--- a/file.go
+++ b/file.go
@@ -9,6 +9,15 @@ import (
"os"
"regexp"
"slices"
+
+ libos "git.sr.ht/~shulhan/pakakeh.go/lib/os"
+)
+
+// List of values for licenseID, copyrightYear, and copyrightText.
+const (
+ valDefault = `default` // Use the default value from configuration.
+ valExist = `exist` // The license/copyright exist in the file.
+ // The value cannot be detected; newFile uses it as the initial
+ // copyrightYear.
+ valUnknown = `unknown`
)
// REUSE-IgnoreStart
@@ -17,6 +26,9 @@ import (
// comment prefix.
var reLicenseID = regexp.MustCompile(`^(//+|#+|/\*+|<!--+)?\s?SPDX-License-Identifier:.*$`)
+// reCopyrightText regex to detect SPDX copyright text.
+// It matches a line that starts with an optional comment prefix ("//",
+// "#", "/*", or "<!--"), at most one whitespace character, and then the
+// "SPDX-FileCopyrightText:" tag.
+var reCopyrightText = regexp.MustCompile(`^(//+|#+|/\*+|<!--+)?\s?SPDX-FileCopyrightText:.*$`)
+
// REUSE-IgnoreEnd
type file struct {
@@ -28,14 +40,23 @@ type file struct {
commentPrefix string
commentSuffix string
+ licenseID string
+ copyrightYear string
+ copyrightText string
+
lines [][]byte
topLines [][]byte
bottomLines [][]byte
- // idxLicenseID index of License-Identifier in the topLines.
- idxLicenseID int
+ // idxLicenseID index of License-Identifier.
+ // If its positive means the index is found at the topLines, if its
+ // negative means the index found at the bottomLines.
+ idxLicenseID int
+ idxCopyrightText int
hasSheBang bool
+ isBinary bool // True if file is binary.
+ isUnknown bool // True if file is regular with unknown comment.
}
func newFile(path string, maxLine int) (f *file, err error) {
@@ -46,10 +67,19 @@ func newFile(path string, maxLine int) (f *file, err error) {
}
f = &file{
- path: path,
- lines: bytes.Split(content, []byte{'\n'}),
- idxLicenseID: -1,
+ path: path,
+ idxLicenseID: -1,
+ licenseID: valDefault,
+ copyrightYear: valUnknown,
+ copyrightText: valDefault,
+ isBinary: libos.IsBinaryStream(content),
+ }
+ if f.isBinary {
+ return f, nil
}
+
+ f.lines = bytes.Split(content, []byte{'\n'})
+
nline := len(f.lines)
if nline < maxLine*2 {
f.topLines = f.lines
@@ -62,6 +92,15 @@ func newFile(path string, maxLine int) (f *file, err error) {
return f, nil
}
+// scan detects the comment syntax of the file and, unless the syntax is
+// unknown, looks for the license identifier and the copyright text.
+// The findings are stored in f; the file on disk is not touched here.
+func (f *file) scan(conv *SPDXConv) {
+	f.detectComment()
+	if !f.isUnknown {
+		f.scanLicenseID(conv)
+		f.scanCopyrightText(conv)
+	}
+}
+
// apply the SPDX identifier to file.
func (f *file) apply(conv *SPDXConv) {
f.detectComment()
@@ -94,6 +133,76 @@ func (f *file) detectComment() {
return
}
}
+ f.isUnknown = true
+}
+
+// scanLicenseID sets f.licenseID from the top and bottom lines of the file.
+//
+// For each match-license entry in the configuration, the top lines are
+// inspected first and then the bottom lines; the first line that hits
+// decides the result:
+//
+//   - a line carrying an SPDX-License-Identifier sets idxLicenseID and sets
+//     licenseID to "exist:<index>";
+//   - a line matching the entry's pattern sets licenseID to the entry's
+//     LicenseIdentifier.
+//
+// When nothing hits, licenseID is set to valDefault.
+//
+// NOTE(review): the existing-identifier check only runs inside the loop
+// over match-license entries, so with no entries configured an existing
+// identifier is never reported -- confirm this is intended.
+//
+// NOTE(review): the bottom index is the negated offset inside bottomLines,
+// so the first bottom line yields 0, the same value as the first top line
+// -- confirm the intended encoding.
+func (f *file) scanLicenseID(conv *SPDXConv) {
+	for _, matcher := range conv.cfg.MatchLicense {
+		for idx, text := range f.topLines {
+			switch {
+			case reLicenseID.Match(text):
+				f.idxLicenseID = idx
+				f.licenseID = fmt.Sprintf("%s:%d", valExist, f.idxLicenseID)
+				return
+			case matcher.rePattern.Match(text):
+				f.licenseID = matcher.LicenseIdentifier
+				return
+			}
+		}
+		for idx, text := range f.bottomLines {
+			switch {
+			case reLicenseID.Match(text):
+				f.idxLicenseID = -idx
+				f.licenseID = fmt.Sprintf("%s:%d", valExist, f.idxLicenseID)
+				return
+			case matcher.rePattern.Match(text):
+				f.licenseID = matcher.LicenseIdentifier
+				return
+			}
+		}
+	}
+	f.licenseID = valDefault
+}
+
+// scanCopyrightText sets f.copyrightText from the top and bottom lines of
+// the file.
+//
+// For each match-copyright entry in the configuration, the top lines are
+// inspected first and then the bottom lines; the first line that hits
+// decides the result:
+//
+//   - a line carrying an SPDX-FileCopyrightText sets idxCopyrightText and
+//     sets copyrightText to "exist:<index>";
+//   - a line matching the entry's pattern sets copyrightText to the
+//     captured "author <contact>".
+//
+// When nothing hits, copyrightText is set to valDefault.
+// The license fields of f are never modified here.
+//
+// NOTE(review): as in scanLicenseID, the existing-text check only runs when
+// at least one match-copyright entry is configured, and the first bottom
+// line yields index 0 -- confirm both are intended.
+func (f *file) scanCopyrightText(conv *SPDXConv) {
+	for _, cmc := range conv.cfg.MatchCopyright {
+		for x, line := range f.topLines {
+			if reCopyrightText.Match(line) {
+				f.idxCopyrightText = x
+				f.copyrightText = fmt.Sprintf("%s:%d", valExist, f.idxCopyrightText)
+				return
+			}
+			if cmc.match(string(line)) {
+				f.copyrightText = cmc.String()
+				return
+			}
+		}
+		for x, line := range f.bottomLines {
+			// Look for the copyright tag, not the license tag, and record
+			// the result in copyrightText so the license ID found by
+			// scanLicenseID is preserved.
+			if reCopyrightText.Match(line) {
+				f.idxCopyrightText = x * -1
+				f.copyrightText = fmt.Sprintf("%s:%d", valExist, f.idxCopyrightText)
+				return
+			}
+			if cmc.match(string(line)) {
+				f.copyrightText = cmc.String()
+				return
+			}
+		}
+	}
+	f.copyrightText = valDefault
}
// applyLicenseID check and insert the SPDX-License-Identifier.
@@ -103,8 +212,6 @@ func (f *file) detectComment() {
// If one found at the top, but not at the first line, or at the
// bottom, move it to the first line, after shebang.
func (f *file) applyLicenseID(conv *SPDXConv) {
- var licenseID string
-
for _, cml := range conv.cfg.MatchLicense {
for x, line := range f.topLines {
if reLicenseID.Match(line) {
@@ -120,14 +227,14 @@ func (f *file) applyLicenseID(conv *SPDXConv) {
return
}
if cml.rePattern.Match(line) {
- licenseID = cml.LicenseIdentifier
+ f.licenseID = cml.LicenseIdentifier
if cml.DeleteMatch {
f.topLines = slices.Delete(f.topLines, x, x+1)
}
f.deleteLinePattern(f.topLines[x:], cml.reDeleteLine)
}
}
- if licenseID != `` {
+ if f.licenseID != valDefault {
break
}
for x, line := range f.bottomLines {
@@ -137,23 +244,23 @@ func (f *file) applyLicenseID(conv *SPDXConv) {
return
}
if cml.rePattern.Match(line) {
- licenseID = cml.LicenseIdentifier
+ f.licenseID = cml.LicenseIdentifier
if cml.DeleteMatch {
f.bottomLines = slices.Delete(f.bottomLines, x, x+1)
}
f.deleteLinePattern(f.bottomLines[x:], cml.reDeleteLine)
}
}
- if licenseID != `` {
+ if f.licenseID != valDefault {
break
}
}
- if licenseID == `` {
- licenseID = conv.cfg.LicenseIdentifier
+ if f.licenseID == valDefault {
+ f.licenseID = conv.cfg.LicenseIdentifier
}
// REUSE-IgnoreStart
line := fmt.Sprintf("%sSPDX-License-Identifier: %s%s",
- f.commentPrefix, licenseID, f.commentSuffix)
+ f.commentPrefix, f.licenseID, f.commentSuffix)
// REUSE-IgnoreEnd
f.insertLicenseID([]byte(line))
}
diff --git a/go.mod b/go.mod
index ec171fe..f42d1e6 100644
--- a/go.mod
+++ b/go.mod
@@ -5,6 +5,11 @@ module git.sr.ht/~shulhan/spdxconv
go 1.24.0
-require git.sr.ht/~shulhan/pakakeh.go v0.60.2
+require git.sr.ht/~shulhan/pakakeh.go v0.60.3-0.20260108173808-34693c1b7ab1
-require golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93 // indirect
+require (
+ golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93 // indirect
+ golang.org/x/mod v0.31.0 // indirect
+ golang.org/x/sync v0.19.0 // indirect
+ golang.org/x/tools v0.40.0 // indirect
+)
diff --git a/go.sum b/go.sum
index e98afd1..bce51b2 100644
--- a/go.sum
+++ b/go.sum
@@ -1,4 +1,12 @@
-git.sr.ht/~shulhan/pakakeh.go v0.60.2 h1:ZSRE77lYm+mkhvg9pSrxCIO81ydbqt93qbsWuZJpjtI=
-git.sr.ht/~shulhan/pakakeh.go v0.60.2/go.mod h1:1MkKXbLZRHTcnheeSEbRpGztkym4Yxzh90ep+jCxbDc=
+git.sr.ht/~shulhan/pakakeh.go v0.60.3-0.20260108173808-34693c1b7ab1 h1:vxhrVP1emFV6B+ZkUEwFcjDuUCBeXIC49H8FTrMlg/4=
+git.sr.ht/~shulhan/pakakeh.go v0.60.3-0.20260108173808-34693c1b7ab1/go.mod h1:1MkKXbLZRHTcnheeSEbRpGztkym4Yxzh90ep+jCxbDc=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93 h1:fQsdNF2N+/YewlRZiricy4P1iimyPKZ/xwniHj8Q2a0=
golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93/go.mod h1:EPRbTFwzwjXj9NpYyyrvenVh9Y+GFeEvMNh7Xuz7xgU=
+golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI=
+golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg=
+golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
+golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA=
+golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc=
diff --git a/report.go b/report.go
new file mode 100644
index 0000000..295299a
--- /dev/null
+++ b/report.go
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-3.0-only
+// SPDX-FileCopyrightText: 2026 M. Shulhan <ms@kilabit.info>
+
+package spdxconv
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+)
+
+// ReportFile contains the list of files that will be converted or inserted
+// with SPDX identifiers.
+const ReportFile = `spdxconv.report`
+
+// report collects the scanned files into the three groups written to the
+// report file.
+type report struct {
+ // listRegular contains non-binary files whose comment syntax was
+ // detected.
+ listRegular []*file
+ // listBinary contains files detected as binary; they are not scanned.
+ listBinary []*file
+ // listUnknown contains non-binary files flagged isUnknown by the scan.
+ listUnknown []*file
+}
+
+// newReport returns an empty report.
+func newReport() (rep *report) {
+	return &report{}
+}
+
+// scan loads every path in listFile and files it under the binary, unknown,
+// or regular group.
+// Binary files are recorded without being scanned; all other files are
+// scanned first and grouped by the resulting isUnknown flag.
+// The first file that fails to load aborts the scan and its error is
+// returned, prefixed with "report.scan".
+func (rep *report) scan(conv *SPDXConv, listFile []string) (err error) {
+	const logp = `report.scan`
+
+	for _, path := range listFile {
+		var f *file
+		f, err = newFile(path, conv.cfg.MaxLineMatch)
+		if err != nil {
+			return fmt.Errorf(`%s: %w`, logp, err)
+		}
+		if !f.isBinary {
+			f.scan(conv)
+		}
+		switch {
+		case f.isBinary:
+			rep.listBinary = append(rep.listBinary, f)
+		case f.isUnknown:
+			rep.listUnknown = append(rep.listUnknown, f)
+		default:
+			rep.listRegular = append(rep.listRegular, f)
+		}
+	}
+	return nil
+}
+
+// write renders the report and stores it in ReportFile with mode 0600.
+//
+// The groups are written in the order regular, binary, unknown; each one
+// starts with its "//spdxconv:<group>" line, followed by one line per file
+// containing the quoted path, the license ID, the copyright year, and the
+// quoted copyright text.
+func (rep *report) write() (err error) {
+	groups := []struct {
+		header string
+		files  []*file
+	}{
+		{"//spdxconv:regular\n", rep.listRegular},
+		{"//spdxconv:binary\n", rep.listBinary},
+		{"//spdxconv:unknown\n", rep.listUnknown},
+	}
+
+	var out bytes.Buffer
+	for _, group := range groups {
+		out.WriteString(group.header)
+		for _, f := range group.files {
+			fmt.Fprintf(&out, "%q %s %s %q\n", f.path, f.licenseID,
+				f.copyrightYear, f.copyrightText)
+		}
+	}
+	return os.WriteFile(ReportFile, out.Bytes(), 0600)
+}
diff --git a/spdxconv.go b/spdxconv.go
index 25ebec0..73b2ad3 100644
--- a/spdxconv.go
+++ b/spdxconv.go
@@ -49,7 +49,7 @@ func Apply(path string) (err error) {
}
var listFile []string
if conv.name == `` {
- listFile, err = conv.scanFiles([]string{conv.dir})
+ listFile, err = conv.scanDir([]string{conv.dir})
} else {
listFile, err = conv.scanFile(conv.dir, conv.name)
}
@@ -73,6 +73,44 @@ func Init() (err error) {
return err
}
+// Scan collects the files under path that need to be converted or inserted
+// with SPDX identifiers, and writes the findings into the report file
+// "spdxconv.report".
+//
+// When path resolves to a directory it is listed with scanDir, otherwise
+// only the single named file is scanned.
+// The user can then inspect and modify the report, to exclude a file from
+// being processed or to change the value of license, year, or copyright
+// text.
+//
+// Any error is returned wrapped with the "Scan" prefix.
+func Scan(path string) (err error) {
+	const logp = `Scan`
+
+	conv, err := New(path)
+	if err != nil {
+		return fmt.Errorf(`%s: %w`, logp, err)
+	}
+
+	var listFile []string
+	switch conv.name {
+	case ``:
+		listFile, err = conv.scanDir([]string{path})
+	default:
+		listFile, err = conv.scanFile(conv.dir, conv.name)
+	}
+	if err != nil {
+		return fmt.Errorf(`%s: %w`, logp, err)
+	}
+
+	rep := newReport()
+	if err = rep.scan(conv, listFile); err != nil {
+		return fmt.Errorf(`%s: %w`, logp, err)
+	}
+	if err = rep.write(); err != nil {
+		return fmt.Errorf(`%s: %w`, logp, err)
+	}
+	return nil
+}
+
// New initialize new instance of SPDXConv.
func New(path string) (conv *SPDXConv, err error) {
var logp = `New`
@@ -182,15 +220,16 @@ func (conv *SPDXConv) scanFile(dir, name string) (listFile []string, err error)
return listFile, nil
}
-// scanFiles list file to be processed in directory `dir`, recursively.
+// scanDir list file to be processed in directory `dir`, recursively.
// A file ignored by ".gitignore" file will be excluded.
// A common ignore file or directory name likes ".git", "node_modules", and
// "vendor"; also will be excluded.
-func (conv *SPDXConv) scanFiles(listDir []string) (listFile []string, err error) {
+func (conv *SPDXConv) scanDir(listDir []string) (listFile []string, err error) {
var commonIgnore = map[string]struct{}{
`.git`: struct{}{},
`node_modules`: struct{}{},
ConfigFile: struct{}{},
+ ReportFile: struct{}{},
`vendor`: struct{}{},
}
var suffixLicense = `.license`
diff --git a/spdxconv_test.go b/spdxconv_test.go
index 236468d..9d2fe6c 100644
--- a/spdxconv_test.go
+++ b/spdxconv_test.go
@@ -28,6 +28,31 @@ func TestInit(t *testing.T) {
test.Assert(t, `Init`, configTemplate, string(got))
}
+// TestScan runs Scan inside testdata/scan and compares the generated
+// report file with the expected content.
+func TestScan(t *testing.T) {
+ const scanDir = `testdata/scan/`
+ // Scan is given a relative path and the report is read back by its bare
+ // name, so the test switches the working directory first.
+ t.Chdir(scanDir)
+
+ err := Scan(`.`)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ got, err := os.ReadFile(ReportFile)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // One line per file: quoted path, license ID, year, quoted copyright.
+ exp := `//spdxconv:regular
+".gitignore" exist:0 unknown "exist:1"
+"test.go" BSD-3-Clause unknown "Shulhan <ms@kilabit.info>"
+"test.html" default unknown "default"
+"test.sh" BSD-3-Clause unknown "Shulhan <ms@kilabit.info>"
+//spdxconv:binary
+//spdxconv:unknown`
+
+ test.Assert(t, `Scan: `+scanDir, exp, string(got))
+}
+
func TestNew(t *testing.T) {
type testCase struct {
exp *SPDXConv
@@ -87,7 +112,7 @@ func TestSPDXConv_loadConfig(t *testing.T) {
dir: `testdata/loadConfig/config_exists`,
exp: config{
LicenseIdentifier: `GPL-3.0-only`,
- FileCopyrightText: `Author <author@email.info>`,
+ FileCopyrightText: `Author <contact@email.local>`,
MaxLineMatch: 10,
MatchLicense: []*configMatchLicense{{
Pattern: `^(//+|#+)\s+(.*)governed by a BSD-style(.*)$`,
@@ -101,6 +126,10 @@ func TestSPDXConv_loadConfig(t *testing.T) {
regexp.MustCompile(`^(//+|#+)\s+license that(.*)$`),
},
}},
+ MatchCopyright: []*configMatchCopyright{{
+ Pattern: `^(//+|#+)\s+Copyright\s+(?<year>\d{4}),?\s+(?<author>.*)\s+<*(?<contact>.*)>.*$`,
+ rePattern: regexp.MustCompile(`^(//+|#+)\s+Copyright\s+(?<year>\d{4}),?\s+(?<author>.*)\s+<*(?<contact>.*)>.*$`),
+ }},
},
}}
@@ -152,7 +181,7 @@ func TestSPDXConv_scanFile(t *testing.T) {
}
}
-func TestSPDXConv_scanFiles(t *testing.T) {
+func TestSPDXConv_scanDir(t *testing.T) {
type testCase struct {
dir string
exp []string
@@ -183,7 +212,7 @@ func TestSPDXConv_scanFiles(t *testing.T) {
if err != nil {
t.Fatal(err)
}
- got, err = conv.scanFiles([]string{tc.dir})
+ got, err = conv.scanDir([]string{tc.dir})
if err != nil {
t.Fatal(err)
}
diff --git a/testdata/loadConfig/config_exists/spdxconv.cfg b/testdata/loadConfig/config_exists/spdxconv.cfg
index c1a11e5..1defeef 100644
--- a/testdata/loadConfig/config_exists/spdxconv.cfg
+++ b/testdata/loadConfig/config_exists/spdxconv.cfg
@@ -3,7 +3,7 @@
[default]
license_identifier = GPL-3.0-only
-file_copyright_text = Author <author@email.info>
+file_copyright_text = Author <contact@email.local>
max_line_match = 10
[match-license]
diff --git a/testdata/scan/spdxconv.cfg b/testdata/scan/spdxconv.cfg
index b4e39c6..1defeef 100644
--- a/testdata/scan/spdxconv.cfg
+++ b/testdata/scan/spdxconv.cfg
@@ -1,4 +1,16 @@
-// SPDX-License-Identifier: GPL-3.0-only
-// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+# SPDX-License-Identifier: CC0-1.0
+# SPDX-FileCopyrightText: 2026 M. Shulhan <ms@kilabit.info>
[default]
+license_identifier = GPL-3.0-only
+file_copyright_text = Author <contact@email.local>
+max_line_match = 10
+
+[match-license]
+pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$"
+license_identifier = BSD-3-Clause
+delete_match = true
+delete_line_pattern = "^(//+|#+)\\s+license that(.*)$"
+
+[match-copyright]
+pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<contact>.*)>.*$"