aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2026-01-12 20:22:20 +0700
committerShulhan <ms@kilabit.info>2026-01-12 21:22:24 +0700
commit1d6f8cfc60be1ada414ac1a9ce8eb6f61a527a0f (patch)
treeef0e990b8cd554e7ff6e9068339a1ce0485687fb
parent2a8ab762eabbe53f358eecd337872957d1afa01c (diff)
downloadspdxconv-1d6f8cfc60be1ada414ac1a9ce8eb6f61a527a0f.tar.xz
all: implement match-file-comment for setting comment based on file name
The first thing that the program does is to detect which comment string to use when inserting SPDX identifiers in the file. For each pattern in the "match-file-comment" section, the program matches it against the file name to get the comment prefix and suffix to be used later. Users can add their own "match-file-comment" sections as they like or modify the existing ones. A "match-file-comment" section can have an empty prefix and suffix. In that case, if the file name matches, the program creates a new file with the ".license" suffix that contains only the SPDX identifiers, instead of inserting them into the file.
-rw-r--r--README.md100
-rw-r--r--config.go73
-rw-r--r--file.go58
-rw-r--r--file_test.go48
-rw-r--r--match_file_comment.go42
-rw-r--r--spdxconv.go12
-rw-r--r--spdxconv_test.go90
-rw-r--r--testdata/Apply_test.txt68
-rw-r--r--testdata/Init/.gitignore5
-rw-r--r--testdata/scan/spdxconv.cfg35
10 files changed, 404 insertions, 127 deletions
diff --git a/README.md b/README.md
index 6a028c9..179c043 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,20 @@ SPDX-FileCopyrightText: 2026 M. Shulhan <ms@kilabit.info>
# spdxconv
-spdxconv is a tool to convert and insert the SPDX identifiers.
+spdxconv is a tool to convert existing license and copyright notices into
+[SPDX](https://spdx.dev/) identifiers,
+or to insert new identifiers.
+
+This tool works in tandem with [REUSE software](https://reuse.software).
+
+Features,
+
+- Customizable values for default license identifier and copyright
+- Customizable pattern for setting comment syntax based on file name
+- Customizable pattern for searching and capturing existing license through
+ regex
+- Customizable pattern for searching and capturing existing copyright year,
+ author, and contact through regex
## Background
@@ -59,6 +72,41 @@ license_identifier =
file_copyright_text =
max_line_match = 10
+[match-file-comment]
+pattern = "^.*\\.(adoc|asciidoc|c|cc|cpp|cs|dart|go|h|hh|hpp|java|js|jsx|jsonc|kt|kts|php|rs|sass|scss|swift|ts|tsx)$"
+prefix = "//"
+
+[match-file-comment]
+pattern = "^.*\\.(aff|bash|csh|dockerfile|env|gitignore|hcl|ipynb|make|pl|pm|py|ps1|rb|sh|tf|yaml|yml|zsh)$"
+prefix = "#"
+
+[match-file-comment]
+pattern = "^.*\\.(css)$"
+prefix = "/*"
+suffix = "*/"
+
+[match-file-comment]
+pattern = "^.*\\.(fxml|htm|html|html5|kml|markdown|md|xml)$"
+prefix = "<!--"
+suffix = "-->"
+
+[match-file-comment]
+pattern = "^.*\\.(lua|sql)$"
+prefix = "--"
+
+[match-file-comment]
+pattern = "^.*\\.(rst)$"
+prefix = ".."
+
+[match-file-comment]
+pattern = "^.*\\.(tex)$"
+prefix = "%"
+
+# File name that match with this pattern will have the ".license" file
+# created.
+[match-file-comment]
+pattern = "^.*\\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt|pptx|svg|svgz|tar|tgz|xls|xlsx|zip)$"
+
[match-license]
pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$"
license_identifier = BSD-3-Clause
@@ -69,8 +117,12 @@ pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<
```
The configuration use the `ini` file format.
-You need to modify it by filling the "default" section, and can add another
-match-license and match-copyright pattern as required.
+You need to modify it by filling the "default" section before running the
+`scan` or `apply` command.
+
+You can add match-file-comment, match-license and match-copyright
+section as required, or modify the existing one to match with your use case.
+
For quick references here are several rules that you need to be aware of,
- The regex value must be enclosed in double quote
@@ -93,12 +145,28 @@ top and bottom of file for `SPDX-*` identifiers, `match-license`, and
`match-copyright` before the program insert the default values.
The default values is 10.
+### match-file-comment section
+
+The first thing that the program does is to detect which comment string to
+use when inserting SPDX identifiers in the file.
+
+For each pattern in the "match-file-comment" section, the program matches it
+against the file name to get the comment prefix and suffix to be used later.
+
+Users can add their own "match-file-comment" sections as they like or modify
+the existing ones.
+
+A "match-file-comment" section can have an empty prefix and suffix.
+In that case, if the file name matches, the program creates a new file with
+the ".license" suffix that contains only the SPDX identifiers, instead of
+inserting them into the file directly.
+
### match-license section
<!-- REUSE-IgnoreStart -->
-The first thing that the program do is search for line that match
-with "SPDX-License-Identifier:".
+After the program detects the file comment syntax to use, it searches for a
+line that matches "SPDX-License-Identifier:".
<!-- REUSE-IgnoreEnd -->
@@ -208,22 +276,22 @@ identifier,
...
```
-Regular group are the file where program can detect the syntax of comment
-inside it.
+The regular group is the list of files for which the program can detect the
+file comment to be used.
Program will insert the new SPDX identifiers into the file using the
-detected comment syntax.
+comment syntax.
-Binary group are non-text file, for example images (like jpg, png) or
-executable files.
-For binary file, program will create new file with the same name as binary
-file plus additional suffix ".license".
+The binary group is the list of non-text files, for example images (like jpg,
+png) or executable files.
+For a binary file, the program creates a new file with the same name plus the
+additional suffix ".license".
Inside those "$name.license" file, the new SPDX identifiers will be inserted
as defined in the report.
-Unknown group are regular file where program cannot detect the comment
-syntax used inside it.
-This files will not be processed, it is listed here so user can modify the
-configuration and rerun the scan command for the next cycle.
+The unknown group is the list of files for which the program cannot detect
+the file comment to be used.
+These files are not processed; they are listed so the user can inspect them,
+modify the configuration, and rerun the scan command for the next cycle.
## apply command
diff --git a/config.go b/config.go
index 0fd2705..7b3bd04 100644
--- a/config.go
+++ b/config.go
@@ -5,6 +5,8 @@ package spdxconv
import (
"errors"
+
+ "git.sr.ht/~shulhan/pakakeh.go/lib/ini"
)
var configTemplate string = `# SPDX-License-Identifier: CC0-1.0
@@ -15,6 +17,41 @@ license_identifier =
file_copyright_text =
max_line_match = 10
+[match-file-comment]
+pattern = "^.*\\.(adoc|asciidoc|c|cc|cpp|cs|dart|go|h|hh|hpp|java|js|jsx|jsonc|kt|kts|php|rs|sass|scss|swift|ts|tsx)$"
+prefix = "//"
+
+[match-file-comment]
+pattern = "^.*\\.(aff|bash|csh|dockerfile|env|gitignore|hcl|ipynb|make|pl|pm|py|ps1|rb|sh|tf|yaml|yml|zsh)$"
+prefix = "#"
+
+[match-file-comment]
+pattern = "^.*\\.(css)$"
+prefix = "/*"
+suffix = "*/"
+
+[match-file-comment]
+pattern = "^.*\\.(fxml|htm|html|html5|kml|markdown|md|xml)$"
+prefix = "<!--"
+suffix = "-->"
+
+[match-file-comment]
+pattern = "^.*\\.(lua|sql)$"
+prefix = "--"
+
+[match-file-comment]
+pattern = "^.*\\.(rst)$"
+prefix = ".."
+
+[match-file-comment]
+pattern = "^.*\\.(tex)$"
+prefix = "%"
+
+# File name that match with this pattern will have the ".license" file
+# created.
+[match-file-comment]
+pattern = "^.*\\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt|pptx|svg|svgz|tar|tgz|xls|xlsx|zip)$"
+
[match-license]
pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$"
license_identifier = BSD-3-Clause
@@ -28,23 +65,37 @@ type config struct {
LicenseIdentifier string `ini:"default::license_identifier"`
FileCopyrightText string `ini:"default::file_copyright_text"`
+ MatchFileComment []*matchFileComment `ini:"match-file-comment"`
+
MatchLicense []*configMatchLicense `ini:"match-license"`
MatchCopyright []*configMatchCopyright `ini:"match-copyright"`
MaxLineMatch int `ini:"default::max_line_match"`
}
-func (cfg *config) init() (err error) {
- const defMaxLineMatch = 10
- if cfg.LicenseIdentifier == `` {
- return errors.New(`empty default license_identifier`)
+func (cfg *config) parse(raw []byte) (err error) {
+ err = ini.Unmarshal(raw, cfg)
+ if err != nil {
+ return err
}
- if cfg.FileCopyrightText == `` {
- return errors.New(`empty default file_copyright_text`)
+ err = cfg.init()
+ if err != nil {
+ return err
}
+ return nil
+}
+
+func (cfg *config) init() (err error) {
+ const defMaxLineMatch = 10
if cfg.MaxLineMatch <= 0 {
cfg.MaxLineMatch = defMaxLineMatch
}
+ for _, cfc := range cfg.MatchFileComment {
+ err = cfc.init()
+ if err != nil {
+ return err
+ }
+ }
for _, cml := range cfg.MatchLicense {
err = cml.init()
if err != nil {
@@ -59,3 +110,13 @@ func (cfg *config) init() (err error) {
}
return nil
}
+
+func (cfg *config) validate() (err error) {
+ if cfg.LicenseIdentifier == `` {
+ return errors.New(`empty default license_identifier`)
+ }
+ if cfg.FileCopyrightText == `` {
+ return errors.New(`empty default file_copyright_text`)
+ }
+ return nil
+}
diff --git a/file.go b/file.go
index 714a6c5..75842dd 100644
--- a/file.go
+++ b/file.go
@@ -7,8 +7,10 @@ import (
"bytes"
"fmt"
"os"
+ "path/filepath"
"regexp"
"slices"
+ "strings"
libos "git.sr.ht/~shulhan/pakakeh.go/lib/os"
)
@@ -116,55 +118,29 @@ func (f *file) initLines(content []byte, maxLine int) (err error) {
}
func (f *file) scan(conv *SPDXConv) {
- f.detectComment()
- if f.isUnknown {
+ f.detectComment(&conv.cfg)
+ if f.isBinary || f.isUnknown {
return
}
f.scanLicenseID(conv)
f.scanCopyrightText(conv)
}
-func (f *file) detectComment() {
- if bytes.HasPrefix(f.topLines[0], []byte(`#!`)) {
+// detectComment gets the comment prefix and suffix using the
+// "match-file-comment" patterns in the configuration.
+func (f *file) detectComment(cfg *config) {
+ if len(f.topLines) != 0 && bytes.HasPrefix(f.topLines[0], []byte(`#!`)) {
f.hasSheBang = true
- f.commentPrefix = `# `
- return
}
- for _, line := range f.topLines {
- if bytes.HasPrefix(line, []byte(`#`)) {
- f.commentPrefix = `# `
- return
- }
- if bytes.HasPrefix(line, []byte(`//`)) {
- f.commentPrefix = `// `
- return
- }
- if bytes.HasPrefix(line, []byte(`/*`)) {
- f.commentPrefix = `// `
- return
- }
- if bytes.HasPrefix(line, []byte(`<!--`)) {
- f.commentPrefix = `<!-- `
- f.commentSuffix = ` -->`
- return
- }
- }
- for _, line := range f.bottomLines {
- if bytes.HasPrefix(line, []byte(`#`)) {
- f.commentPrefix = `# `
- return
- }
- if bytes.HasPrefix(line, []byte(`//`)) {
- f.commentPrefix = `// `
- return
- }
- if bytes.HasPrefix(line, []byte(`/*`)) {
- f.commentPrefix = `// `
- return
- }
- if bytes.HasPrefix(line, []byte(`<!--`)) {
- f.commentPrefix = `<!-- `
- f.commentSuffix = ` -->`
+ var basename = strings.ToLower(filepath.Base(f.path))
+ for _, mfc := range cfg.MatchFileComment {
+ if mfc.rePattern.MatchString(basename) {
+ if mfc.isDirectLicense() {
+ f.isBinary = true
+ return
+ }
+ f.commentPrefix = mfc.Prefix
+ f.commentSuffix = mfc.Suffix
return
}
}
diff --git a/file_test.go b/file_test.go
index 1b3823c..20110ca 100644
--- a/file_test.go
+++ b/file_test.go
@@ -11,53 +11,65 @@ import (
func TestFile_detectComment(t *testing.T) {
type testCase struct {
- topLines [][]byte
- expFile file
+ f file
+ expFile file
}
listCase := []testCase{{
- topLines: [][]byte{
- []byte(`#!/bin/sh`),
+ f: file{
+ path: `test.sh`,
+ topLines: [][]byte{
+ []byte(`#!/bin/sh`),
+ },
},
expFile: file{
+ path: `test.sh`,
+ topLines: [][]byte{
+ []byte(`#!/bin/sh`),
+ },
commentPrefix: `# `,
hasSheBang: true,
},
}, {
- topLines: [][]byte{
- []byte(`# comment`),
+ f: file{
+ path: `test.rb`,
},
expFile: file{
+ path: `test.rb`,
commentPrefix: `# `,
},
}, {
- topLines: [][]byte{
- []byte(`// comment`),
+ f: file{
+ path: `test.c`,
},
expFile: file{
+ path: `test.c`,
commentPrefix: `// `,
},
}, {
- topLines: [][]byte{
- []byte(`/*`),
+ f: file{
+ path: `test.go`,
},
expFile: file{
+ path: `test.go`,
commentPrefix: `// `,
},
}, {
- topLines: [][]byte{
- []byte(`<!--`),
+ f: file{
+ path: `test.html`,
},
expFile: file{
+ path: `test.html`,
commentPrefix: `<!-- `,
commentSuffix: ` -->`,
},
}}
+ cfg := &config{}
+ err := cfg.parse([]byte(configTemplate))
+ if err != nil {
+ t.Fatal(err)
+ }
for _, tc := range listCase {
- f := file{
- topLines: tc.topLines,
- }
- f.detectComment()
- f.topLines = nil
- test.Assert(t, string(tc.topLines[0]), tc.expFile, f)
+ tc.f.detectComment(cfg)
+ test.Assert(t, tc.f.path, tc.expFile, tc.f)
}
}
diff --git a/match_file_comment.go b/match_file_comment.go
new file mode 100644
index 0000000..ac2db29
--- /dev/null
+++ b/match_file_comment.go
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-3.0-only
+// SPDX-FileCopyrightText: 2026 M. Shulhan <ms@kilabit.info>
+
+package spdxconv
+
+import (
+ "fmt"
+ "regexp"
+)
+
+// matchFileComment defines the configuration to set the file comment prefix
+// and suffix based on a pattern matched against the file name.
+//
+// A file whose name matches the pattern of an entry with empty Prefix and
+// Suffix will have the ".license" file created instead.
+type matchFileComment struct {
+ rePattern *regexp.Regexp
+
+ Pattern string `ini:"match-file-comment::pattern"`
+ Prefix string `ini:"match-file-comment::prefix"`
+ Suffix string `ini:"match-file-comment::suffix"`
+}
+
+func (mfc *matchFileComment) init() (err error) {
+ var logp = `match-file-comment`
+
+ mfc.rePattern, err = regexp.Compile(mfc.Pattern)
+ if err != nil {
+ return fmt.Errorf(`%s: pattern %q: %w`, logp, mfc.Pattern, err)
+ }
+ if mfc.Prefix != `` && mfc.Prefix[len(mfc.Prefix)-1] != ' ' {
+ mfc.Prefix += ` `
+ }
+ if mfc.Suffix != `` && mfc.Suffix[0] != ' ' {
+ mfc.Suffix = ` ` + mfc.Suffix
+ }
+ return nil
+}
+
+func (mfc *matchFileComment) isDirectLicense() bool {
+ return mfc.Prefix == `` && mfc.Suffix == ``
+}
diff --git a/spdxconv.go b/spdxconv.go
index d56b283..422f9fe 100644
--- a/spdxconv.go
+++ b/spdxconv.go
@@ -15,7 +15,6 @@ import (
"strings"
"git.sr.ht/~shulhan/pakakeh.go/lib/git"
- "git.sr.ht/~shulhan/pakakeh.go/lib/ini"
)
// ConfigFile the file name for configuration file.
@@ -99,6 +98,11 @@ func Apply() (err error) {
return fmt.Errorf(`%s: %w`, logp, err)
}
+ err = conv.cfg.validate()
+ if err != nil {
+ return fmt.Errorf(`%s: %w`, logp, err)
+ }
+
var rep *report
rep, err = loadReport()
if err != nil {
@@ -207,11 +211,7 @@ func (conv *SPDXConv) loadConfig(dir string) (err error) {
}
return fmt.Errorf(`%s %s: %w`, logp, pathcfg, err)
}
- err = ini.Unmarshal(rawcfg, &conv.cfg)
- if err != nil {
- return fmt.Errorf(`%s %s: %w`, logp, pathcfg, err)
- }
- err = conv.cfg.init()
+ err = conv.cfg.parse(rawcfg)
if err != nil {
return fmt.Errorf(`%s %s: %w`, logp, pathcfg, err)
}
diff --git a/spdxconv_test.go b/spdxconv_test.go
index 8fbeb7e..c057a71 100644
--- a/spdxconv_test.go
+++ b/spdxconv_test.go
@@ -13,8 +13,7 @@ import (
)
func TestInit(t *testing.T) {
- tempDir := t.TempDir()
- t.Chdir(tempDir)
+ t.Chdir(`testdata/Init/`)
err := Init()
if err != nil {
@@ -26,6 +25,69 @@ func TestInit(t *testing.T) {
t.Fatal(err)
}
test.Assert(t, `Init`, configTemplate, string(got))
+
+ // Test loading the config back, so we have consistent template and
+ // fields.
+
+ conv := SPDXConv{}
+ err = conv.loadConfig(`.`)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ exp := config{
+ MaxLineMatch: 10,
+ MatchFileComment: []*matchFileComment{{
+ Pattern: `^.*\.(adoc|asciidoc|c|cc|cpp|cs|dart|go|h|hh|hpp|java|js|jsx|jsonc|kt|kts|php|rs|sass|scss|swift|ts|tsx)$`,
+ Prefix: `// `,
+ }, {
+ Pattern: `^.*\.(aff|bash|csh|dockerfile|env|gitignore|hcl|ipynb|make|pl|pm|py|ps1|rb|sh|tf|yaml|yml|zsh)$`,
+ Prefix: `# `,
+ }, {
+ Pattern: `^.*\.(css)$`,
+ Prefix: `/* `,
+ Suffix: ` */`,
+ }, {
+ Pattern: `^.*\.(fxml|htm|html|html5|kml|markdown|md|xml)$`,
+ Prefix: `<!-- `,
+ Suffix: ` -->`,
+ }, {
+ Pattern: `^.*\.(lua|sql)$`,
+ Prefix: `-- `,
+ }, {
+ Pattern: `^.*\.(rst)$`,
+ Prefix: `.. `,
+ }, {
+ Pattern: `^.*\.(tex)$`,
+ Prefix: `% `,
+ }, {
+ Pattern: `^.*\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt|pptx|svg|svgz|tar|tgz|xls|xlsx|zip)$`,
+ }},
+ MatchLicense: []*configMatchLicense{{
+ Pattern: `^(//+|#+)\s+(.*)governed by a BSD-style(.*)$`,
+ LicenseIdentifier: `BSD-3-Clause`,
+ DeleteLinePattern: []string{
+ `^(//+|#+)\s+license that(.*)$`,
+ },
+ }},
+ MatchCopyright: []*configMatchCopyright{{
+ Pattern: `^(//+|#+)?\s*Copyright\s+(?<year>\d{4}),?\s+(?<author>.*)\s+<*(?<contact>.*)>.*$`,
+ }},
+ }
+ for _, mfc := range exp.MatchFileComment {
+ mfc.rePattern = regexp.MustCompile(mfc.Pattern)
+ }
+ for _, ml := range exp.MatchLicense {
+ ml.rePattern = regexp.MustCompile(ml.Pattern)
+ for _, dlp := range ml.DeleteLinePattern {
+ re := regexp.MustCompile(dlp)
+ ml.reDeleteLine = append(ml.reDeleteLine, re)
+ }
+ }
+ for _, mc := range exp.MatchCopyright {
+ mc.rePattern = regexp.MustCompile(mc.Pattern)
+ }
+ test.Assert(t, `Init: loadConfig`, exp, conv.cfg)
}
func TestScan(t *testing.T) {
@@ -58,7 +120,7 @@ test.sh,match,1,unknown,match,0,# ,
}
func TestApply(t *testing.T) {
- // Populate the test files from `testdata/Apply_test.txt`.
+ // Populate the test files.
var testData *test.Data
var err error
@@ -151,28 +213,6 @@ func TestSPDXConv_loadConfig(t *testing.T) {
var listCase = []testCase{{
dir: `testdata/loadConfig/config_not_exists`,
exp: config{},
- }, {
- dir: `testdata/loadConfig/config_exists`,
- exp: config{
- LicenseIdentifier: `GPL-3.0-only`,
- FileCopyrightText: `Author <contact@email.local>`,
- MaxLineMatch: 10,
- MatchLicense: []*configMatchLicense{{
- Pattern: `^(//+|#+)\s+(.*)governed by a BSD-style(.*)$`,
- LicenseIdentifier: `BSD-3-Clause`,
- DeleteLinePattern: []string{
- `^(//+|#+)\s+license that(.*)$`,
- },
- rePattern: regexp.MustCompile(`^(//+|#+)\s+(.*)governed by a BSD-style(.*)$`),
- reDeleteLine: []*regexp.Regexp{
- regexp.MustCompile(`^(//+|#+)\s+license that(.*)$`),
- },
- }},
- MatchCopyright: []*configMatchCopyright{{
- Pattern: `^(//+|#+)\s+Copyright\s+(?<year>\d{4}),?\s+(?<author>.*)\s+<*(?<contact>.*)>.*$`,
- rePattern: regexp.MustCompile(`^(//+|#+)\s+Copyright\s+(?<year>\d{4}),?\s+(?<author>.*)\s+<*(?<contact>.*)>.*$`),
- }},
- },
}}
var conv = SPDXConv{}
diff --git a/testdata/Apply_test.txt b/testdata/Apply_test.txt
index 712c2bc..43360b4 100644
--- a/testdata/Apply_test.txt
+++ b/testdata/Apply_test.txt
@@ -4,13 +4,48 @@
// REUSE-IgnoreStart
>>> spdxconv.cfg
-# SPDX-License-Identifier: BSD-3-Clause
+# SPDX-License-Identifier: CC0-1.0
# SPDX-FileCopyrightText: 2026 M. Shulhan <ms@kilabit.info>
[default]
license_identifier = GPL-3.0-only
file_copyright_text = M. Shulhan <ms@kilabit.info>
+[match-file-comment]
+pattern = "^.*\\.(adoc|asciidoc|c|cc|cpp|cs|dart|go|h|hh|hpp|java|js|jsx|jsonc|kt|kts|php|rs|sass|scss|swift|ts|tsx)$"
+prefix = "//"
+
+[match-file-comment]
+pattern = "^.*\\.(bash|csh|dockerfile|env|gitignore|hcl|ipynb|make|pl|pm|py|ps1|rb|sh|tf|yaml|yml|zsh)$"
+prefix = "#"
+
+[match-file-comment]
+pattern = "^.*\\.(css)$"
+prefix = "/*"
+suffix = "*/"
+
+[match-file-comment]
+pattern = "^.*\\.(fxml|htm|html|html5|kml|markdown|md|xml)$"
+prefix = "<!--"
+suffix = "-->"
+
+[match-file-comment]
+pattern = "^.*\\.(lua|sql)$"
+prefix = "--"
+
+[match-file-comment]
+pattern = "^.*\\.(rst)$"
+prefix = ".."
+
+[match-file-comment]
+pattern = "^.*\\.(tex)$"
+prefix = "%"
+
+# File name that match with this pattern will have the ".license" file
+# created.
+[match-file-comment]
+pattern = "^.*\\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt|pptx|svg|svgz|tar|tgz|xls|xlsx|zip)$"
+
[match-license]
pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$"
license_identifier = BSD-3-Clause
@@ -19,23 +54,26 @@ delete_line_pattern = "^(//+|#+)\\s+license that(.*)$"
[match-copyright]
pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<contact>.*)>.*$"
->>> without_spdx_license_id.txt
+>>> without_spdx_license_id.go
1
2
3
-<<< without_spdx_license_id.txt
+<<< without_spdx_license_id.go
+// SPDX-License-Identifier: GPL-3.0-only
+// SPDX-FileCopyrightText: M. Shulhan <ms@kilabit.info>
+
1
2
3
->>> with_spdx_at_bottom.txt
+>>> with_spdx_at_bottom.go
1
2
3
// SPDX-License-Identifier: GPL-3.0-only
-<<< with_spdx_at_bottom.txt
+<<< with_spdx_at_bottom.go
// SPDX-License-Identifier: GPL-3.0-only
// SPDX-FileCopyrightText: M. Shulhan <ms@kilabit.info>
@@ -43,13 +81,13 @@ pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<
2
3
->>> with_spdx_license_id_only.txt
+>>> with_spdx_license_id_only.go
// SPDX-License-Identifier: GPL-3.0-only
1
2
3
-<<< with_spdx_license_id_only.txt
+<<< with_spdx_license_id_only.go
// SPDX-License-Identifier: GPL-3.0-only
// SPDX-FileCopyrightText: M. Shulhan <ms@kilabit.info>
@@ -57,7 +95,7 @@ pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<
2
3
->>> with_spdx.txt
+>>> with_spdx.go
// SPDX-License-Identifier: BSD-3-Clause
// SPDX-FileCopyrightText: 2018 Shulhan <ms@kilabit.info>
@@ -65,7 +103,7 @@ pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<
2
3
-<<< with_spdx.txt
+<<< with_spdx.go
// SPDX-License-Identifier: BSD-3-Clause
// SPDX-FileCopyrightText: 2018 Shulhan <ms@kilabit.info>
@@ -73,7 +111,7 @@ pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<
2
3
->>> with_spdx_no_order.txt
+>>> with_spdx_no_order.go
// SPDX-FileCopyrightText: 2026 M. Shulhan <ms@kilabit.info>
// SPDX-License-Identifier: BSD-3-Clause
@@ -81,7 +119,7 @@ pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<
2
3
-<<< with_spdx_no_order.txt
+<<< with_spdx_no_order.go
// SPDX-FileCopyrightText: 2026 M. Shulhan <ms@kilabit.info>
// SPDX-License-Identifier: BSD-3-Clause
@@ -89,7 +127,7 @@ pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<
2
3
->>> with_match_license.txt
+>>> with_match_license.go
// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
@@ -98,7 +136,7 @@ pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<
2
3
-<<< with_match_license.txt
+<<< with_match_license.go
// SPDX-License-Identifier: BSD-3-Clause
// SPDX-FileCopyrightText: 2018 Shulhan <ms@kilabit.info>
@@ -106,7 +144,7 @@ pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<
2
3
->>> with_match_license_bottom.txt
+>>> with_match_license_bottom.go
1
2
3
@@ -115,7 +153,7 @@ pattern = "^(//+|#+)\\s+Copyright\\s+(?<year>\\d{4}),?\\s+(?<author>.*)\\s+<*(?<
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-<<< with_match_license_bottom.txt
+<<< with_match_license_bottom.go
// SPDX-License-Identifier: BSD-3-Clause
// SPDX-FileCopyrightText: 2018 Shulhan <ms@kilabit.info>
diff --git a/testdata/Init/.gitignore b/testdata/Init/.gitignore
new file mode 100644
index 0000000..234c892
--- /dev/null
+++ b/testdata/Init/.gitignore
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-3.0-only
+# SPDX-FileCopyrightText: 2026 M. Shulhan <ms@kilabit.info>
+
+/*
+!/.gitignore
diff --git a/testdata/scan/spdxconv.cfg b/testdata/scan/spdxconv.cfg
index ec401a3..d317d64 100644
--- a/testdata/scan/spdxconv.cfg
+++ b/testdata/scan/spdxconv.cfg
@@ -6,6 +6,41 @@ license_identifier = GPL-3.0-only
file_copyright_text = Author <contact@email.local>
max_line_match = 10
+[match-file-comment]
+pattern = "^.*\\.(adoc|asciidoc|c|cc|cpp|cs|dart|go|h|hh|hpp|java|js|jsx|jsonc|kt|kts|php|rs|sass|scss|swift|ts|tsx)$"
+prefix = "//"
+
+[match-file-comment]
+pattern = "^.*\\.(aff|bash|csh|dockerfile|env|gitignore|hcl|ipynb|make|pl|pm|py|ps1|rb|sh|tf|yaml|yml|zsh)$"
+prefix = "#"
+
+[match-file-comment]
+pattern = "^.*\\.(css)$"
+prefix = "/*"
+suffix = "*/"
+
+[match-file-comment]
+pattern = "^.*\\.(fxml|htm|html|html5|kml|markdown|md|xml)$"
+prefix = "<!--"
+suffix = "-->"
+
+[match-file-comment]
+pattern = "^.*\\.(lua|sql)$"
+prefix = "--"
+
+[match-file-comment]
+pattern = "^.*\\.(rst)$"
+prefix = ".."
+
+[match-file-comment]
+pattern = "^.*\\.(tex)$"
+prefix = "%"
+
+# File name that match with this pattern will have the ".license" file
+# created.
+[match-file-comment]
+pattern = "^.*\\.(apk|app|bz2|csv|doc|docx|exe|gif|gz|jpeg|jpg|json|pdf|png|ppt|pptx|svg|svgz|tar|tgz|xls|xlsx|zip)$"
+
[match-license]
pattern = "^(//+|#+)\\s+(.*)governed by a BSD-style(.*)$"
license_identifier = BSD-3-Clause