diff options
| author | Shulhan <ms@kilabit.info> | 2026-01-14 15:41:31 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2026-01-14 17:00:11 +0700 |
| commit | a6f72c549c43ae9d2d74946769221cb3431e9458 (patch) | |
| tree | 3c709143e14ecdd137091b43e8b8a5f31e9f3daa | |
| parent | b37a4cd18827d6a8dafe2729ba72a8e13e8fbb8c (diff) | |
| download | spdxconv-a6f72c549c43ae9d2d74946769221cb3431e9458.tar.xz | |
all: detect annotation from REUSE configuration
During the scan, the program reads the REUSE.toml configuration.
Files that are already annotated inside REUSE.toml are ignored during
the scan.
| -rw-r--r-- | README.md | 4 | ||||
| -rw-r--r-- | go.mod | 5 | ||||
| -rw-r--r-- | go.sum | 6 | ||||
| -rw-r--r-- | reuse_config.go | 93 | ||||
| -rw-r--r-- | reuse_config_test.go | 52 | ||||
| -rw-r--r-- | spdxconv.go | 35 | ||||
| -rw-r--r-- | spdxconv_test.go | 3 | ||||
| -rw-r--r-- | testdata/scan/REUSE.toml | 13 | ||||
| -rw-r--r-- | testdata/scan/po/included.go | 7 | ||||
| -rw-r--r-- | testdata/scan/po/test.po | 1 | ||||
| -rw-r--r-- | testdata/scan/po/test.pot | 1 |
11 files changed, 209 insertions, 11 deletions
@@ -13,6 +13,7 @@ This tool works in tandem with [REUSE software](https://reuse.software). Features, +- Detect annotations from REUSE configuration (REUSE.toml) - Customizable values for default license identifier and copyright - Customizable pattern for setting comment syntax based on file name - Customizable pattern for searching and capturing existing license through @@ -249,6 +250,9 @@ by "apply" command. The scan command works in the following way, +(0) Skip the file if it is ignored by git or already annotated in REUSE.toml +configuration. + (1) Check the file for SPDX-License-Identifier and SPDX-FileCopyrightText. If both exist, skip the file. @@ -5,7 +5,10 @@ module git.sr.ht/~shulhan/spdxconv go 1.24.0 -require git.sr.ht/~shulhan/pakakeh.go v0.60.3-0.20260113140641-d18ec5d8635b +require ( + git.sr.ht/~shulhan/pakakeh.go v0.60.3-0.20260114074200-83aa06572a83 + github.com/BurntSushi/toml v1.6.0 +) require ( golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93 // indirect @@ -1,5 +1,7 @@ -git.sr.ht/~shulhan/pakakeh.go v0.60.3-0.20260113140641-d18ec5d8635b h1:tPRWSIjzQTWoQJ4WG2zJFP1GybZuvpkniKjjJc4gvJ8= -git.sr.ht/~shulhan/pakakeh.go v0.60.3-0.20260113140641-d18ec5d8635b/go.mod h1:1MkKXbLZRHTcnheeSEbRpGztkym4Yxzh90ep+jCxbDc= +git.sr.ht/~shulhan/pakakeh.go v0.60.3-0.20260114074200-83aa06572a83 h1:7sKv4zQd+o5XC0hmNV+qjAlexpkrlkC5Rseh6WRagWM= +git.sr.ht/~shulhan/pakakeh.go v0.60.3-0.20260114074200-83aa06572a83/go.mod h1:1MkKXbLZRHTcnheeSEbRpGztkym4Yxzh90ep+jCxbDc= +github.com/BurntSushi/toml v1.6.0 h1:dRaEfpa2VI55EwlIW72hMRHdWouJeRF7TPYhI+AUQjk= +github.com/BurntSushi/toml v1.6.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93 h1:fQsdNF2N+/YewlRZiricy4P1iimyPKZ/xwniHj8Q2a0= diff --git a/reuse_config.go b/reuse_config.go new file mode 100644 
index 0000000..3079762 --- /dev/null +++ b/reuse_config.go @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2026 M. Shulhan <ms@kilabit.info> + +package spdxconv + +import ( + "fmt" + "os" + + "git.sr.ht/~shulhan/pakakeh.go/lib/git" + "github.com/BurntSushi/toml" +) + +// ReuseConfigFile file name of REUSE configuration. +const ReuseConfigFile = `REUSE.toml` + +type annotation struct { + precedence string + fileCopyrightText string + licenseIdentifier string + + path []string + + includePattern []git.IgnorePattern +} + +func (ann *annotation) UnmarshalTOML(raw any) (err error) { + data, _ := raw.(map[string]any) + for k, v := range data { + switch k { + case `path`: + switch vv := v.(type) { + case string: + ann.path = append(ann.path, vv) + case []any: + for _, p := range vv { + path, _ := p.(string) + ann.path = append(ann.path, path) + } + } + case `precedence`: + ann.precedence, _ = v.(string) + case `SPDX-FileCopyrightText`: + ann.fileCopyrightText, _ = v.(string) + case `SPDX-License-Identifier`: + ann.licenseIdentifier, _ = v.(string) + } + } + return nil +} + +// reuseConfig define the struct for storing REUSE.toml configuration. +type reuseConfig struct { + ListAnnotation []*annotation `toml:"annotations"` + + Version int64 +} + +func loadReuseConfig(path string) (reuse *reuseConfig, err error) { + var logp = `loadReuseConfig` + content, err := os.ReadFile(path) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf(`%s: %w`, logp, err) + } + reuse = &reuseConfig{} + err = toml.Unmarshal(content, reuse) + if err != nil { + return nil, fmt.Errorf(`%s: %w`, logp, err) + } + for _, ann := range reuse.ListAnnotation { + for _, path := range ann.path { + ign := git.ParseIgnorePattern([]byte(path)) + ann.includePattern = append(ann.includePattern, ign) + } + } + return reuse, nil +} + +// isAnnotated return true if the path is included in one of the annotations +// table. 
+func (reuse *reuseConfig) isAnnotated(path string) bool { + for _, ann := range reuse.ListAnnotation { + for _, pat := range ann.includePattern { + if pat.IsMatch(path) { + return true + } + } + } + return false +} diff --git a/reuse_config_test.go b/reuse_config_test.go new file mode 100644 index 0000000..110daae --- /dev/null +++ b/reuse_config_test.go @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2026 M. Shulhan <ms@kilabit.info> + +package spdxconv + +import ( + "testing" + + "git.sr.ht/~shulhan/pakakeh.go/lib/git" + "git.sr.ht/~shulhan/pakakeh.go/lib/test" +) + +func TestLoadReuseConfig(t *testing.T) { + listCase := []struct { + path string + exp *reuseConfig + expError string + }{{ + path: `testdata/scan/REUSE.toml`, + exp: &reuseConfig{ + Version: 1, + ListAnnotation: []*annotation{{ + includePattern: []git.IgnorePattern{ + git.ParseIgnorePattern([]byte(`po/*.po`)), + git.ParseIgnorePattern([]byte(`po/*.pot`)), + }, + path: []string{`po/*.po`, `po/*.pot`}, + precedence: `aggregate`, + fileCopyrightText: `2026 M. Shulhan <ms@kilabit.info>`, + licenseIdentifier: `GPL-3.0-only`, + }, { + includePattern: []git.IgnorePattern{ + git.ParseIgnorePattern([]byte(`tests/resources/**`)), + }, + path: []string{`tests/resources/**`}, + precedence: `override`, + fileCopyrightText: `2026 M. Shulhan <ms@kilabit.info>`, + licenseIdentifier: `CC0-1.0`, + }}, + }, + }, { + path: `not_exist`, + }} + for _, tc := range listCase { + got, err := loadReuseConfig(tc.path) + if err != nil { + test.Assert(t, tc.path+` error`, tc.expError, err.Error()) + continue + } + test.Assert(t, tc.path, tc.exp, got) + } +} diff --git a/spdxconv.go b/spdxconv.go index 466b0ee..778ec51 100644 --- a/spdxconv.go +++ b/spdxconv.go @@ -26,6 +26,9 @@ var suffixLicense = `.license` // SPDXConv the main type for converting files to SPDX format. type SPDXConv struct { + // reuse contains the REUSE.toml configuration. 
+ reuse *reuseConfig + scm sourceCodeManagement // curDir contains the current working directory. @@ -65,6 +68,8 @@ func Scan(path string) (err error) { return fmt.Errorf(`%s: %w`, logp, err) } + conv.loadReuseConfig() + var listFile []string if conv.name == `` { listFile, err = conv.scanDir([]string{path}) @@ -218,6 +223,16 @@ func (conv *SPDXConv) loadConfig(dir string) (err error) { return nil } +// loadReuseConfig load the REUSE.toml configuration from current directory. +func (conv *SPDXConv) loadReuseConfig() { + path := filepath.Join(conv.curDir, ReuseConfigFile) + var err error + conv.reuse, err = loadReuseConfig(path) + if err != nil { + return + } +} + // scanForSCM scan for source-code management (SCM) from directory `dir` until // the current working directory. // Currently, only support git. @@ -267,14 +282,15 @@ func (conv *SPDXConv) scanFile(dir, name string) (listFile []string, err error) func (conv *SPDXConv) scanDir(listDir []string) (listFile []string, err error) { var logp = `scanDir` var commonIgnore = map[string]struct{}{ - `.git`: struct{}{}, - ConfigFile: struct{}{}, - `COPYING`: struct{}{}, - `LICENSE`: struct{}{}, - `LICENSES`: struct{}{}, // Directory contains licenses. - `node_modules`: struct{}{}, - ReportFile: struct{}{}, - `vendor`: struct{}{}, + `.git`: struct{}{}, + ConfigFile: struct{}{}, + `COPYING`: struct{}{}, + `LICENSE`: struct{}{}, + `LICENSES`: struct{}{}, // Directory contains licenses. 
+ `node_modules`: struct{}{}, + ReportFile: struct{}{}, + ReuseConfigFile: struct{}{}, + `vendor`: struct{}{}, } var dir string @@ -309,6 +325,9 @@ func (conv *SPDXConv) scanDir(listDir []string) (listFile []string, err error) { if conv.scm.IsIgnored(relpath) { continue } + if conv.reuse != nil && conv.reuse.isAnnotated(relpath) { + continue + } if de.IsDir() { listDir = append(listDir, relpath) continue diff --git a/spdxconv_test.go b/spdxconv_test.go index f473a2e..fd318b8 100644 --- a/spdxconv_test.go +++ b/spdxconv_test.go @@ -135,6 +135,7 @@ func TestScan(t *testing.T) { //spdxconv:regular // no_copyright_year.md,match,2,2026,default,0,<!-- ," -->" +po/included.go,match,1,2026,match,0,// , test.go,match,1,2022,match,0,// , test.html,match,4,2022,match,1,<!-- ," -->" test.sh,default,0,2026,default,0,# , @@ -337,6 +338,7 @@ func TestSPDXConv_scanDir(t *testing.T) { `.gitignore`, `a/b/.gitignore`, `no_copyright_year.md`, + `po/included.go`, `test.go`, `test.html`, `test.sh`, @@ -362,6 +364,7 @@ func TestSPDXConv_scanDir(t *testing.T) { if err != nil { tt.Fatal(err) } + conv.loadReuseConfig() got, err = conv.scanDir([]string{`.`}) if err != nil { tt.Fatal(err) diff --git a/testdata/scan/REUSE.toml b/testdata/scan/REUSE.toml new file mode 100644 index 0000000..3621d08 --- /dev/null +++ b/testdata/scan/REUSE.toml @@ -0,0 +1,13 @@ +version = 1 + +[[annotations]] +path = ["po/*.po", "po/*.pot"] +precedence = "aggregate" +SPDX-FileCopyrightText = "2026 M. Shulhan <ms@kilabit.info>" +SPDX-License-Identifier = "GPL-3.0-only" + +[[annotations]] +path = "tests/resources/**" +precedence = "override" +SPDX-FileCopyrightText = "2026 M. Shulhan <ms@kilabit.info>" +SPDX-License-Identifier = "CC0-1.0" diff --git a/testdata/scan/po/included.go b/testdata/scan/po/included.go new file mode 100644 index 0000000..d499f94 --- /dev/null +++ b/testdata/scan/po/included.go @@ -0,0 +1,7 @@ +// Copyright 2026, Shulhan <ms@kilabit.info>. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package po + +// This file is not annotated by REUSE.toml. diff --git a/testdata/scan/po/test.po b/testdata/scan/po/test.po new file mode 100644 index 0000000..327d157 --- /dev/null +++ b/testdata/scan/po/test.po @@ -0,0 +1 @@ +This file is annotated by REUSE.toml. diff --git a/testdata/scan/po/test.pot b/testdata/scan/po/test.pot new file mode 100644 index 0000000..327d157 --- /dev/null +++ b/testdata/scan/po/test.pot @@ -0,0 +1 @@ +This file is annotated by REUSE.toml. |
