diff options
| -rw-r--r-- | .gitignore | 2 | ||||
| -rw-r--r-- | README.md | 86 | ||||
| -rw-r--r-- | config.go | 29 | ||||
| -rw-r--r-- | config_match_license.go | 23 | ||||
| -rw-r--r-- | go.mod | 7 | ||||
| -rw-r--r-- | go.sum | 4 | ||||
| -rw-r--r-- | scm.go | 16 | ||||
| -rw-r--r-- | spdxconv.go | 202 | ||||
| -rw-r--r-- | spdxconv_test.go | 152 | ||||
| -rw-r--r-- | testdata/New_test.txt | 36 | ||||
| -rw-r--r-- | testdata/loadConfig/config_exists/spdxconv.cfg | 14 | ||||
| -rw-r--r-- | testdata/scan/.gitignore | 4 | ||||
| -rw-r--r-- | testdata/scan/test.go | 3 | ||||
| -rw-r--r-- | testdata/scan/test.html | 5 | ||||
| -rw-r--r-- | testdata/scan/test.sh | 3 |
15 files changed, 586 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3782838 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +cover.html +cover.out diff --git a/README.md b/README.md new file mode 100644 index 0000000..6113d3c --- /dev/null +++ b/README.md @@ -0,0 +1,86 @@ +<!-- +SPDX-License-Identifier: GPL-3.0-only +SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> +--> + +# spdxconv + +spdxconv is a tool to convert and insert the SPDX license headers. + +## Background + +Converting the license and copyright in the source code to become compliant +with the SPDX headers is very tedious work, especially if we have so many +files with different years, copyrights, and licenses. + +This program helps to do that by using pattern-matching, search, replace, and +sometimes deletion. + +## Usage + +The first thing to do is to generate the configuration file using + +``` +$ spdxconv init +``` + +This will create the `spdxconv.cfg` file in the current directory with the +following content, + +``` +[default] +license_identifier = +file_copyright_text = +max_line_match = 10 + +[match_license] +pattern = ^(//+|#+)*\s+(.*)governed by a BSD-style(.*)$ +license_identifier = BSD-3-Clause +delete_match = true +delete_line_pattern = ^(//+|#+)*\s+license that(.*)$ + +[match_copyright] +pattern = ^(//+|#+)*\s+Copyright\s+(?<year>\d{4}),?\s+(?<holder>.*)\s+<*(?<email>.*)>.*$ +``` + +The next subsections explain each section of the configuration file. + +### default section + +First is the `[default]` section. +This section defines the default license identifier and copyright text to be +inserted into a file if no match_license or match_copyright is found in the file. +You should fill in the `license_identifier` and `file_copyright_text` before +continuing to run the program. + +The `max_line_match` defines the number of lines to be searched at the +top and bottom of the file for match_license and match_copyright before the +program inserts the default values. 
+ +### match_license section + +The first thing that the program does is search for a line that matches +"SPDX-License-Identifier:". + +If there is a match, at the top or bottom, the scan will stop. + +If there is no match, it will search for a line that matches the "pattern" +regular expression. +If there is a line that matches it, the value in +"match_license::license_identifier" will replace the +"default::license_identifier" value. +If the "delete_match" is true, it will delete the line from the file. +If there is a "delete_line_pattern" defined, it will search for lines that match +that regular expression and delete them. +The "delete_line_pattern" can be defined zero or more times. + +## References + +[SPDX License List](https://spdx.org/licenses/). +The SPDX License List includes a standardized short identifier, the full +name, the license text, and a canonical permanent URL for each license and +exception. + +[REUSE FAQ](https://reuse.software/faq/). +This page lists common questions and their answers when dealing with +licensing and copyright, and with the adoption of REUSE specifically. diff --git a/config.go b/config.go new file mode 100644 index 0000000..962b0ad --- /dev/null +++ b/config.go @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. 
Shulhan <ms@kilabit.info> + +package spdxconv + +import "errors" + +type config struct { + LicenseIdentifier string `ini:"default::license_identifier"` + FileCopyrightText string `ini:"default::file_copyright_text"` + + MatchLicense []configMatchLicense `ini:"match-license"` + + MaxLineMatch int `ini:"default::max_line_match"` +} + +func (cfg *config) init() (err error) { + const defMaxLineMatch = 10 + if cfg.LicenseIdentifier == `` { + return errors.New(`empty default license_identifier`) + } + if cfg.FileCopyrightText == `` { + return errors.New(`empty default file_copyright_text`) + } + if cfg.MaxLineMatch <= 0 { + cfg.MaxLineMatch = defMaxLineMatch + } + return nil +} diff --git a/config_match_license.go b/config_match_license.go new file mode 100644 index 0000000..11e09fe --- /dev/null +++ b/config_match_license.go @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +package spdxconv + +type configMatchLicense struct { + // Pattern to be searched in file. + Pattern string `ini:"match-license::pattern"` + + // LicenseIdentifier that replace the default "license_identifier" + // value if Pattern match. + LicenseIdentifier string `ini:"match-license::license_identifier"` + + // DeleteLinePattern zero or more pattern that will be search after + // Pattern match line. + // A line that match with this pattern will be deleted. + // An empty line stop the search. + DeleteLinePattern []string `ini:"match-license::delete_line_pattern"` + + // DeleteMatch if set to true, the line that match with Pattern will + // be deleted. 
+ DeleteMatch bool `ini:"match-license::delete_match"` +} @@ -0,0 +1,7 @@ +module git.sr.ht/~shulhan/spdxconv + +go 1.24.0 + +require git.sr.ht/~shulhan/pakakeh.go v0.60.2 + +require golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93 // indirect @@ -0,0 +1,4 @@ +git.sr.ht/~shulhan/pakakeh.go v0.60.2 h1:ZSRE77lYm+mkhvg9pSrxCIO81ydbqt93qbsWuZJpjtI= +git.sr.ht/~shulhan/pakakeh.go v0.60.2/go.mod h1:1MkKXbLZRHTcnheeSEbRpGztkym4Yxzh90ep+jCxbDc= +golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93 h1:fQsdNF2N+/YewlRZiricy4P1iimyPKZ/xwniHj8Q2a0= +golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93/go.mod h1:EPRbTFwzwjXj9NpYyyrvenVh9Y+GFeEvMNh7Xuz7xgU= @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +package spdxconv + +// sourceCodeManagement define the interface for SCM tools. +type sourceCodeManagement interface { + IsIgnored(path string) bool +} + +// dummySCM is a no scm. It always return false on IsIgnored. +type dummySCM struct{} + +func (scm *dummySCM) IsIgnored(path string) bool { + return false +} diff --git a/spdxconv.go b/spdxconv.go new file mode 100644 index 0000000..5f28e5a --- /dev/null +++ b/spdxconv.go @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +package spdxconv + +import ( + "fmt" + "os" + "path/filepath" + + "git.sr.ht/~shulhan/pakakeh.go/lib/git" + "git.sr.ht/~shulhan/pakakeh.go/lib/ini" +) + +// SPDXConv the main type for converting files to SPDX format. +type SPDXConv struct { + scm sourceCodeManagement + + // dir define the directory that will processed. + // `name` is single file inside `dir` to be processed. + // If `name` is empty means process all files inside `dir`. + dir string + name string + + // scmDir define the root directory for source-code management. + // In git, the path that contains the ".git" directory. 
+ scmDir string + + cfg config +} + +// Apply the SPDX license headers to all files inside the directory `path` or +// to single file only. +func Apply(path string) (err error) { + var logp = `Apply` + var conv *SPDXConv + + conv, err = New(path) + if err != nil { + return fmt.Errorf(`%s: %w`, logp, err) + } + var listFile []string + if conv.name == `` { + listFile, err = conv.scanFiles([]string{conv.dir}) + } else { + listFile, err = conv.scanFile(conv.dir, conv.name) + } + if err != nil { + return fmt.Errorf(`%s: %w`, logp, err) + } + + var pathFile string + for _, pathFile = range listFile { + conv.convert(pathFile) + } + return nil +} + +// New initialize new instance of SPDXConv +func New(path string) (conv *SPDXConv, err error) { + var logp = `New` + + path, err = filepath.Abs(path) + if err != nil { + return nil, fmt.Errorf(`%s: %w`, logp, err) + } + + conv = &SPDXConv{} + var fi os.FileInfo + fi, err = os.Stat(path) + if err != nil { + return nil, fmt.Errorf(`%s: %w`, logp, err) + } + + if fi.IsDir() { + conv.dir = path + } else { + conv.dir, conv.name = filepath.Split(path) + } + + err = conv.loadConfig(conv.dir) + if err != nil { + return nil, fmt.Errorf(`%s: %w`, logp, err) + } + + err = conv.scanForSCM(conv.dir) + if err != nil { + return nil, fmt.Errorf(`%s: %w`, logp, err) + } + + return conv, nil +} + +// loadConfig load the program configuration from file `spdxconv.cfg` in the +// current directory. 
+func (conv *SPDXConv) loadConfig(dir string) (err error) { + var logp = `loadConfig` + var pathcfg = filepath.Join(dir, `spdxconv.cfg`) + var rawcfg []byte + rawcfg, err = os.ReadFile(pathcfg) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return fmt.Errorf(`%s %s: %w`, logp, pathcfg, err) + } + err = ini.Unmarshal(rawcfg, &conv.cfg) + if err != nil { + return fmt.Errorf(`%s %s: %w`, logp, pathcfg, err) + } + err = conv.cfg.init() + if err != nil { + return fmt.Errorf(`%s %s: %w`, logp, pathcfg, err) + } + return nil +} + +// scanForSCM searches for a source-code management (SCM) root by checking +// directory `dir` and then each of its parents for a ".git" directory. +// On the first match it sets scmDir to that directory and scm to the value +// returned by git.New. +// If no directory up to the top of the tree contains a ".git" directory, +// scm is set to dummySCM. +// Currently, only git is supported. +func (conv *SPDXConv) scanForSCM(dir string) (err error) { + var scmDir string + var parent string + var fi os.FileInfo + for { + scmDir = filepath.Join(dir, `.git`) + fi, err = os.Stat(scmDir) + if err == nil && fi.IsDir() { + conv.scmDir = dir + conv.scm, err = git.New(dir) + if err != nil { + return err + } + return nil + } + if err != nil && !os.IsNotExist(err) { + return err + } + // No ".git" directory here (missing, or not a directory): move + // up to the parent, otherwise the loop would stat the same + // path forever. + parent = filepath.Dir(dir) + if parent == dir { + // filepath.Dir no longer changes the path, so the top + // of the tree has been reached. + break + } + dir = parent + } + conv.scm = &dummySCM{} + return nil +} + +// scanFile check if the single file can be processed or not. +func (conv *SPDXConv) scanFile(dir, name string) (listFile []string, err error) { + var pathName = filepath.Join(dir, name) + if conv.scm.IsIgnored(pathName) { + return nil, nil + } + listFile = append(listFile, pathName) + return listFile, nil +} + +// scanFiles list file to be processed in directory `dir`, recursively. +// A file ignored by ".gitignore" file will be excluded. +// A common ignore file or directory name likes ".git", "node_modules", and +// "vendor"; also will be excluded. 
+func (conv *SPDXConv) scanFiles(listDir []string) (listFile []string, err error) { + var commonIgnore = map[string]struct{}{ + `.git`: struct{}{}, + `node_modules`: struct{}{}, + `vendor`: struct{}{}, + } + var dir string + var listde []os.DirEntry + var de os.DirEntry + var ok bool + for len(listDir) != 0 { + dir = listDir[0] + listDir = listDir[1:] + + listde, err = os.ReadDir(dir) + if err != nil { + return listFile, err + } + for _, de = range listde { + var name = de.Name() + if conv.scm.IsIgnored(name) { + continue + } + _, ok = commonIgnore[name] + if ok { + continue + } + var pathName = filepath.Join(dir, name) + if de.IsDir() { + listDir = append(listDir, pathName) + continue + } + listFile = append(listFile, pathName) + } + } + return listFile, nil +} + +// convert the given pathFile to SPDX license format. +func (conv *SPDXConv) convert(pathFile string) { + +} diff --git a/spdxconv_test.go b/spdxconv_test.go new file mode 100644 index 0000000..9c66aea --- /dev/null +++ b/spdxconv_test.go @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. 
Shulhan <ms@kilabit.info> + +package spdxconv + +import ( + "testing" + + "git.sr.ht/~shulhan/pakakeh.go/lib/git" + "git.sr.ht/~shulhan/pakakeh.go/lib/test" +) + +func TestNew(t *testing.T) { + type testCase struct { + exp *SPDXConv + dir string + expError string + } + + var testData *test.Data + var err error + testData, err = test.LoadData(`testdata/New_test.txt`) + if err != nil { + t.Fatal(err) + } + var tempDir = t.TempDir() + testData.ExtractInput(tempDir) + t.Chdir(tempDir) + + var gitRoot *git.Git + gitRoot, err = git.New(`.`) + if err != nil { + t.Fatal(err) + } + + var listCase = []testCase{{ + dir: `.`, + exp: &SPDXConv{ + dir: tempDir, + scmDir: tempDir, + scm: gitRoot, + }, + }} + var tc testCase + var conv *SPDXConv + for _, tc = range listCase { + conv, err = New(`.`) + if err != nil { + test.Assert(t, tc.dir+`: error`, tc.expError, err.Error()) + continue + } + test.Assert(t, tc.dir, tc.exp, conv) + } +} + +func TestSPDXConv_loadConfig(t *testing.T) { + type testCase struct { + dir string + exp config + } + var listCase = []testCase{{ + dir: `testdata/loadConfig/config_not_exists`, + exp: config{}, + }, { + dir: `testdata/loadConfig/config_exists`, + exp: config{ + LicenseIdentifier: `GPL-3.0-only`, + FileCopyrightText: `Author <author@email.info>`, + MaxLineMatch: 10, + MatchLicense: []configMatchLicense{{ + Pattern: `^(//+|#+)*\s+(.*)governed by a BSD-style(.*)$`, + LicenseIdentifier: `BSD-3-Clause`, + DeleteLinePattern: []string{ + `^(//+|#+)*\s*$`, + `^(//+|#+)*\s+license that(.*)$`, + }, + DeleteMatch: true, + }}, + }, + }} + + var conv = SPDXConv{} + var tc testCase + var err error + for _, tc = range listCase { + conv.dir = tc.dir + err = conv.loadConfig(tc.dir) + if err != nil { + t.Fatal(err) + } + test.Assert(t, tc.dir, tc.exp, conv.cfg) + } +} + +func TestSPDXConv_scanFile(t *testing.T) { + var conv *SPDXConv + var err error + conv, err = New(`testdata/scan/`) + if err != nil { + t.Fatal(err) + } + + type testCase struct { + dir string + 
name string + exp []string + } + var listCase = []testCase{{ + dir: `.`, + name: `test.go`, + exp: []string{`test.go`}, + }, { + dir: `.`, + name: `test.html`, + exp: []string{`test.html`}, + }, { + dir: `vendor`, + name: `dummy`, + }} + var tc testCase + for _, tc = range listCase { + got, err := conv.scanFile(tc.dir, tc.name) + if err != nil { + t.Fatal(err) + } + test.Assert(t, tc.dir+"/"+tc.name, tc.exp, got) + } +} + +func TestSPDXConv_scanFiles(t *testing.T) { + var conv *SPDXConv + var err error + + conv, err = New(`testdata/scan/`) + if err != nil { + t.Fatal(err) + } + + var got []string + got, err = conv.scanFiles([]string{`testdata/scan/`}) + if err != nil { + t.Fatal(err) + } + + var exp = []string{ + `testdata/scan/.gitignore`, + `testdata/scan/test.go`, + `testdata/scan/test.html`, + `testdata/scan/test.sh`, + } + test.Assert(t, `scanFiles: testdata/scan/`, exp, got) +} diff --git a/testdata/New_test.txt b/testdata/New_test.txt new file mode 100644 index 0000000..2b718d3 --- /dev/null +++ b/testdata/New_test.txt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-3.0-only +// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info> + +>>> .git/config +# Copyright 2022, Shulhan <ms@kilabit.info>. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +>>> .gitignore +node_modules/ +*.html +!test.html +vendor/ + +>>> node_modules/dummy +Dummy content for node_modules directory. + +>>> test.go +// Copyright 2022, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +>>> test.html +<!-- +Copyright 2022, Shulhan <ms@kilabit.info>. All rights reserved. +Use of this source code is governed by a BSD-style +license that can be found in the LICENSE file. +--> + +>>> test.sh +# Copyright 2022, Shulhan <ms@kilabit.info>. All rights reserved. 
+# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +>>> vendor/dummy +Dummy content for vendor directory. diff --git a/testdata/loadConfig/config_exists/spdxconv.cfg b/testdata/loadConfig/config_exists/spdxconv.cfg new file mode 100644 index 0000000..e4edf89 --- /dev/null +++ b/testdata/loadConfig/config_exists/spdxconv.cfg @@ -0,0 +1,14 @@ +[default] +license_identifier = GPL-3.0-only +file_copyright_text = Author <author@email.info> +max_line_match = 10 + +[match-license] +pattern = "^(//+|#+)*\\s+(.*)governed by a BSD-style(.*)$" +license_identifier = BSD-3-Clause +delete_match = true +delete_line_pattern = "^(//+|#+)*\\s*$" +delete_line_pattern = "^(//+|#+)*\\s+license that(.*)$" + +[match-copyright] +pattern = "^(//+|#+)*\\s+Copyright\\s+(?<year>\\d{4},?\\s+(?<holder>.*)\\s+<*(?<email>.*)>.*$" diff --git a/testdata/scan/.gitignore b/testdata/scan/.gitignore new file mode 100644 index 0000000..746a46e --- /dev/null +++ b/testdata/scan/.gitignore @@ -0,0 +1,4 @@ +node_modules/ +*.html +!test.html +vendor/
\ No newline at end of file diff --git a/testdata/scan/test.go b/testdata/scan/test.go new file mode 100644 index 0000000..46e2bd9 --- /dev/null +++ b/testdata/scan/test.go @@ -0,0 +1,3 @@ +// Copyright 2022, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file.
\ No newline at end of file diff --git a/testdata/scan/test.html b/testdata/scan/test.html new file mode 100644 index 0000000..da2491e --- /dev/null +++ b/testdata/scan/test.html @@ -0,0 +1,5 @@ +<!-- +Copyright 2022, Shulhan <ms@kilabit.info>. All rights reserved. +Use of this source code is governed by a BSD-style +license that can be found in the LICENSE file. +-->
\ No newline at end of file diff --git a/testdata/scan/test.sh b/testdata/scan/test.sh new file mode 100644 index 0000000..8fd6949 --- /dev/null +++ b/testdata/scan/test.sh @@ -0,0 +1,3 @@ +# Copyright 2022, Shulhan <ms@kilabit.info>. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file.
\ No newline at end of file |
