aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rw-r--r--README.md86
-rw-r--r--config.go29
-rw-r--r--config_match_license.go23
-rw-r--r--go.mod7
-rw-r--r--go.sum4
-rw-r--r--scm.go16
-rw-r--r--spdxconv.go202
-rw-r--r--spdxconv_test.go152
-rw-r--r--testdata/New_test.txt36
-rw-r--r--testdata/loadConfig/config_exists/spdxconv.cfg14
-rw-r--r--testdata/scan/.gitignore4
-rw-r--r--testdata/scan/test.go3
-rw-r--r--testdata/scan/test.html5
-rw-r--r--testdata/scan/test.sh3
15 files changed, 586 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3782838
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+cover.html
+cover.out
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..6113d3c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,86 @@
+<!--
+SPDX-License-Identifier: GPL-3.0-only
+SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+-->
+
+# spdxconv
+
+spdxconv is a tool to convert and insert the SPDX license headers.
+
+## Background
+
+Converting the license and copyright notices in source code to be compliant
+with the SPDX headers is very tedious work, especially when there are many
+files with different years, copyright holders, and licenses.
+
+This program helps with that by using pattern matching, search, replace,
+and sometimes deletion.
+
+## Usage
+
+The first thing to do is to generate the configuration file using
+
+```
+$ spdxconv init
+```
+
+This will create the `spdxconv.cfg` file in the current directory with the
+following content,
+
+```
+[default]
+license_identifier =
+file_copyright_text =
+max_line_match = 10
+
+[match-license]
+pattern = ^(//+|#+)*\s+(.*)governed by a BSD-style(.*)$
+license_identifier = BSD-3-Clause
+delete_match = true
+delete_line_pattern = ^(//+|#+)*\s+license that(.*)$
+
+[match-copyright]
+pattern = ^(//+|#+)*\s+Copyright\s+(?<year>\d{4}),?\s+(?<holder>.*)\s+<*(?<email>.*)>.*$
+```
+
+The next subsections explain each section of the configuration file.
+
+### default section
+
+First is the `[default]` section.
+This section defines the default license identifier and copyright text to be
+inserted into a file if no match_license or match_copyright is found in it.
+You should fill in `license_identifier` and `file_copyright_text` before
+running the program any further.
+
+The `max_line_match` defines the number of lines to be searched at the
+top and bottom of a file for match_license and match_copyright before the
+program inserts the default values.
+
+### match_license section
+
+The first thing the program does is search for a line that matches
+"SPDX-License-Identifier:".
+
+If there is a match, at the top or bottom, the scan will stop.
+
+If there is no match, it will search for a line that matches the "pattern"
+regular expression.
+If a line matches it, the value in
+"match_license::license_identifier" will replace the
+"default::license_identifier" value.
+If "delete_match" is true, it will delete the matching line from the file.
+If "delete_line_pattern" is defined, it will search for lines that match
+that regular expression and delete them.
+The "delete_line_pattern" can be defined zero or more times.
+
+## References
+
+[SPDX License List](https://spdx.org/licenses/).
+The SPDX License List includes a standardized short identifier, the full
+name, the license text, and a canonical permanent URL for each license and
+exception.
+
+[REUSE FAQ](https://reuse.software/faq/).
+This page lists common questions and their answers when dealing with
+licensing and copyright, and with the adoption of REUSE specifically.
diff --git a/config.go b/config.go
new file mode 100644
index 0000000..962b0ad
--- /dev/null
+++ b/config.go
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-3.0-only
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+
+package spdxconv
+
+import "errors"
+
+// config holds the values loaded from the spdxconv.cfg file.
+// Each field is filled from the INI section and key named in its `ini` tag.
+type config struct {
+ // LicenseIdentifier is read from key "license_identifier" in section
+ // "default".
+ // init rejects an empty value.
+ LicenseIdentifier string `ini:"default::license_identifier"`
+ // FileCopyrightText is read from key "file_copyright_text" in section
+ // "default".
+ // init rejects an empty value.
+ FileCopyrightText string `ini:"default::file_copyright_text"`
+
+ // MatchLicense is read from the "match-license" section.
+ MatchLicense []configMatchLicense `ini:"match-license"`
+
+ // MaxLineMatch is read from key "max_line_match" in section "default".
+ // init replaces a value of zero or less with 10.
+ MaxLineMatch int `ini:"default::max_line_match"`
+}
+
+// init validates the loaded configuration and fills in defaults.
+// It returns an error if the default license identifier or the default
+// copyright text is empty.
+// Otherwise, a MaxLineMatch of zero or less is replaced with 10.
+func (cfg *config) init() (err error) {
+	const defMaxLineMatch = 10
+
+	switch {
+	case cfg.LicenseIdentifier == ``:
+		err = errors.New(`empty default license_identifier`)
+	case cfg.FileCopyrightText == ``:
+		err = errors.New(`empty default file_copyright_text`)
+	case cfg.MaxLineMatch <= 0:
+		cfg.MaxLineMatch = defMaxLineMatch
+	}
+	return err
+}
diff --git a/config_match_license.go b/config_match_license.go
new file mode 100644
index 0000000..11e09fe
--- /dev/null
+++ b/config_match_license.go
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-3.0-only
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+
+package spdxconv
+
+// configMatchLicense holds the values of one "match-license" section of the
+// configuration file.
+type configMatchLicense struct {
+ // Pattern to be searched for in the file.
+ Pattern string `ini:"match-license::pattern"`
+
+ // LicenseIdentifier replaces the default "license_identifier" value
+ // if Pattern matches.
+ LicenseIdentifier string `ini:"match-license::license_identifier"`
+
+ // DeleteLinePattern contains zero or more patterns that will be
+ // searched for after the line that matches Pattern.
+ // A line that matches one of these patterns will be deleted.
+ // An empty line stops the search.
+ DeleteLinePattern []string `ini:"match-license::delete_line_pattern"`
+
+ // DeleteMatch, if set to true, causes the line that matches Pattern
+ // to be deleted.
+ DeleteMatch bool `ini:"match-license::delete_match"`
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..feeffc8
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,7 @@
+module git.sr.ht/~shulhan/spdxconv
+
+go 1.24.0
+
+require git.sr.ht/~shulhan/pakakeh.go v0.60.2
+
+require golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93 // indirect
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..e98afd1
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,4 @@
+git.sr.ht/~shulhan/pakakeh.go v0.60.2 h1:ZSRE77lYm+mkhvg9pSrxCIO81ydbqt93qbsWuZJpjtI=
+git.sr.ht/~shulhan/pakakeh.go v0.60.2/go.mod h1:1MkKXbLZRHTcnheeSEbRpGztkym4Yxzh90ep+jCxbDc=
+golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93 h1:fQsdNF2N+/YewlRZiricy4P1iimyPKZ/xwniHj8Q2a0=
+golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93/go.mod h1:EPRbTFwzwjXj9NpYyyrvenVh9Y+GFeEvMNh7Xuz7xgU=
diff --git a/scm.go b/scm.go
new file mode 100644
index 0000000..d94ee18
--- /dev/null
+++ b/scm.go
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-3.0-only
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+
+package spdxconv
+
+// sourceCodeManagement defines the interface for SCM tools.
+type sourceCodeManagement interface {
+ // IsIgnored reports whether path is ignored by the SCM.
+ // The scan functions skip every entry for which it returns true.
+ IsIgnored(path string) bool
+}
+
+// dummySCM is the implementation of sourceCodeManagement for the case where
+// no SCM is in use.
+// It never ignores anything.
+type dummySCM struct{}
+
+// IsIgnored always reports false, whatever the path is.
+func (*dummySCM) IsIgnored(path string) bool { return false }
diff --git a/spdxconv.go b/spdxconv.go
new file mode 100644
index 0000000..5f28e5a
--- /dev/null
+++ b/spdxconv.go
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-3.0-only
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+
+package spdxconv
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+
+ "git.sr.ht/~shulhan/pakakeh.go/lib/git"
+ "git.sr.ht/~shulhan/pakakeh.go/lib/ini"
+)
+
+// SPDXConv is the main type for converting files to SPDX format.
+type SPDXConv struct {
+ // scm decides which paths are skipped during scanning.
+ // scanForSCM sets it to the value returned by git.New, or to a
+ // dummySCM if no ".git" directory is found.
+ scm sourceCodeManagement
+
+ // dir defines the directory that will be processed.
+ // `name` is a single file inside `dir` to be processed.
+ // If `name` is empty, all files inside `dir` are processed.
+ dir string
+ name string
+
+ // scmDir defines the root directory for source-code management.
+ // In git, the path that contains the ".git" directory.
+ scmDir string
+
+ // cfg is the configuration filled by loadConfig.
+ cfg config
+}
+
+// Apply the SPDX license headers to the file at `path`, or, if `path` is a
+// directory, to every file found by scanning it.
+// It returns an error if the converter cannot be created or if the scan
+// fails.
+func Apply(path string) (err error) {
+	const logp = `Apply`
+
+	conv, err := New(path)
+	if err != nil {
+		return fmt.Errorf(`%s: %w`, logp, err)
+	}
+
+	// A non-empty name means `path` pointed to a single file.
+	var targets []string
+	if conv.name != `` {
+		targets, err = conv.scanFile(conv.dir, conv.name)
+	} else {
+		targets, err = conv.scanFiles([]string{conv.dir})
+	}
+	if err != nil {
+		return fmt.Errorf(`%s: %w`, logp, err)
+	}
+
+	for i := range targets {
+		conv.convert(targets[i])
+	}
+	return nil
+}
+
+// New initializes a new instance of SPDXConv for `path`.
+// The path is made absolute first.
+// If it is a directory it becomes the directory to be processed; otherwise
+// it is split into the directory and the single file name to be processed.
+// The configuration is then loaded from that directory and the SCM is
+// detected by scanning from it upward.
+func New(path string) (conv *SPDXConv, err error) {
+	const logp = `New`
+
+	absPath, err := filepath.Abs(path)
+	if err != nil {
+		return nil, fmt.Errorf(`%s: %w`, logp, err)
+	}
+
+	info, err := os.Stat(absPath)
+	if err != nil {
+		return nil, fmt.Errorf(`%s: %w`, logp, err)
+	}
+
+	conv = &SPDXConv{dir: absPath}
+	if !info.IsDir() {
+		conv.dir, conv.name = filepath.Split(absPath)
+	}
+
+	if err = conv.loadConfig(conv.dir); err != nil {
+		return nil, fmt.Errorf(`%s: %w`, logp, err)
+	}
+	if err = conv.scanForSCM(conv.dir); err != nil {
+		return nil, fmt.Errorf(`%s: %w`, logp, err)
+	}
+	return conv, nil
+}
+
+// loadConfig reads the file `spdxconv.cfg` inside directory `dir`, parses it
+// into conv.cfg, and validates the result with config.init.
+// A missing file is not an error: the function returns nil and leaves
+// conv.cfg untouched.
+func (conv *SPDXConv) loadConfig(dir string) (err error) {
+	const logp = `loadConfig`
+
+	pathcfg := filepath.Join(dir, `spdxconv.cfg`)
+	content, err := os.ReadFile(pathcfg)
+	switch {
+	case err == nil:
+		// The file was read; continue with parsing below.
+	case os.IsNotExist(err):
+		return nil
+	default:
+		return fmt.Errorf(`%s %s: %w`, logp, pathcfg, err)
+	}
+
+	if err = ini.Unmarshal(content, &conv.cfg); err != nil {
+		return fmt.Errorf(`%s %s: %w`, logp, pathcfg, err)
+	}
+	if err = conv.cfg.init(); err != nil {
+		return fmt.Errorf(`%s %s: %w`, logp, pathcfg, err)
+	}
+	return nil
+}
+
+// scanForSCM scans for a source-code management (SCM) root, starting from
+// directory `dir` and walking up through its parents, the file system root
+// included.
+// Currently, only git is supported.
+//
+// The first directory that contains a ".git" directory is stored in
+// conv.scmDir and conv.scm is set to the value returned by git.New.
+// A ".git" entry that is not a directory is skipped and the search continues
+// in the parent.
+// If no ".git" directory is found, conv.scm is set to a dummySCM.
+//
+// It returns an error if ".git" cannot be inspected for a reason other than
+// not existing, or if git.New fails.
+func (conv *SPDXConv) scanForSCM(dir string) (err error) {
+	var fi os.FileInfo
+	for {
+		fi, err = os.Stat(filepath.Join(dir, `.git`))
+		if err != nil && !os.IsNotExist(err) {
+			return err
+		}
+		if err == nil && fi.IsDir() {
+			// Assign the fields only after git.New succeeds, so that
+			// conv is never left with a half-initialized SCM.
+			repo, errNew := git.New(dir)
+			if errNew != nil {
+				return errNew
+			}
+			conv.scmDir = dir
+			conv.scm = repo
+			return nil
+		}
+
+		// Move one level up on every iteration; without this the loop
+		// never terminates.
+		// filepath.Dir returns its argument unchanged once the root
+		// is reached, which also holds for non-Unix roots.
+		var parent = filepath.Dir(dir)
+		if parent == dir {
+			break
+		}
+		dir = parent
+	}
+	conv.scm = &dummySCM{}
+	return nil
+}
+
+// scanFile checks whether the single file `name` inside `dir` can be
+// processed.
+// It returns a list containing the joined path, or nil if the SCM ignores
+// that path.
+// The returned error is always nil.
+func (conv *SPDXConv) scanFile(dir, name string) (listFile []string, err error) {
+	target := filepath.Join(dir, name)
+	if !conv.scm.IsIgnored(target) {
+		listFile = []string{target}
+	}
+	return listFile, nil
+}
+
+// scanFiles lists the files to be processed in the directories `listDir`,
+// recursively.
+// An entry that the SCM reports as ignored is excluded.
+// The common ignored names ".git", "node_modules", and "vendor" are
+// excluded too.
+//
+// If a directory cannot be read, it returns the files collected so far
+// together with the error.
+func (conv *SPDXConv) scanFiles(listDir []string) (listFile []string, err error) {
+	commonIgnore := map[string]struct{}{
+		`.git`:         {},
+		`node_modules`: {},
+		`vendor`:       {},
+	}
+
+	var entries []os.DirEntry
+	// listDir is used as a queue: sub-directories are appended at the end
+	// while the front is consumed.
+	for len(listDir) > 0 {
+		current := listDir[0]
+		listDir = listDir[1:]
+
+		entries, err = os.ReadDir(current)
+		if err != nil {
+			return listFile, err
+		}
+		for _, entry := range entries {
+			name := entry.Name()
+			// NOTE(review): only the base name is passed here, while
+			// scanFile passes the joined path -- confirm which form
+			// the SCM expects.
+			if conv.scm.IsIgnored(name) {
+				continue
+			}
+			if _, skip := commonIgnore[name]; skip {
+				continue
+			}
+			full := filepath.Join(current, name)
+			if entry.IsDir() {
+				listDir = append(listDir, full)
+			} else {
+				listFile = append(listFile, full)
+			}
+		}
+	}
+	return listFile, nil
+}
+
+// convert the given pathFile to SPDX license format.
+// The body is currently empty, so calling it has no effect.
+func (conv *SPDXConv) convert(pathFile string) {
+
+}
diff --git a/spdxconv_test.go b/spdxconv_test.go
new file mode 100644
index 0000000..9c66aea
--- /dev/null
+++ b/spdxconv_test.go
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-3.0-only
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+
+package spdxconv
+
+import (
+ "testing"
+
+ "git.sr.ht/~shulhan/pakakeh.go/lib/git"
+ "git.sr.ht/~shulhan/pakakeh.go/lib/test"
+)
+
+// TestNew extracts the content of testdata/New_test.txt into a temporary
+// directory, changes the working directory to it, and checks the SPDXConv
+// returned by New for each test case.
+func TestNew(t *testing.T) {
+	type testCase struct {
+		exp      *SPDXConv
+		dir      string
+		expError string
+	}
+
+	var testData *test.Data
+	var err error
+	testData, err = test.LoadData(`testdata/New_test.txt`)
+	if err != nil {
+		t.Fatal(err)
+	}
+	var tempDir = t.TempDir()
+	testData.ExtractInput(tempDir)
+	t.Chdir(tempDir)
+
+	var gitRoot *git.Git
+	gitRoot, err = git.New(`.`)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var listCase = []testCase{{
+		dir: `.`,
+		exp: &SPDXConv{
+			dir:    tempDir,
+			scmDir: tempDir,
+			scm:    gitRoot,
+		},
+	}}
+	var tc testCase
+	var conv *SPDXConv
+	for _, tc = range listCase {
+		// Use the directory of the test case instead of a fixed `.`,
+		// so that every case added later is really exercised.
+		conv, err = New(tc.dir)
+		if err != nil {
+			test.Assert(t, tc.dir+`: error`, tc.expError, err.Error())
+			continue
+		}
+		if tc.expError != `` {
+			// An expected error that never happens must not pass
+			// silently.
+			t.Fatalf(`%s: expecting error %q, got nil`, tc.dir, tc.expError)
+		}
+		test.Assert(t, tc.dir, tc.exp, conv)
+	}
+}
+
+// TestSPDXConv_loadConfig checks the configuration produced by loadConfig,
+// both for a directory without spdxconv.cfg and for a directory with one.
+func TestSPDXConv_loadConfig(t *testing.T) {
+	type testCase struct {
+		dir string
+		exp config
+	}
+	var listCase = []testCase{{
+		dir: `testdata/loadConfig/config_not_exists`,
+		exp: config{},
+	}, {
+		dir: `testdata/loadConfig/config_exists`,
+		exp: config{
+			LicenseIdentifier: `GPL-3.0-only`,
+			FileCopyrightText: `Author <author@email.info>`,
+			MaxLineMatch:      10,
+			MatchLicense: []configMatchLicense{{
+				Pattern:           `^(//+|#+)*\s+(.*)governed by a BSD-style(.*)$`,
+				LicenseIdentifier: `BSD-3-Clause`,
+				DeleteLinePattern: []string{
+					`^(//+|#+)*\s*$`,
+					`^(//+|#+)*\s+license that(.*)$`,
+				},
+				DeleteMatch: true,
+			}},
+		},
+	}}
+
+	var tc testCase
+	var err error
+	for _, tc = range listCase {
+		// Start every case from a fresh instance, so the configuration
+		// loaded by a previous case cannot leak into this one and the
+		// result does not depend on the order of the cases.
+		var conv = SPDXConv{
+			dir: tc.dir,
+		}
+		err = conv.loadConfig(tc.dir)
+		if err != nil {
+			t.Fatal(err)
+		}
+		test.Assert(t, tc.dir, tc.exp, conv.cfg)
+	}
+}
+
+// TestSPDXConv_scanFile checks the list returned by scanFile for single
+// files, using a converter created on testdata/scan/.
+func TestSPDXConv_scanFile(t *testing.T) {
+	type testCase struct {
+		dir  string
+		name string
+		exp  []string
+	}
+
+	conv, err := New(`testdata/scan/`)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	listCase := []testCase{
+		{dir: `.`, name: `test.go`, exp: []string{`test.go`}},
+		{dir: `.`, name: `test.html`, exp: []string{`test.html`}},
+		// No expected list: nothing should be returned for this one.
+		{dir: `vendor`, name: `dummy`},
+	}
+
+	var got []string
+	for _, tc := range listCase {
+		got, err = conv.scanFile(tc.dir, tc.name)
+		if err != nil {
+			t.Fatal(err)
+		}
+		test.Assert(t, tc.dir+"/"+tc.name, tc.exp, got)
+	}
+}
+
+// TestSPDXConv_scanFiles checks the list of files returned by scanFiles for
+// the directory testdata/scan/.
+func TestSPDXConv_scanFiles(t *testing.T) {
+	conv, err := New(`testdata/scan/`)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	exp := []string{
+		`testdata/scan/.gitignore`,
+		`testdata/scan/test.go`,
+		`testdata/scan/test.html`,
+		`testdata/scan/test.sh`,
+	}
+
+	got, err := conv.scanFiles([]string{`testdata/scan/`})
+	if err != nil {
+		t.Fatal(err)
+	}
+	test.Assert(t, `scanFiles: testdata/scan/`, exp, got)
+}
diff --git a/testdata/New_test.txt b/testdata/New_test.txt
new file mode 100644
index 0000000..2b718d3
--- /dev/null
+++ b/testdata/New_test.txt
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-3.0-only
+// SPDX-FileCopyrightText: 2025 M. Shulhan <ms@kilabit.info>
+
+>>> .git/config
+# Copyright 2022, Shulhan <ms@kilabit.info>. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+>>> .gitignore
+node_modules/
+*.html
+!test.html
+vendor/
+
+>>> node_modules/dummy
+Dummy content for node_modules directory.
+
+>>> test.go
+// Copyright 2022, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+>>> test.html
+<!--
+Copyright 2022, Shulhan <ms@kilabit.info>. All rights reserved.
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file.
+-->
+
+>>> test.sh
+# Copyright 2022, Shulhan <ms@kilabit.info>. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+>>> vendor/dummy
+Dummy content for vendor directory.
diff --git a/testdata/loadConfig/config_exists/spdxconv.cfg b/testdata/loadConfig/config_exists/spdxconv.cfg
new file mode 100644
index 0000000..e4edf89
--- /dev/null
+++ b/testdata/loadConfig/config_exists/spdxconv.cfg
@@ -0,0 +1,14 @@
+[default]
+license_identifier = GPL-3.0-only
+file_copyright_text = Author <author@email.info>
+max_line_match = 10
+
+[match-license]
+pattern = "^(//+|#+)*\\s+(.*)governed by a BSD-style(.*)$"
+license_identifier = BSD-3-Clause
+delete_match = true
+delete_line_pattern = "^(//+|#+)*\\s*$"
+delete_line_pattern = "^(//+|#+)*\\s+license that(.*)$"
+
+[match-copyright]
+pattern = "^(//+|#+)*\\s+Copyright\\s+(?<year>\\d{4},?\\s+(?<holder>.*)\\s+<*(?<email>.*)>.*$"
diff --git a/testdata/scan/.gitignore b/testdata/scan/.gitignore
new file mode 100644
index 0000000..746a46e
--- /dev/null
+++ b/testdata/scan/.gitignore
@@ -0,0 +1,4 @@
+node_modules/
+*.html
+!test.html
+vendor/ \ No newline at end of file
diff --git a/testdata/scan/test.go b/testdata/scan/test.go
new file mode 100644
index 0000000..46e2bd9
--- /dev/null
+++ b/testdata/scan/test.go
@@ -0,0 +1,3 @@
+// Copyright 2022, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file. \ No newline at end of file
diff --git a/testdata/scan/test.html b/testdata/scan/test.html
new file mode 100644
index 0000000..da2491e
--- /dev/null
+++ b/testdata/scan/test.html
@@ -0,0 +1,5 @@
+<!--
+Copyright 2022, Shulhan <ms@kilabit.info>. All rights reserved.
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file.
+--> \ No newline at end of file
diff --git a/testdata/scan/test.sh b/testdata/scan/test.sh
new file mode 100644
index 0000000..8fd6949
--- /dev/null
+++ b/testdata/scan/test.sh
@@ -0,0 +1,3 @@
+# Copyright 2022, Shulhan <ms@kilabit.info>. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file. \ No newline at end of file