summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2026-01-15 01:44:47 +0700
committerShulhan <ms@kilabit.info>2026-01-15 01:44:47 +0700
commitd0cb81287185db0c0fb088da5c35004af315cd42 (patch)
tree581c0cee6648c82f8fe9674702fab89c6bdc2616
parent56f2fb3751f73ec7bc04f19a7bb36587340de298 (diff)
downloadspdxconv-d0cb81287185db0c0fb088da5c35004af315cd42.tar.xz
all: improve the scan by loading the existing report
On the second and subsequent scans, load the existing spdxconv.report and check whether each file has already been scanned. If the file is in the regular or binary group, skip it; otherwise, scan it again.
-rw-r--r--report.go104
-rw-r--r--report_test.go78
-rw-r--r--spdxconv.go16
3 files changed, 114 insertions, 84 deletions
diff --git a/report.go b/report.go
index df77fa6..d8acc0c 100644
--- a/report.go
+++ b/report.go
@@ -9,7 +9,9 @@ import (
"errors"
"fmt"
"io"
+ "maps"
"os"
+ "slices"
"strconv"
"strings"
)
@@ -30,9 +32,9 @@ const (
const v1FieldsPerRecord = 8
type report struct {
- listRegular []*file
- listBinary []*file
- listUnknown []*file
+ listBinary map[string]*file
+ listRegular map[string]*file
+ listUnknown map[string]*file
}
// loadReport load the [ReportFile] from the current directory.
@@ -50,7 +52,7 @@ func loadReport() (rep *report, err error) {
csvr.ReuseRecord = true
csvr.TrimLeadingSpace = true
- rep = &report{}
+ rep = newReport()
var group string
var record []string
var f *file
@@ -91,13 +93,13 @@ func loadReport() (rep *report, err error) {
switch group {
case reportGroupRegular:
- rep.listRegular = append(rep.listRegular, f)
+ rep.listRegular[f.path] = f
case reportGroupBinary:
f.group = groupBinary
- rep.listBinary = append(rep.listBinary, f)
+ rep.listBinary[f.path] = f
case reportGroupUnknown:
f.group = groupUnknown
- rep.listUnknown = append(rep.listUnknown, f)
+ rep.listUnknown[f.path] = f
}
next:
record, err = csvr.Read()
@@ -108,44 +110,67 @@ func loadReport() (rep *report, err error) {
}
func newReport() (rep *report) {
- rep = &report{}
+ rep = &report{
+ listBinary: map[string]*file{},
+ listRegular: map[string]*file{},
+ listUnknown: map[string]*file{},
+ }
return rep
}
func (rep *report) scan(conv *SPDXConv, listFile []string) (err error) {
var logp = `report.scan`
for _, file := range listFile {
+ if rep.hasScanned(file) {
+ continue
+ }
+
f, err := newFile(file, conv.cfg.MaxLineMatch)
if err != nil {
return fmt.Errorf(`%s: %w`, logp, err)
}
if f.group == groupBinary {
- rep.listBinary = append(rep.listBinary, f)
+ rep.listBinary[f.path] = f
continue
}
f.scan(conv)
if f.group == groupBinary {
// json file should be detected as binary, since its
// does not have comment syntax.
- rep.listBinary = append(rep.listBinary, f)
+ rep.listBinary[f.path] = f
continue
}
if f.group == groupUnknown {
- rep.listUnknown = append(rep.listUnknown, f)
+ rep.listUnknown[f.path] = f
continue
}
if f.licenseID == valExist && f.copyrightText == valExist {
continue
}
if f.copyrightYear == `` {
- rep.listUnknown = append(rep.listUnknown, f)
+ rep.listUnknown[f.path] = f
continue
}
- rep.listRegular = append(rep.listRegular, f)
+ rep.listRegular[f.path] = f
}
return nil
}
+// hasScanned returns true if the file has already been reported in the
+// regular or binary group.
+func (rep *report) hasScanned(path string) bool {
+ var ok bool
+ _, ok = rep.listBinary[path]
+ if ok {
+ return true
+ }
+ _, ok = rep.listRegular[path]
+ if ok {
+ return true
+ }
+ return false
+}
+
func (rep *report) write() (err error) {
var buf bytes.Buffer
@@ -161,19 +186,13 @@ func (rep *report) write() (err error) {
var csvw = csv.NewWriter(&buf)
var record = make([]string, v1FieldsPerRecord)
+ var f *file
buf.WriteString("//\n")
buf.WriteString(reportMetaPrefix + reportGroupRegular + "\n")
buf.WriteString("//\n")
- for _, file := range rep.listRegular {
- record[0] = file.path
- record[1] = file.licenseID
- record[2] = strconv.Itoa(file.idxLicenseID)
- record[3] = file.copyrightYear
- record[4] = file.copyrightText
- record[5] = strconv.Itoa(file.idxCopyrightText)
- record[6] = file.commentPrefix
- record[7] = file.commentSuffix
- err = csvw.Write(record)
+ for _, key := range slices.Sorted(maps.Keys(rep.listRegular)) {
+ f = rep.listRegular[key]
+ err = csvWrite(csvw, f, record)
if err != nil {
return err
}
@@ -183,16 +202,9 @@ func (rep *report) write() (err error) {
buf.WriteString("//\n")
buf.WriteString(reportMetaPrefix + reportGroupBinary + "\n")
buf.WriteString("//\n")
- for _, file := range rep.listBinary {
- record[0] = file.path
- record[1] = file.licenseID
- record[2] = strconv.Itoa(file.idxLicenseID)
- record[3] = file.copyrightYear
- record[4] = file.copyrightText
- record[5] = strconv.Itoa(file.idxCopyrightText)
- record[6] = file.commentPrefix
- record[7] = file.commentSuffix
- err = csvw.Write(record)
+ for _, key := range slices.Sorted(maps.Keys(rep.listBinary)) {
+ f = rep.listBinary[key]
+ err = csvWrite(csvw, f, record)
if err != nil {
return err
}
@@ -202,16 +214,9 @@ func (rep *report) write() (err error) {
buf.WriteString("//\n")
buf.WriteString(reportMetaPrefix + reportGroupUnknown + "\n")
buf.WriteString("//\n")
- for _, file := range rep.listUnknown {
- record[0] = file.path
- record[1] = file.licenseID
- record[2] = strconv.Itoa(file.idxLicenseID)
- record[3] = file.copyrightYear
- record[4] = file.copyrightText
- record[5] = strconv.Itoa(file.idxCopyrightText)
- record[6] = file.commentPrefix
- record[7] = file.commentSuffix
- err = csvw.Write(record)
+ for _, key := range slices.Sorted(maps.Keys(rep.listUnknown)) {
+ f = rep.listUnknown[key]
+ err = csvWrite(csvw, f, record)
if err != nil {
return err
}
@@ -224,3 +229,16 @@ func (rep *report) write() (err error) {
}
return nil
}
+
+func csvWrite(csvw *csv.Writer, f *file, record []string) (err error) {
+ record[0] = f.path
+ record[1] = f.licenseID
+ record[2] = strconv.Itoa(f.idxLicenseID)
+ record[3] = f.copyrightYear
+ record[4] = f.copyrightText
+ record[5] = strconv.Itoa(f.idxCopyrightText)
+ record[6] = f.commentPrefix
+ record[7] = f.commentSuffix
+ err = csvw.Write(record)
+ return err
+}
diff --git a/report_test.go b/report_test.go
index abe7766..a042652 100644
--- a/report_test.go
+++ b/report_test.go
@@ -18,41 +18,49 @@ func TestLoadReport(t *testing.T) {
t.Fatal(err)
}
exp := &report{
- listRegular: []*file{{
- path: `fileR1`,
- licenseID: valDefault,
- copyrightText: valDefault,
- commentPrefix: `# `,
- }, {
- path: `file R2`,
- licenseID: valExist,
- idxLicenseID: 1,
- copyrightYear: `2024`,
- copyrightText: valExist,
- idxCopyrightText: -1,
- commentPrefix: `// `,
- }, {
- path: `fileR3`,
- licenseID: valMatch,
- idxLicenseID: -2,
- copyrightYear: `2000-2026`,
- copyrightText: valMatch,
- idxCopyrightText: -3,
- commentPrefix: `<!-- `,
- commentSuffix: ` -->`,
- }},
- listBinary: []*file{{
- path: `fileB1`,
- licenseID: valDefault,
- copyrightText: valDefault,
- group: groupBinary,
- }},
- listUnknown: []*file{{
- path: `fileU1`,
- licenseID: valDefault,
- copyrightText: valDefault,
- group: groupUnknown,
- }},
+ listRegular: map[string]*file{
+ `fileR1`: &file{
+ path: `fileR1`,
+ licenseID: valDefault,
+ copyrightText: valDefault,
+ commentPrefix: `# `,
+ },
+ `file R2`: &file{
+ path: `file R2`,
+ licenseID: valExist,
+ idxLicenseID: 1,
+ copyrightYear: `2024`,
+ copyrightText: valExist,
+ idxCopyrightText: -1,
+ commentPrefix: `// `,
+ },
+ `fileR3`: &file{
+ path: `fileR3`,
+ licenseID: valMatch,
+ idxLicenseID: -2,
+ copyrightYear: `2000-2026`,
+ copyrightText: valMatch,
+ idxCopyrightText: -3,
+ commentPrefix: `<!-- `,
+ commentSuffix: ` -->`,
+ },
+ },
+ listBinary: map[string]*file{
+ `fileB1`: &file{
+ path: `fileB1`,
+ licenseID: valDefault,
+ copyrightText: valDefault,
+ group: groupBinary,
+ },
+ },
+ listUnknown: map[string]*file{
+ `fileU1`: &file{
+ path: `fileU1`,
+ licenseID: valDefault,
+ copyrightText: valDefault,
+ group: groupUnknown,
+ },
+ },
}
test.Assert(t, workDir, exp, got)
}
diff --git a/spdxconv.go b/spdxconv.go
index 778ec51..64f138f 100644
--- a/spdxconv.go
+++ b/spdxconv.go
@@ -80,7 +80,11 @@ func Scan(path string) (err error) {
return fmt.Errorf(`%s: %w`, logp, err)
}
- rep := newReport()
+ var rep *report
+ rep, err = loadReport()
+ if err != nil {
+ return fmt.Errorf(`%s: %w`, logp, err)
+ }
err = rep.scan(conv, listFile)
if err != nil {
return fmt.Errorf(`%s: %w`, logp, err)
@@ -114,18 +118,18 @@ func Apply() (err error) {
return fmt.Errorf(`%s: %w`, logp, err)
}
- var listFail []*file
+ var listFail = make(map[string]*file)
for _, f := range rep.listRegular {
err = f.apply(conv)
if err != nil {
- listFail = append(listFail, f)
+ listFail[f.path] = f
log.Printf(`%s: %s`, logp, err)
continue
}
err = f.write()
if err != nil {
- listFail = append(listFail, f)
+ listFail[f.path] = f
log.Printf(`%s: %s`, logp, err)
continue
}
@@ -138,12 +142,12 @@ func Apply() (err error) {
fmt.Fprintf(&buf, "SPDX-FileCopyrightText: %s\n", conv.cfg.FileCopyrightText)
// REUSE-IgnoreEnd
- listFail = nil
+ listFail = make(map[string]*file)
for _, f := range rep.listBinary {
pathLicense := f.path + suffixLicense
err = os.WriteFile(pathLicense, buf.Bytes(), 0600)
if err != nil {
- listFail = append(listFail, f)
+ listFail[f.path] = f
log.Printf(`%s: failed to write %s`, logp, pathLicense)
continue
}