From d0cb81287185db0c0fb088da5c35004af315cd42 Mon Sep 17 00:00:00 2001 From: Shulhan Date: Thu, 15 Jan 2026 01:44:47 +0700 Subject: all: improve the scan by loading the existing report On the second and subsequent scans, load the existing spdxconv.report and check whether each file has already been scanned. If the file is in the regular or binary group, skip the scan; otherwise scan it again. --- report.go | 104 +++++++++++++++++++++++++++++++++------------------------ report_test.go | 78 ++++++++++++++++++++++++------------------- spdxconv.go | 16 +++++---- 3 files changed, 114 insertions(+), 84 deletions(-) diff --git a/report.go b/report.go index df77fa6..d8acc0c 100644 --- a/report.go +++ b/report.go @@ -9,7 +9,9 @@ import ( "errors" "fmt" "io" + "maps" "os" + "slices" "strconv" "strings" ) @@ -30,9 +32,9 @@ const ( const v1FieldsPerRecord = 8 type report struct { - listRegular []*file - listBinary []*file - listUnknown []*file + listBinary map[string]*file + listRegular map[string]*file + listUnknown map[string]*file } // loadReport load the [ReportFile] from the current directory. 
@@ -50,7 +52,7 @@ func loadReport() (rep *report, err error) { csvr.ReuseRecord = true csvr.TrimLeadingSpace = true - rep = &report{} + rep = newReport() var group string var record []string var f *file @@ -91,13 +93,13 @@ func loadReport() (rep *report, err error) { switch group { case reportGroupRegular: - rep.listRegular = append(rep.listRegular, f) + rep.listRegular[f.path] = f case reportGroupBinary: f.group = groupBinary - rep.listBinary = append(rep.listBinary, f) + rep.listBinary[f.path] = f case reportGroupUnknown: f.group = groupUnknown - rep.listUnknown = append(rep.listUnknown, f) + rep.listUnknown[f.path] = f } next: record, err = csvr.Read() @@ -108,44 +110,67 @@ func loadReport() (rep *report, err error) { } func newReport() (rep *report) { - rep = &report{} + rep = &report{ + listBinary: map[string]*file{}, + listRegular: map[string]*file{}, + listUnknown: map[string]*file{}, + } return rep } func (rep *report) scan(conv *SPDXConv, listFile []string) (err error) { var logp = `report.scan` for _, file := range listFile { + if rep.hasScanned(file) { + continue + } + f, err := newFile(file, conv.cfg.MaxLineMatch) if err != nil { return fmt.Errorf(`%s: %w`, logp, err) } if f.group == groupBinary { - rep.listBinary = append(rep.listBinary, f) + rep.listBinary[f.path] = f continue } f.scan(conv) if f.group == groupBinary { // json file should be detected as binary, since its // does not have comment syntax. 
- rep.listBinary = append(rep.listBinary, f) + rep.listBinary[f.path] = f continue } if f.group == groupUnknown { - rep.listUnknown = append(rep.listUnknown, f) + rep.listUnknown[f.path] = f continue } if f.licenseID == valExist && f.copyrightText == valExist { continue } if f.copyrightYear == `` { - rep.listUnknown = append(rep.listUnknown, f) + rep.listUnknown[f.path] = f continue } - rep.listRegular = append(rep.listRegular, f) + rep.listRegular[f.path] = f } return nil } +// hasScanned return true if the file is already reported in regular or +// binary group before. +func (rep *report) hasScanned(path string) bool { + var ok bool + _, ok = rep.listBinary[path] + if ok { + return true + } + _, ok = rep.listRegular[path] + if ok { + return true + } + return false +} + func (rep *report) write() (err error) { var buf bytes.Buffer @@ -161,19 +186,13 @@ func (rep *report) write() (err error) { var csvw = csv.NewWriter(&buf) var record = make([]string, v1FieldsPerRecord) + var f *file buf.WriteString("//\n") buf.WriteString(reportMetaPrefix + reportGroupRegular + "\n") buf.WriteString("//\n") - for _, file := range rep.listRegular { - record[0] = file.path - record[1] = file.licenseID - record[2] = strconv.Itoa(file.idxLicenseID) - record[3] = file.copyrightYear - record[4] = file.copyrightText - record[5] = strconv.Itoa(file.idxCopyrightText) - record[6] = file.commentPrefix - record[7] = file.commentSuffix - err = csvw.Write(record) + for _, key := range slices.Sorted(maps.Keys(rep.listRegular)) { + f = rep.listRegular[key] + err = csvWrite(csvw, f, record) if err != nil { return err } @@ -183,16 +202,9 @@ func (rep *report) write() (err error) { buf.WriteString("//\n") buf.WriteString(reportMetaPrefix + reportGroupBinary + "\n") buf.WriteString("//\n") - for _, file := range rep.listBinary { - record[0] = file.path - record[1] = file.licenseID - record[2] = strconv.Itoa(file.idxLicenseID) - record[3] = file.copyrightYear - record[4] = file.copyrightText - 
record[5] = strconv.Itoa(file.idxCopyrightText) - record[6] = file.commentPrefix - record[7] = file.commentSuffix - err = csvw.Write(record) + for _, key := range slices.Sorted(maps.Keys(rep.listBinary)) { + f = rep.listBinary[key] + err = csvWrite(csvw, f, record) if err != nil { return err } @@ -202,16 +214,9 @@ func (rep *report) write() (err error) { buf.WriteString("//\n") buf.WriteString(reportMetaPrefix + reportGroupUnknown + "\n") buf.WriteString("//\n") - for _, file := range rep.listUnknown { - record[0] = file.path - record[1] = file.licenseID - record[2] = strconv.Itoa(file.idxLicenseID) - record[3] = file.copyrightYear - record[4] = file.copyrightText - record[5] = strconv.Itoa(file.idxCopyrightText) - record[6] = file.commentPrefix - record[7] = file.commentSuffix - err = csvw.Write(record) + for _, key := range slices.Sorted(maps.Keys(rep.listUnknown)) { + f = rep.listUnknown[key] + err = csvWrite(csvw, f, record) if err != nil { return err } @@ -224,3 +229,16 @@ func (rep *report) write() (err error) { } return nil } + +func csvWrite(csvw *csv.Writer, f *file, record []string) (err error) { + record[0] = f.path + record[1] = f.licenseID + record[2] = strconv.Itoa(f.idxLicenseID) + record[3] = f.copyrightYear + record[4] = f.copyrightText + record[5] = strconv.Itoa(f.idxCopyrightText) + record[6] = f.commentPrefix + record[7] = f.commentSuffix + err = csvw.Write(record) + return err +} diff --git a/report_test.go b/report_test.go index abe7766..a042652 100644 --- a/report_test.go +++ b/report_test.go @@ -18,41 +18,49 @@ func TestLoadReport(t *testing.T) { t.Fatal(err) } exp := &report{ - listRegular: []*file{{ - path: `fileR1`, - licenseID: valDefault, - copyrightText: valDefault, - commentPrefix: `# `, - }, { - path: `file R2`, - licenseID: valExist, - idxLicenseID: 1, - copyrightYear: `2024`, - copyrightText: valExist, - idxCopyrightText: -1, - commentPrefix: `// `, - }, { - path: `fileR3`, - licenseID: valMatch, - idxLicenseID: -2, - 
copyrightYear: `2000-2026`, - copyrightText: valMatch, - idxCopyrightText: -3, - commentPrefix: ``, - }}, - listBinary: []*file{{ - path: `fileB1`, - licenseID: valDefault, - copyrightText: valDefault, - group: groupBinary, - }}, - listUnknown: []*file{{ - path: `fileU1`, - licenseID: valDefault, - copyrightText: valDefault, - group: groupUnknown, - }}, + listRegular: map[string]*file{ + `fileR1`: &file{ + path: `fileR1`, + licenseID: valDefault, + copyrightText: valDefault, + commentPrefix: `# `, + }, + `file R2`: &file{ + path: `file R2`, + licenseID: valExist, + idxLicenseID: 1, + copyrightYear: `2024`, + copyrightText: valExist, + idxCopyrightText: -1, + commentPrefix: `// `, + }, + `fileR3`: &file{ + path: `fileR3`, + licenseID: valMatch, + idxLicenseID: -2, + copyrightYear: `2000-2026`, + copyrightText: valMatch, + idxCopyrightText: -3, + commentPrefix: ``, + }, + }, + listBinary: map[string]*file{ + `fileB1`: &file{ + path: `fileB1`, + licenseID: valDefault, + copyrightText: valDefault, + group: groupBinary, + }, + }, + listUnknown: map[string]*file{ + `fileU1`: &file{ + path: `fileU1`, + licenseID: valDefault, + copyrightText: valDefault, + group: groupUnknown, + }, + }, } test.Assert(t, workDir, exp, got) } diff --git a/spdxconv.go b/spdxconv.go index 778ec51..64f138f 100644 --- a/spdxconv.go +++ b/spdxconv.go @@ -80,7 +80,11 @@ func Scan(path string) (err error) { return fmt.Errorf(`%s: %w`, logp, err) } - rep := newReport() + var rep *report + rep, err = loadReport() + if err != nil { + return fmt.Errorf(`%s: %w`, logp, err) + } err = rep.scan(conv, listFile) if err != nil { return fmt.Errorf(`%s: %w`, logp, err) @@ -114,18 +118,18 @@ func Apply() (err error) { return fmt.Errorf(`%s: %w`, logp, err) } - var listFail []*file + var listFail = make(map[string]*file) for _, f := range rep.listRegular { err = f.apply(conv) if err != nil { - listFail = append(listFail, f) + listFail[f.path] = f log.Printf(`%s: %s`, logp, err) continue } err = f.write() if err 
!= nil { - listFail = append(listFail, f) + listFail[f.path] = f log.Printf(`%s: %s`, logp, err) continue } @@ -138,12 +142,12 @@ func Apply() (err error) { fmt.Fprintf(&buf, "SPDX-FileCopyrightText: %s\n", conv.cfg.FileCopyrightText) // REUSE-IgnoreEnd - listFail = nil + listFail = make(map[string]*file) for _, f := range rep.listBinary { pathLicense := f.path + suffixLicense err = os.WriteFile(pathLicense, buf.Bytes(), 0600) if err != nil { - listFail = append(listFail, f) + listFail[f.path] = f log.Printf(`%s: failed to write %s`, logp, pathLicense) continue } -- cgit v1.3