report: change the output to use CSV format

A space-separated format with double quotes around some fields is not easy to parse. Using CSV allows us to use the [encoding/csv] package from the standard library.
author: Shulhan <ms@kilabit.info> 2026-01-09 17:01:39 +0700
committer: Shulhan <ms@kilabit.info> 2026-01-09 17:08:00 +0700
commit: 813039a51f150c3f25e8596a7d4b480967e12bf5 (patch)
tree: 36d947642dbc940c5c10c95c6261d64f09ee6ef6
parent: 2094be654563456e33dd9432b2538037fbc18e92 (diff)
download: spdxconv-813039a51f150c3f25e8596a7d4b480967e12bf5.tar.xz
4 files changed, 60 insertions, 28 deletions
diff --git a/README.md b/README.md
index f83f73d..6afdd10 100644
--- a/README.md
+++ b/README.md
@@ -131,33 +131,33 @@ by "apply" command.
 
 ### spdxconv.report file format
 
-Each line in the report file has several columns separated by space
+Each line in the report file is formatted as CSV and has several columns
+separated by commas,
 
 ```
-path license_id year copyright_id
+path "," license_id "," year "," copyright_id
 ```
 
 where each column has the following values,
 
 ```
-path             = `"` { unicode_char } `"`
+path              = { unicode_char }
 
 license_id        = "default" | exist_at_index | license_text
-  license_text    = 1*{ unicode_char }
   exist_at_index  = "exists:" decimal_digit
+  license_text    = 1 * { unicode_char }
 
-year             = "unknown" / 4*decimal_digit
-                 / 4*decimal_digit "-" 4*decimal_digit
+year              = "unknown" | 4 * decimal_digit
+                  | 4 * decimal_digit "-" 4 * decimal_digit
 
-copyright_id     = `"` ("default" | exist_at_index | copyright_text) `"`
-  copyright_text = author contact
-  author         = 1*{ unicode_char }
-  contact        = "<" 1*{ unicode_char } ">"
+copyright_id      = "default" | exist_at_index | copyright_text
+  copyright_text  = author contact
+  author          = 1 * { unicode_char }
+  contact         = "<" 1 * { unicode_char } ">"
 ```
 
 The `path` column define the path to the file that will be processed by
 `apply` command.
-Its value must be enclosed in double quote.
 
 The `license_id` column define the license identifier to be used.
 The value either,
@@ -178,7 +178,6 @@ The value is either,
 - YYYY-YYYY - range of years, for example 2000-2026
 
 The `copyright_id` define the author and contact.
-Its value must be enclosed in double quote.
 The value is either,
 
 - default - insert new identifier using the default copyright_text value from
@@ -188,13 +187,13 @@ The value is either,
   Positive index means from the top, and negative means from the bottom.
 - copyright_text - the value of author and contact regex match group from
   one of the match-copyright pattern.
-  Its value must be enclosed in double quote.
 
 ### spdxconv.report file groups
 
 Each file in the report file is collected into three groups: regular,
 binary, and unknown files.
-Each group is separated by line prefixed with "//" and identifier,
+Each group is separated by a line prefixed with "//spdxconv:" followed by
+its identifier,
 
 ```
 //spdxconv:regular
diff --git a/file.go b/file.go
index c10cdb7..63cf35a 100644
--- a/file.go
+++ b/file.go
@@ -48,10 +48,14 @@ type file struct {
 	topLines    [][]byte
 	bottomLines [][]byte
 
-	// idxLicenseID index of License-Identifier.
+	// idxLicenseID is the index of the License-Identifier when the
+	// licenseID value has the "exist:" prefix.
 	// If its positive means the index is found at the topLines, if its
 	// negative means the index found at the bottomLines.
-	idxLicenseID     int
+	idxLicenseID int
+
+	// idxCopyrightText is the index of the FileCopyrightText when the
+	// copyrightText value has the "exist:" prefix.
 	idxCopyrightText int
 
 	hasSheBang bool
@@ -68,7 +72,6 @@ func newFile(path string, maxLine int) (f *file, err error) {
 
 	f = &file{
 		path:          path,
-		idxLicenseID:  -1,
 		licenseID:     valDefault,
 		copyrightYear: valUnknown,
 		copyrightText: valDefault,
diff --git a/report.go b/report.go
index 295299a..5ac725e 100644
--- a/report.go
+++ b/report.go
@@ -5,6 +5,7 @@ package spdxconv
 
 import (
 	"bytes"
+	"encoding/csv"
 	"fmt"
 	"os"
 )
@@ -47,23 +48,50 @@ func (rep *report) scan(conv *SPDXConv, listFile []string) (err error) {
 
 func (rep *report) write() (err error) {
 	var buf bytes.Buffer
+	buf.WriteString("//spdxconv:version:v1\n")
+	buf.WriteString("//spdxconv:header:path,license_id,year,copyright_id\n")
+
+	var csvw = csv.NewWriter(&buf)
+	var record = make([]string, 4)
+
 	buf.WriteString("//spdxconv:regular\n")
 	for _, file := range rep.listRegular {
-		fmt.Fprintf(&buf, "%q %s %s %q\n", file.path, file.licenseID,
-			file.copyrightYear, file.copyrightText)
+		record[0] = file.path
+		record[1] = file.licenseID
+		record[2] = file.copyrightYear
+		record[3] = file.copyrightText
+		err = csvw.Write(record)
+		if err != nil {
+			return err
+		}
 	}
+	csvw.Flush()
 
 	buf.WriteString("//spdxconv:binary\n")
 	for _, file := range rep.listBinary {
-		fmt.Fprintf(&buf, "%q %s %s %q\n", file.path, file.licenseID,
-			file.copyrightYear, file.copyrightText)
+		record[0] = file.path
+		record[1] = file.licenseID
+		record[2] = file.copyrightYear
+		record[3] = file.copyrightText
+		err = csvw.Write(record)
+		if err != nil {
+			return err
+		}
 	}
+	csvw.Flush()
 
 	buf.WriteString("//spdxconv:unknown\n")
 	for _, file := range rep.listUnknown {
-		fmt.Fprintf(&buf, "%q %s %s %q\n", file.path, file.licenseID,
-			file.copyrightYear, file.copyrightText)
+		record[0] = file.path
+		record[1] = file.licenseID
+		record[2] = file.copyrightYear
+		record[3] = file.copyrightText
+		err = csvw.Write(record)
+		if err != nil {
+			return err
+		}
 	}
+	csvw.Flush()
 
 	err = os.WriteFile(ReportFile, buf.Bytes(), 0600)
 	if err != nil {
diff --git a/spdxconv_test.go b/spdxconv_test.go
index 9d2fe6c..969f4ca 100644
--- a/spdxconv_test.go
+++ b/spdxconv_test.go
@@ -42,11 +42,13 @@ func TestScan(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	exp := `//spdxconv:regular
-".gitignore" exist:0 unknown "exist:1"
-"test.go" BSD-3-Clause unknown "Shulhan <ms@kilabit.info>"
-"test.html" default unknown "default"
-"test.sh" BSD-3-Clause unknown "Shulhan <ms@kilabit.info>"
+	exp := `//spdxconv:version:v1
+//spdxconv:header:path,license_id,year,copyright_id
+//spdxconv:regular
+.gitignore,exist:0,unknown,exist:1
+test.go,BSD-3-Clause,unknown,Shulhan <ms@kilabit.info>
+test.html,default,unknown,default
+test.sh,BSD-3-Clause,unknown,Shulhan <ms@kilabit.info>
 //spdxconv:binary
 //spdxconv:unknown`
author	Shulhan <ms@kilabit.info>	2026-01-09 17:01:39 +0700
committer	Shulhan <ms@kilabit.info>	2026-01-09 17:08:00 +0700
commit	813039a51f150c3f25e8596a7d4b480967e12bf5 (patch)
tree	36d947642dbc940c5c10c95c6261d64f09ee6ef6
parent	2094be654563456e33dd9432b2538037fbc18e92 (diff)
download	spdxconv-813039a51f150c3f25e8596a7d4b480967e12bf5.tar.xz