aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2024-12-26 04:15:28 +0700
committerShulhan <ms@kilabit.info>2024-12-28 16:46:59 +0700
commit4003b6359747f6e43357e4bf190d4e71a66ec796 (patch)
treef219d067b0a5a3a2bfe8554f88f39f580a5c59d6
parent052d4673d0d7777bd429134f9f30c4a87462208f (diff)
downloadpakakeh.go-4003b6359747f6e43357e4bf190d4e71a66ec796.tar.xz
lib/binary: implement append-only binary file
The binary is a new package that complements the standard binary package. Currently it implements an append-only binary file that encodes the data using [binary.Write]. We call it "Apo" for short.
-rw-r--r--README.md6
-rw-r--r--lib/binary/apo_file.go272
-rw-r--r--lib/binary/apo_file_test.go168
-rw-r--r--lib/binary/apo_footer.go55
-rw-r--r--lib/binary/apo_header.go25
-rw-r--r--lib/binary/apo_meta.go32
-rw-r--r--lib/binary/apo_meta_data.go11
-rw-r--r--lib/binary/binary.go12
-rw-r--r--lib/binary/binary_test.go19
-rw-r--r--lib/binary/testdata/ApoFileWrite_test.txt18
-rw-r--r--lib/binary/testdata/OpenApo_test.txt15
11 files changed, 633 insertions, 0 deletions
diff --git a/README.md b/README.md
index 7d9cacb6..1e94ca10 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,12 @@ encryption.
[**ascii**](https://pkg.go.dev/git.sr.ht/~shulhan/pakakeh.go/lib/ascii)::
A library for working with ASCII characters.
+[**binary**](https://pkg.go.dev/git.sr.ht/~shulhan/pakakeh.go/lib/binary)::
+Package binary complements the standard [binary] package.
+Currently it implements an append-only binary file that encodes the data
+using binary.Write.
+We call it "Apo" for short.
+
[**bytes**](https://pkg.go.dev/git.sr.ht/~shulhan/pakakeh.go/lib/bytes)::
A library for working with slice of bytes.
diff --git a/lib/binary/apo_file.go b/lib/binary/apo_file.go
new file mode 100644
index 00000000..b44614ee
--- /dev/null
+++ b/lib/binary/apo_file.go
@@ -0,0 +1,272 @@
+// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info>
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+package binary
+
+import (
+	"bytes"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"reflect"
+	"sync"
+)
+
+// ApoFile implements an append-only writer that encodes the data using
+// [binary.Write] with [binary.BigEndian] order.
+// The data to be written must be supported by [binary.Write] (it must
+// contain only fixed-size types).
+// Types like string or map are not supported, and neither is a struct that
+// has a field of such type.
+// To write such data, the type needs to implement [io.WriterTo].
+//
+// The file that is written by ApoFile has the following structure,
+//
+// Apohead
+// * ApoMeta data
+// Apofoot
+//
+// Each data is prepended by an [ApoMeta] as metadata that contains the
+// write operation, the time when it was written, and the kind of data being
+// written.
+//
+// The [ApoMeta] defines the operation on the data:
+//
+// - [ApoOpInsert] inserts new data, which should be unique among others.
+// - [ApoOpUpdate] indicates that the next data contains an update for
+// previously inserted data.
+// The data being updated can be partial or all of it.
+// - [ApoOpReplace] indicates that the next data replaces the whole
+// previously inserted data.
+// - [ApoOpDelete] deletes the previously inserted data.
+// Which record is being deleted is defined inside the data (probably by
+// using some ID).
+//
+// The update and replace operations may seem redundant.
+// The update operation is provided to help the writer write partial data
+// when needed.
+type ApoFile struct {
+ // file is the handle of the opened Apo file.
+ file *os.File `noequal:""`
+
+ // name is the path that was passed to OpenApo.
+ name string
+ // foot is the in-memory copy of the file footer.
+ foot apoFooter
+ // head is the in-memory copy of the file header.
+ head apoHeader
+
+ // mtx is held by Write and Close while they use the file.
+ mtx sync.Mutex
+}
+
+// OpenApo opens the Apo file at path name for reading and appending.
+// If the file does not exist, it is created and initialized with an empty
+// header and footer.
+// If the file exists, its header and footer are loaded into memory.
+//
+// Use [ApoFile.Write] to append data and [ApoFile.ReadAll] to read back
+// the stored data.
+// The caller must call [ApoFile.Close] once the file is no longer used.
+//
+// On error the returned apo is nil and the underlying file, if it was
+// opened, has been closed.
+func OpenApo(name string) (apo *ApoFile, err error) {
+	var logp = `OpenApo`
+	var isNew bool
+	var openFlag = os.O_RDWR
+
+	apo = &ApoFile{
+		name: name,
+	}
+	_, err = os.Stat(name)
+	if err != nil {
+		if !errors.Is(err, os.ErrNotExist) {
+			return nil, fmt.Errorf(`%s: %w`, logp, err)
+		}
+		openFlag |= os.O_CREATE
+		isNew = true
+	}
+
+	apo.file, err = os.OpenFile(name, openFlag, 0600)
+	if err != nil {
+		return nil, fmt.Errorf(`%s: %w`, logp, err)
+	}
+
+	if isNew {
+		err = apo.init()
+	} else {
+		err = apo.load()
+	}
+	if err != nil {
+		// The open already failed, so the error from Close adds nothing;
+		// closing here only prevents leaking the file descriptor.
+		_ = apo.Close()
+		return nil, fmt.Errorf(`%s: %w`, logp, err)
+	}
+
+	return apo, nil
+}
+
+// load reads the header from the start of an existing file, then seeks to
+// the footer offset recorded in that header and reads the footer.
+func (apo *ApoFile) load() (err error) {
+	err = binary.Read(apo.file, binary.BigEndian, &apo.head)
+	if err != nil {
+		return fmt.Errorf(`read header: %w`, err)
+	}
+
+	_, err = apo.file.Seek(apo.head.OffFoot, io.SeekStart)
+	if err != nil {
+		return fmt.Errorf(`seek footer: %w`, err)
+	}
+
+	_, err = apo.foot.ReadFrom(apo.file)
+	if err != nil {
+		return fmt.Errorf(`read footer: %w`, err)
+	}
+	return nil
+}
+
+// Close closes the underlying file while holding the mutex, and returns
+// the error from closing it, if any.
+func (apo *ApoFile) Close() (err error) {
+	apo.mtx.Lock()
+	defer apo.mtx.Unlock()
+
+	return apo.file.Close()
+}
+
+// ReadAll reads all meta and data from the file, where all data has the
+// same type.
+// If data implements [io.ReaderFrom] it is decoded using
+// [io.ReaderFrom.ReadFrom], otherwise using [binary.Read].
+//
+// Each record is decoded into data and then copied into a value of its
+// own, so every item in the returned list holds a distinct Data.
+// After ReadAll returns, data itself contains the last record read.
+// The copy is shallow.
+// If data is not a non-nil pointer it cannot be copied, and the same data
+// is stored in every item.
+//
+// The mutex is held during the read because reading moves the file offset
+// that is also used by Write.
+func (apo *ApoFile) ReadAll(data any) (list []ApoMetaData, err error) {
+	var logp = `ReadAll`
+
+	apo.mtx.Lock()
+	defer apo.mtx.Unlock()
+
+	// The first record starts right after the header.
+	var hdrSize = int64(binary.Size(apo.head))
+	_, err = apo.file.Seek(hdrSize, io.SeekStart)
+	if err != nil {
+		return nil, fmt.Errorf(`%s: %w`, logp, err)
+	}
+
+	var meta ApoMeta
+	for x := range apo.head.TotalData {
+		err = binary.Read(apo.file, binary.BigEndian, &meta)
+		if err != nil {
+			return nil, fmt.Errorf(`%s: at %d: %w`, logp, x, err)
+		}
+
+		switch v := data.(type) {
+		case io.ReaderFrom:
+			_, err = v.ReadFrom(apo.file)
+		default:
+			err = binary.Read(apo.file, binary.BigEndian, data)
+		}
+		if err != nil {
+			return nil, fmt.Errorf(`%s: at %d: %w`, logp, x, err)
+		}
+
+		list = append(list, ApoMetaData{
+			Meta: meta,
+			Data: copyData(data),
+		})
+	}
+	return list, nil
+}
+
+// copyData returns a pointer to a new shallow copy of the value pointed to
+// by data.
+// If data is not a non-nil pointer, it is returned as is.
+func copyData(data any) any {
+	var rv = reflect.ValueOf(data)
+	if rv.Kind() != reflect.Pointer || rv.IsNil() {
+		return data
+	}
+	var dup = reflect.New(rv.Type().Elem())
+	dup.Elem().Set(rv.Elem())
+	return dup.Interface()
+}
+
+// Write the meta and data into file.
+// If the data is a type with non-fixed size, like slice,
+// string, or map (or struct with non-fixed size field); it should implement
+// [io.WriterTo], otherwise the write will fail.
+func (apo *ApoFile) Write(meta ApoMeta, data any) (err error) {
+	var (
+		logp = `Write`
+		buf  bytes.Buffer
+	)
+
+	// Fill in the write time when the caller did not set one.
+	if meta.At <= 0 {
+		meta.At = timeNow().UnixNano()
+	}
+
+	// Encode the data first, outside the lock, so an encoding failure
+	// never touches the file or the in-memory state.
+	switch v := data.(type) {
+	case io.WriterTo:
+		_, err = v.WriteTo(&buf)
+		if err != nil {
+			return fmt.Errorf(`%s: using io.WriterTo: %w`,
+				logp, err)
+		}
+
+	default:
+		err = binary.Write(&buf, binary.BigEndian, data)
+		if err != nil {
+			return fmt.Errorf(`%s: encode data: %w`, logp, err)
+		}
+	}
+
+	apo.mtx.Lock()
+	defer apo.mtx.Unlock()
+
+	// Remember the current state so it can be restored if the commit
+	// fails; commit updates head.OffFoot part way through.
+	var (
+		prevHead = apo.head
+		prevNidx = len(apo.foot.idxMetaOff)
+	)
+
+	apo.head.TotalData++
+
+	// Remember the current footer offset as the new meta-data index.
+	apo.foot.idxMetaOff = append(apo.foot.idxMetaOff, apo.head.OffFoot)
+
+	err = apo.commit(meta, buf.Bytes())
+	if err != nil {
+		// Restore the header and footer, so the next Write starts
+		// again from the last committed footer offset instead of
+		// counting and indexing the failed record.
+		apo.head = prevHead
+		apo.foot.idxMetaOff = apo.foot.idxMetaOff[:prevNidx]
+		return fmt.Errorf(`%s: %w`, logp, err)
+	}
+	return nil
+}
+
+// commit writes meta and data at the current footer offset, overwriting
+// the old footer, then writes the footer after the data, rewrites the
+// header at the start of the file with the new footer offset, and syncs
+// the file.
+//
+// It updates apo.head.OffFoot once meta and data have been written.
+// The caller, Write, holds the mutex.
+func (apo *ApoFile) commit(meta ApoMeta, data []byte) (err error) {
+	// Move back to the offset of footer ...
+	_, err = apo.file.Seek(apo.head.OffFoot, io.SeekStart)
+	if err != nil {
+		return fmt.Errorf(`seek back %d: %w`, apo.head.OffFoot, err)
+	}
+
+	// write meta and data ...
+	err = binary.Write(apo.file, binary.BigEndian, meta)
+	if err != nil {
+		return fmt.Errorf(`write meta: %w`, err)
+	}
+
+	_, err = apo.file.Write(data)
+	if err != nil {
+		return fmt.Errorf(`write data: %w`, err)
+	}
+
+	// get the current offset for new footer ...
+	apo.head.OffFoot, err = apo.file.Seek(0, io.SeekCurrent)
+	if err != nil {
+		return fmt.Errorf(`seek current: %w`, err)
+	}
+
+	// write footer ...
+	_, err = apo.foot.WriteTo(apo.file)
+	if err != nil {
+		return fmt.Errorf(`write footer: %w`, err)
+	}
+
+	// ... and finally write the header.
+	_, err = apo.file.Seek(0, io.SeekStart)
+	if err != nil {
+		return fmt.Errorf(`seek header: %w`, err)
+	}
+
+	err = binary.Write(apo.file, binary.BigEndian, apo.head)
+	if err != nil {
+		return fmt.Errorf(`write header: %w`, err)
+	}
+
+	// Wrap the Sync error like every other step, so the failing step can
+	// be identified from the message.
+	err = apo.file.Sync()
+	if err != nil {
+		return fmt.Errorf(`sync: %w`, err)
+	}
+
+	return nil
+}
+
+// init prepares a newly created file: it initializes the header, points
+// the footer offset right after the header, writes the header followed by
+// the footer, and syncs the file.
+func (apo *ApoFile) init() (err error) {
+	const logp = `init`
+
+	apo.head.init()
+	apo.head.OffFoot = int64(binary.Size(apo.head))
+
+	if err = binary.Write(apo.file, binary.BigEndian, apo.head); err != nil {
+		return fmt.Errorf(`%s: writing header: %w`, logp, err)
+	}
+	if _, err = apo.foot.WriteTo(apo.file); err != nil {
+		return fmt.Errorf(`%s: writing footer: %w`, logp, err)
+	}
+	if err = apo.file.Sync(); err != nil {
+		return fmt.Errorf(`%s: on Sync: %w`, logp, err)
+	}
+	return nil
+}
diff --git a/lib/binary/apo_file_test.go b/lib/binary/apo_file_test.go
new file mode 100644
index 00000000..52d54500
--- /dev/null
+++ b/lib/binary/apo_file_test.go
@@ -0,0 +1,168 @@
+// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: BSD-3-Clause
+
+package binary
+
+import (
+ "bytes"
+ "os"
+ "path/filepath"
+ "testing"
+
+ libbytes "git.sr.ht/~shulhan/pakakeh.go/lib/bytes"
+ "git.sr.ht/~shulhan/pakakeh.go/lib/test"
+)
+
+// Opening the ApoFile should create the file if it does not exist, and
+// write the header and footer.
+// Reopening the same file should load the same header and footer.
+func TestOpenApo(t *testing.T) {
+	var (
+		dir  = t.TempDir()
+		path = filepath.Join(dir, `OpenApo_test.bin`)
+
+		apo *ApoFile
+		err error
+	)
+
+	apo, err = OpenApo(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	err = apo.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var tdata *test.Data
+
+	tdata, err = test.LoadData(`testdata/OpenApo_test.txt`)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var gotb []byte
+	gotb, err = os.ReadFile(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var gotDump bytes.Buffer
+	libbytes.DumpPrettyTable(&gotDump, `empty`, gotb)
+
+	var exp = string(tdata.Output[`empty`])
+	test.Assert(t, `empty`, exp, gotDump.String())
+
+	// Test reading ...
+
+	var apor *ApoFile
+	apor, err = OpenApo(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Close the reopened file so the descriptor is released before the
+	// temporary directory is removed.
+	t.Cleanup(func() {
+		_ = apor.Close()
+	})
+
+	test.Assert(t, `ReadApo`, apo, apor)
+}
+
+// testCaseWrite describes one write scenario and the state of the file
+// expected after it.
+type testCaseWrite struct {
+ // tag names the sub-test and labels the dump of the file.
+ tag string
+ // expHexdump is the expected pretty-table dump of the whole file
+ // after the write.
+ expHexdump string
+ // expMetaData lists the meta and data to be written, which are also
+ // the values expected back from ReadAll.
+ expMetaData []ApoMetaData
+ // expFooter is the footer expected after reopening the file.
+ expFooter apoFooter
+ // expHeader is the header expected after reopening the file.
+ expHeader apoHeader
+}
+
+// dataWrite is the fixed-size record used as data in the write and read
+// tests.
+type dataWrite struct {
+ ID int64
+}
+
+// TestApoFileWrite runs every write case against a single Apo file: each
+// case is first written and compared with the expected dump, then the file
+// is reopened and read back.
+func TestApoFileWrite(t *testing.T) {
+	var (
+		tdata *test.Data
+		err   error
+	)
+
+	tdata, err = test.LoadData(`testdata/ApoFileWrite_test.txt`)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var (
+		path = filepath.Join(t.TempDir(), `ApoFileWrite_test.apo`)
+		apo  *ApoFile
+	)
+
+	apo, err = OpenApo(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Cleanup(func() {
+		_ = apo.Close()
+	})
+
+	var caseInsert = testCaseWrite{
+		tag:        `insert`,
+		expHexdump: string(tdata.Output[`insert`]),
+		expHeader: apoHeader{
+			Version:   apoVersionOne,
+			TotalData: 1,
+			OffFoot:   38,
+		},
+		expFooter: apoFooter{
+			idxMetaOff: []int64{17},
+		},
+		expMetaData: []ApoMetaData{{
+			Meta: ApoMeta{
+				At: 1735179660000000000,
+			},
+			Data: &dataWrite{
+				ID: 1,
+			},
+		}},
+	}
+
+	var listCase = []testCaseWrite{caseInsert}
+
+	for _, tcase := range listCase {
+		var tagRead = tcase.tag + ` read`
+
+		t.Run(tcase.tag, func(t *testing.T) {
+			testWrite(t, tcase, apo)
+		})
+		t.Run(tagRead, func(t *testing.T) {
+			testRead(t, tcase, apo)
+		})
+	}
+}
+
+// testWrite writes every meta-data of tcase into apow, then compares the
+// pretty-table dump of the whole file with the expected dump.
+func testWrite(t *testing.T, tcase testCaseWrite, apow *ApoFile) {
+	var err error
+
+	for _, item := range tcase.expMetaData {
+		if err = apow.Write(item.Meta, item.Data); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	var content []byte
+
+	content, err = os.ReadFile(apow.name)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var dump bytes.Buffer
+	libbytes.DumpPrettyTable(&dump, tcase.tag, content)
+
+	var got = dump.String()
+	test.Assert(t, tcase.tag, tcase.expHexdump, got)
+}
+
+// testRead reopens the file written through apow, and compares the loaded
+// header, footer, and all meta-data with the ones expected by tcase.
+func testRead(t *testing.T, tcase testCaseWrite, apow *ApoFile) {
+	var (
+		apor *ApoFile
+		err  error
+	)
+
+	apor, err = OpenApo(apow.name)
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Cleanup(func() {
+		_ = apor.Close()
+	})
+
+	test.Assert(t, `header`, tcase.expHeader, apor.head)
+	test.Assert(t, `footer`, tcase.expFooter, apor.foot)
+
+	var (
+		record dataWrite
+		got    []ApoMetaData
+	)
+
+	got, err = apor.ReadAll(&record)
+	if err != nil {
+		t.Fatal(err)
+	}
+	test.Assert(t, `meta-data`, tcase.expMetaData, got)
+}
diff --git a/lib/binary/apo_footer.go b/lib/binary/apo_footer.go
new file mode 100644
index 00000000..7e6670c1
--- /dev/null
+++ b/lib/binary/apo_footer.go
@@ -0,0 +1,55 @@
+// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info>
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+package binary
+
+import (
+ "encoding/binary"
+ "io"
+)
+
+// apoFooter contains dynamic meta data for a single Apo file.
+// It is encoded as an int64 count followed by that many int64 offsets, all
+// in big-endian order.
+type apoFooter struct {
+ // idxMetaOff contains the offset of each ApoMeta in the file.
+ idxMetaOff []int64
+}
+
+// WriteTo encodes the footer into w in big-endian order: first the number
+// of offsets as int64, followed by each offset as int64.
+// It returns the number of bytes written, and the first error returned by
+// the writer.
+func (foot *apoFooter) WriteTo(w io.Writer) (n int64, err error) {
+	var nidx = int64(len(foot.idxMetaOff))
+
+	// The error must be kept here; if it is discarded, a failed write of
+	// the count goes unnoticed.
+	err = binary.Write(w, binary.BigEndian, nidx)
+	if err != nil {
+		return 0, err
+	}
+	var sizei64 = int64(binary.Size(nidx))
+	n = sizei64
+	for _, off := range foot.idxMetaOff {
+		err = binary.Write(w, binary.BigEndian, off)
+		if err != nil {
+			return n, err
+		}
+		n += sizei64
+	}
+	return n, nil
+}
+
+// ReadFrom decodes the footer from r: first the number of offsets as
+// big-endian int64, followed by that many int64 offsets.
+// The decoded offsets replace any offsets stored in foot; when the count is
+// zero the list is left nil.
+// It returns the number of bytes consumed, including the count.
+// On error foot is left unchanged.
+func (foot *apoFooter) ReadFrom(r io.Reader) (n int64, err error) {
+	var nidx int64
+	err = binary.Read(r, binary.BigEndian, &nidx)
+	if err != nil {
+		return 0, err
+	}
+	var (
+		off  int64
+		size = int64(binary.Size(off))
+
+		// list is grown by append instead of being pre-sized, since
+		// nidx comes from the file and may not be trustworthy.
+		list []int64
+	)
+
+	// Account for the count that has just been read.
+	n = size
+	for range nidx {
+		err = binary.Read(r, binary.BigEndian, &off)
+		if err != nil {
+			return n, err
+		}
+		list = append(list, off)
+		n += size
+	}
+	foot.idxMetaOff = list
+	return n, nil
+}
diff --git a/lib/binary/apo_header.go b/lib/binary/apo_header.go
new file mode 100644
index 00000000..ec8a0a7b
--- /dev/null
+++ b/lib/binary/apo_header.go
@@ -0,0 +1,25 @@
+// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info>
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+package binary
+
+// apoVersion is the one-byte version of the Apo file format, stored in the
+// header.
+type apoVersion byte
+
+// apoVersionOne is the version set by apoHeader.init.
+const apoVersionOne apoVersion = 1
+
+// apoHeader defines the header of an Apo file.
+// It is written at the start of the file using binary.Write in big-endian
+// order.
+type apoHeader struct {
+ // Version defines the version of the Apo file.
+ Version apoVersion
+
+ // TotalData is the number of data in the file.
+ TotalData int64
+
+ // OffFoot defines the offset of the Apo footer in the file.
+ OffFoot int64
+}
+
+// init sets the header version to apoVersionOne.
+// The other fields are left untouched.
+func (head *apoHeader) init() {
+ head.Version = apoVersionOne
+}
diff --git a/lib/binary/apo_meta.go b/lib/binary/apo_meta.go
new file mode 100644
index 00000000..7f9aa63f
--- /dev/null
+++ b/lib/binary/apo_meta.go
@@ -0,0 +1,32 @@
+// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info>
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+package binary
+
+// ApoOp defines the write operation of data.
+type ApoOp byte
+
+// List of possible Apo write operations.
+//
+// Every constant is explicitly typed as ApoOp.
+// In a constant block only a repeated, empty expression inherits the
+// previous type; a constant with its own value and no type is an untyped
+// integer.
+const (
+	ApoOpInsert  ApoOp = 0 // Default operation.
+	ApoOpUpdate  ApoOp = 1
+	ApoOpReplace ApoOp = 2
+	ApoOpDelete  ApoOp = 4
+)
+
+// ApoMeta defines the metadata for each data.
+// It is written right before its data using binary.Write in big-endian
+// order.
+type ApoMeta struct {
+ // At contains the timestamp with nanoseconds, in UTC timezone, when
+ // Write was called.
+ // If it is zero or negative, Write sets it to the current time.
+ At int64
+
+ // Kind defines the type of data.
+ // The value of this field is defined by the user, to know the type of
+ // data stored when reading it later.
+ Kind int32
+
+ // Op defines the write operation, including: insert, update,
+ // replace, or delete.
+ Op ApoOp
+}
diff --git a/lib/binary/apo_meta_data.go b/lib/binary/apo_meta_data.go
new file mode 100644
index 00000000..54796426
--- /dev/null
+++ b/lib/binary/apo_meta_data.go
@@ -0,0 +1,11 @@
+// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info>
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+package binary
+
+// ApoMetaData contains the meta and data stored in the file.
+type ApoMetaData struct {
+ // Data is the value stored after the meta.
+ Data any
+ // Meta is the metadata stored before the data.
+ Meta ApoMeta
+}
diff --git a/lib/binary/binary.go b/lib/binary/binary.go
new file mode 100644
index 00000000..72479d2d
--- /dev/null
+++ b/lib/binary/binary.go
@@ -0,0 +1,12 @@
+// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info>
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+// Package binary complements the standard [encoding/binary] package.
+package binary
+
+import "time"
+
+// timeNow returns the current time in UTC.
+// It is a variable so that it can be replaced, as TestMain does to get a
+// fixed time.
+var timeNow = func() time.Time {
+ return time.Now().UTC()
+}
diff --git a/lib/binary/binary_test.go b/lib/binary/binary_test.go
new file mode 100644
index 00000000..ca394101
--- /dev/null
+++ b/lib/binary/binary_test.go
@@ -0,0 +1,19 @@
+// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info>
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+package binary
+
+import (
+ "os"
+ "testing"
+ "time"
+)
+
+// TestMain replaces timeNow with a function that always returns
+// 2024-12-26 02:21:00 UTC before running the tests, so the timestamps
+// written by the tests are predictable.
+func TestMain(m *testing.M) {
+	var fixed = time.Date(2024, 12, 26, 2, 21, 0, 0, time.UTC)
+
+	timeNow = func() time.Time { return fixed }
+
+	var code = m.Run()
+	os.Exit(code)
+}
diff --git a/lib/binary/testdata/ApoFileWrite_test.txt b/lib/binary/testdata/ApoFileWrite_test.txt
new file mode 100644
index 00000000..97d9f293
--- /dev/null
+++ b/lib/binary/testdata/ApoFileWrite_test.txt
@@ -0,0 +1,18 @@
+// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: BSD-3-Clause
+
+Test writing data to ApoFile.
+
+The hex "18 14 98 b5 17 c8 78 00" is time stamp for 2024-12-26T02:21:00Z.
+
+<<< insert
+insert
+ | 0 1 2 3 4 5 6 7 | 01234567 | 0 1 2 3 4 5 6 7 |
+ | 8 9 A B C D E F | 89ABCDEF | 8 9 A B C D E F |
+0x00000000| 01 00 00 00 00 00 00 00 | ........ | 1 0 0 0 0 0 0 0 |0
+0x00000008| 01 00 00 00 00 00 00 00 | ........ | 1 0 0 0 0 0 0 0 |8
+0x00000010| 26 18 14 98 b5 17 c8 78 | &......x | 38 24 20 152 181 23 200 120 |16
+0x00000018| 00 00 00 00 00 00 00 00 | ........ | 0 0 0 0 0 0 0 0 |24
+0x00000020| 00 00 00 00 00 01 00 00 | ........ | 0 0 0 0 0 1 0 0 |32
+0x00000028| 00 00 00 00 00 01 00 00 | ........ | 0 0 0 0 0 1 0 0 |40
+0x00000030| 00 00 00 00 00 11 | ...... | 0 0 0 0 0 17 |48
diff --git a/lib/binary/testdata/OpenApo_test.txt b/lib/binary/testdata/OpenApo_test.txt
new file mode 100644
index 00000000..b1be246f
--- /dev/null
+++ b/lib/binary/testdata/OpenApo_test.txt
@@ -0,0 +1,15 @@
+// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info>
+// SPDX-License-Identifier: BSD-3-Clause
+
+Test opening Apo file for writing and reading.
+
+<<< This is what the file looks like on empty data.
+
+<<< empty
+empty
+ | 0 1 2 3 4 5 6 7 | 01234567 | 0 1 2 3 4 5 6 7 |
+ | 8 9 A B C D E F | 89ABCDEF | 8 9 A B C D E F |
+0x00000000| 01 00 00 00 00 00 00 00 | ........ | 1 0 0 0 0 0 0 0 |0
+0x00000008| 00 00 00 00 00 00 00 00 | ........ | 0 0 0 0 0 0 0 0 |8
+0x00000010| 11 00 00 00 00 00 00 00 | ........ | 17 0 0 0 0 0 0 0 |16
+0x00000018| 00 | . | 0 |24