diff options
| author | Shulhan <ms@kilabit.info> | 2024-12-26 04:15:28 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2024-12-28 16:46:59 +0700 |
| commit | 4003b6359747f6e43357e4bf190d4e71a66ec796 (patch) | |
| tree | f219d067b0a5a3a2bfe8554f88f39f580a5c59d6 /lib/binary | |
| parent | 052d4673d0d7777bd429134f9f30c4a87462208f (diff) | |
| download | pakakeh.go-4003b6359747f6e43357e4bf190d4e71a66ec796.tar.xz | |
lib/binary: implement append-only binary file
The binary package is a new package that complements the standard binary package.
Currently it implements an append-only binary file that encodes the data using
[binary.Write].
We call it "Apo" for short.
Diffstat (limited to 'lib/binary')
| -rw-r--r-- | lib/binary/apo_file.go | 272 | ||||
| -rw-r--r-- | lib/binary/apo_file_test.go | 168 | ||||
| -rw-r--r-- | lib/binary/apo_footer.go | 55 | ||||
| -rw-r--r-- | lib/binary/apo_header.go | 25 | ||||
| -rw-r--r-- | lib/binary/apo_meta.go | 32 | ||||
| -rw-r--r-- | lib/binary/apo_meta_data.go | 11 | ||||
| -rw-r--r-- | lib/binary/binary.go | 12 | ||||
| -rw-r--r-- | lib/binary/binary_test.go | 19 | ||||
| -rw-r--r-- | lib/binary/testdata/ApoFileWrite_test.txt | 18 | ||||
| -rw-r--r-- | lib/binary/testdata/OpenApo_test.txt | 15 |
10 files changed, 627 insertions, 0 deletions
diff --git a/lib/binary/apo_file.go b/lib/binary/apo_file.go new file mode 100644 index 00000000..b44614ee --- /dev/null +++ b/lib/binary/apo_file.go @@ -0,0 +1,272 @@ +// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info> +// +// SPDX-License-Identifier: BSD-3-Clause + +package binary + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "io" + "os" + "sync" +) + +// ApoFile implement append-only writer that encode the data using +// [binary.Write] with [binary.BigEndian] order. +// The data to be written must support [binary.Write] (must contains +// fixed-size type). +// Type like string or map will not supported, so does struct with that +// field. +// To do that one need to implement [io.WriterTo] in the type. +// +// The file that writen by ApoFile have the following structure, +// +// Apohead +// * ApoMeta data +// Apofoot +// +// Each data prepended by [ApoMeta] as metadata that contains the write +// operation, the time when its written, and kind of data being writen. +// +// The [ApoMeta] define the operation to data: +// +// - [ApoOpInsert] operation insert new data, which should be unique among +// others. +// - [ApoOpUpdate] operation update indicates that the next data contains +// update for previous inserted data. +// The data being updated can be partial or all of it. +// - [ApoOpReplace] operation replace indicated that the next data replace +// whole previous inserted data. +// - [ApoOpDelete] operation delete the previous inserted data. +// Which record being deleted is defined inside the data (probably by +// using some ID). +// +// The update and replace may seems duplicate. +// The update operation is provided to help the writer to write partial data +// when needed. +type ApoFile struct { + file *os.File `noequal:""` + + name string + foot apoFooter + head apoHeader + + mtx sync.Mutex +} + +// OpenApo open file for writing in append mode. +// If the file does not exist, it will be created. 
+// Once the file is opened it is ready for write-only. +// +// To open a file for reading use [ReadAofile]. +func OpenApo(name string) (apo *ApoFile, err error) { + var logp = `OpenApo` + var isNew bool + var openFlag = os.O_RDWR + + apo = &ApoFile{ + name: name, + } + _, err = os.Stat(name) + if err != nil { + if !errors.Is(err, os.ErrNotExist) { + return nil, fmt.Errorf(`%s: %w`, logp, err) + } + openFlag |= os.O_CREATE + isNew = true + } + + apo.file, err = os.OpenFile(name, openFlag, 0600) + if err != nil { + return nil, fmt.Errorf(`%s: %w`, logp, err) + } + if isNew { + err = apo.init() + if err != nil { + _ = apo.Close() + return nil, fmt.Errorf(`%s: %w`, logp, err) + } + } else { + err = binary.Read(apo.file, binary.BigEndian, &apo.head) + if err != nil { + return nil, fmt.Errorf(`%s: read header: %w`, logp, err) + } + + _, err = apo.file.Seek(apo.head.OffFoot, 0) + if err != nil { + return nil, fmt.Errorf(`%s: seek footer: %w`, logp, err) + } + + _, err = apo.foot.ReadFrom(apo.file) + if err != nil { + return nil, fmt.Errorf(`%s: read footer: %w`, logp, err) + } + } + + return apo, nil +} + +// Close the file. +func (apo *ApoFile) Close() (err error) { + apo.mtx.Lock() + err = apo.file.Close() + apo.mtx.Unlock() + return err +} + +// ReadAll read all meta and data from file where all data has the same +// type. +// If data implement [io.ReaderFrom] it will use [io.ReaderFrom.ReadForm], +// otherwise it will use [binary.Read]. 
+func (apo *ApoFile) ReadAll(data any) (list []ApoMetaData, err error) { + var logp = `ReadAll` + + var hdrSize = int64(binary.Size(apo.head)) + _, err = apo.file.Seek(hdrSize, 0) + if err != nil { + return nil, fmt.Errorf(`%s: %w`, logp, err) + } + + var meta ApoMeta + for x := range apo.head.TotalData { + err = binary.Read(apo.file, binary.BigEndian, &meta) + if err != nil { + return nil, fmt.Errorf(`%s: at %d: %w`, logp, x, err) + } + + switch v := data.(type) { + case io.ReaderFrom: + _, err = v.ReadFrom(apo.file) + default: + err = binary.Read(apo.file, binary.BigEndian, data) + } + if err != nil { + return nil, fmt.Errorf(`%s: at %d: %w`, logp, x, err) + } + + list = append(list, ApoMetaData{ + Meta: meta, + Data: data, + }) + } + return list, nil +} + +// Write the meta and data into file. +// If the data is a type with non-fixed size, like slice, +// string, or map (or struct with non-fixed size field); it should implement +// [io.WriterTo], otherwise the write will fail. +func (apo *ApoFile) Write(meta ApoMeta, data any) (err error) { + var ( + logp = `Write` + buf bytes.Buffer + ) + + if meta.At <= 0 { + meta.At = timeNow().UnixNano() + } + + switch v := data.(type) { + case io.WriterTo: + _, err = v.WriteTo(&buf) + if err != nil { + return fmt.Errorf(`%s: using io.WriterTo: %w`, + logp, err) + } + + default: + err = binary.Write(&buf, binary.BigEndian, data) + if err != nil { + return fmt.Errorf(`%s: encode data: %w`, logp, err) + } + } + + apo.mtx.Lock() + defer apo.mtx.Unlock() + + apo.head.TotalData++ + + // Remember the current footer offset as the new meta-data index. + apo.foot.idxMetaOff = append(apo.foot.idxMetaOff, apo.head.OffFoot) + + err = apo.commit(meta, buf.Bytes()) + if err != nil { + return fmt.Errorf(`%s: %w`, logp, err) + } + return nil +} + +func (apo *ApoFile) commit(meta ApoMeta, data []byte) (err error) { + // Move back to the offset of footer ... 
+ _, err = apo.file.Seek(apo.head.OffFoot, 0) + if err != nil { + return fmt.Errorf(`seek back %d: %w`, apo.head.OffFoot, err) + } + + // write meta and data ... + err = binary.Write(apo.file, binary.BigEndian, meta) + if err != nil { + return fmt.Errorf(`write meta: %w`, err) + } + + _, err = apo.file.Write(data) + if err != nil { + return fmt.Errorf(`write data: %w`, err) + } + + // get the current offset for new footer ... + apo.head.OffFoot, err = apo.file.Seek(0, 1) + if err != nil { + return fmt.Errorf(`seek current: %w`, err) + } + + // write footer ... + _, err = apo.foot.WriteTo(apo.file) + if err != nil { + return fmt.Errorf(`write footer: %w`, err) + } + + // ... and finally write the header. + _, err = apo.file.Seek(0, 0) + if err != nil { + return fmt.Errorf(`seek header: %w`, err) + } + + err = binary.Write(apo.file, binary.BigEndian, apo.head) + if err != nil { + return fmt.Errorf(`write header: %w`, err) + } + + err = apo.file.Sync() + if err != nil { + return err + } + + return nil +} + +func (apo *ApoFile) init() (err error) { + var logp = `init` + + apo.head.init() + apo.head.OffFoot = int64(binary.Size(apo.head)) + + err = binary.Write(apo.file, binary.BigEndian, apo.head) + if err != nil { + return fmt.Errorf(`%s: writing header: %w`, logp, err) + } + + _, err = apo.foot.WriteTo(apo.file) + if err != nil { + return fmt.Errorf(`%s: writing footer: %w`, logp, err) + } + + err = apo.file.Sync() + if err != nil { + return fmt.Errorf(`%s: on Sync: %w`, logp, err) + } + return nil +} diff --git a/lib/binary/apo_file_test.go b/lib/binary/apo_file_test.go new file mode 100644 index 00000000..52d54500 --- /dev/null +++ b/lib/binary/apo_file_test.go @@ -0,0 +1,168 @@ +// SPDX-FileCopyrightText: 2024 M. 
Shulhan <ms@kilabit.info> +// SPDX-License-Identifier: BSD-3-Clause + +package binary + +import ( + "bytes" + "os" + "path/filepath" + "testing" + + libbytes "git.sr.ht/~shulhan/pakakeh.go/lib/bytes" + "git.sr.ht/~shulhan/pakakeh.go/lib/test" +) + +// Opening the ApoFile should create the file if its not exist, and write +// the header and footer. +func TestOpenApo(t *testing.T) { + var ( + dir = t.TempDir() + path = filepath.Join(dir, `OpenApo_test.bin`) + + apo *ApoFile + err error + ) + + apo, err = OpenApo(path) + if err != nil { + t.Fatal(err) + } + err = apo.Close() + if err != nil { + t.Fatal(err) + } + + var tdata *test.Data + + tdata, err = test.LoadData(`testdata/OpenApo_test.txt`) + if err != nil { + t.Fatal(err) + } + + var gotb []byte + gotb, err = os.ReadFile(path) + if err != nil { + t.Fatal(err) + } + + var gotDump bytes.Buffer + libbytes.DumpPrettyTable(&gotDump, `empty`, gotb) + + var exp = string(tdata.Output[`empty`]) + test.Assert(t, `empty`, exp, gotDump.String()) + + // Test reading ... 
+ + var apor *ApoFile + apor, err = OpenApo(path) + if err != nil { + t.Fatal(err) + } + + test.Assert(t, `ReadApo`, apo, apor) +} + +type testCaseWrite struct { + tag string + expHexdump string + expMetaData []ApoMetaData + expFooter apoFooter + expHeader apoHeader +} + +type dataWrite struct { + ID int64 +} + +func TestApoFileWrite(t *testing.T) { + tdata, err := test.LoadData(`testdata/ApoFileWrite_test.txt`) + if err != nil { + t.Fatal(err) + } + + var dir = t.TempDir() + var path = filepath.Join(dir, `ApoFileWrite_test.apo`) + + apo, err := OpenApo(path) + if err != nil { + t.Fatal(err) + } + + t.Cleanup(func() { + _ = apo.Close() + }) + + var listCase = []testCaseWrite{{ + tag: `insert`, + expHexdump: string(tdata.Output[`insert`]), + expHeader: apoHeader{ + Version: apoVersionOne, + TotalData: 1, + OffFoot: 38, + }, + expFooter: apoFooter{ + idxMetaOff: []int64{ + 17, + }, + }, + expMetaData: []ApoMetaData{{ + Meta: ApoMeta{ + At: 1735179660000000000, + }, + Data: &dataWrite{ + ID: 1, + }, + }}, + }} + + for _, tcase := range listCase { + t.Run(tcase.tag, func(t *testing.T) { + testWrite(t, tcase, apo) + }) + + t.Run(tcase.tag+` read`, func(t *testing.T) { + testRead(t, tcase, apo) + }) + } +} + +func testWrite(t *testing.T, tcase testCaseWrite, apow *ApoFile) { + for _, md := range tcase.expMetaData { + err := apow.Write(md.Meta, md.Data) + if err != nil { + t.Fatal(err) + } + } + + gotb, err := os.ReadFile(apow.name) + if err != nil { + t.Fatal(err) + } + + var gotDump bytes.Buffer + libbytes.DumpPrettyTable(&gotDump, tcase.tag, gotb) + + test.Assert(t, tcase.tag, tcase.expHexdump, gotDump.String()) +} + +func testRead(t *testing.T, tcase testCaseWrite, apow *ApoFile) { + apor, err := OpenApo(apow.name) + if err != nil { + t.Fatal(err) + } + + t.Cleanup(func() { + _ = apor.Close() + }) + + test.Assert(t, `header`, tcase.expHeader, apor.head) + test.Assert(t, `footer`, tcase.expFooter, apor.foot) + + var data dataWrite + gotMetaData, err := 
apor.ReadAll(&data) + if err != nil { + t.Fatal(err) + } + test.Assert(t, `meta-data`, tcase.expMetaData, gotMetaData) +} diff --git a/lib/binary/apo_footer.go b/lib/binary/apo_footer.go new file mode 100644 index 00000000..7e6670c1 --- /dev/null +++ b/lib/binary/apo_footer.go @@ -0,0 +1,55 @@ +// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info> +// +// SPDX-License-Identifier: BSD-3-Clause + +package binary + +import ( + "encoding/binary" + "io" +) + +// apoFooter contains dynamic meta data for single Apo file. +type apoFooter struct { + // idxMetaOff contains the offset of ApoMeta. + idxMetaOff []int64 +} + +func (foot *apoFooter) WriteTo(w io.Writer) (n int64, err error) { + var nidx int64 = int64(len(foot.idxMetaOff)) + _ = binary.Write(w, binary.BigEndian, nidx) + if err != nil { + return 0, err + } + var sizei64 = int64(binary.Size(nidx)) + n = sizei64 + for _, off := range foot.idxMetaOff { + err = binary.Write(w, binary.BigEndian, off) + if err != nil { + return n, err + } + n += sizei64 + } + return n, nil +} + +func (foot *apoFooter) ReadFrom(r io.Reader) (n int64, err error) { + var nidx int64 + err = binary.Read(r, binary.BigEndian, &nidx) + if err != nil { + return 0, err + } + var ( + off int64 + size = int64(binary.Size(off)) + ) + for range nidx { + err = binary.Read(r, binary.BigEndian, &off) + if err != nil { + return n, err + } + foot.idxMetaOff = append(foot.idxMetaOff, off) + n += size + } + return n, nil +} diff --git a/lib/binary/apo_header.go b/lib/binary/apo_header.go new file mode 100644 index 00000000..ec8a0a7b --- /dev/null +++ b/lib/binary/apo_header.go @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info> +// +// SPDX-License-Identifier: BSD-3-Clause + +package binary + +type apoVersion byte + +const apoVersionOne apoVersion = 1 + +// apoHeader define the header for Apo file. +type apoHeader struct { + // Version define the version of the Apo file. 
+ Version apoVersion + + // TotalData number of data in the file. + TotalData int64 + + // OffFoot define the offset of Apo footer in the file. + OffFoot int64 +} + +func (head *apoHeader) init() { + head.Version = apoVersionOne +} diff --git a/lib/binary/apo_meta.go b/lib/binary/apo_meta.go new file mode 100644 index 00000000..7f9aa63f --- /dev/null +++ b/lib/binary/apo_meta.go @@ -0,0 +1,32 @@ +// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info> +// +// SPDX-License-Identifier: BSD-3-Clause + +package binary + +// ApoOp define the write operation of data. +type ApoOp byte + +// List of possible Apo write operation. +const ( + ApoOpInsert ApoOp = 0 // Default operation. + ApoOpUpdate = 1 + ApoOpReplace = 2 + ApoOpDelete = 4 +) + +// ApoMeta define the metadata for each data. +type ApoMeta struct { + // At contains the timestamp with nanoseconds, in UTC timezone, when + // Write called. + At int64 + + // Kind define the type of data. + // The value of this field is defined by user, to know type of data + // stored for reading later. + Kind int32 + + // Op define the write operation, including: insert, update, + // replace, or delete. + Op ApoOp +} diff --git a/lib/binary/apo_meta_data.go b/lib/binary/apo_meta_data.go new file mode 100644 index 00000000..54796426 --- /dev/null +++ b/lib/binary/apo_meta_data.go @@ -0,0 +1,11 @@ +// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info> +// +// SPDX-License-Identifier: BSD-3-Clause + +package binary + +// ApoMetaData contains the meta and data stored on the file. +type ApoMetaData struct { + Data any + Meta ApoMeta +} diff --git a/lib/binary/binary.go b/lib/binary/binary.go new file mode 100644 index 00000000..72479d2d --- /dev/null +++ b/lib/binary/binary.go @@ -0,0 +1,12 @@ +// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info> +// +// SPDX-License-Identifier: BSD-3-Clause + +// Package binary complement the standard [binary] package. 
+package binary + +import "time" + +var timeNow = func() time.Time { + return time.Now().UTC() +} diff --git a/lib/binary/binary_test.go b/lib/binary/binary_test.go new file mode 100644 index 00000000..ca394101 --- /dev/null +++ b/lib/binary/binary_test.go @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info> +// +// SPDX-License-Identifier: BSD-3-Clause + +package binary + +import ( + "os" + "testing" + "time" +) + +func TestMain(m *testing.M) { + var now = time.Date(2024, 12, 26, 2, 21, 0, 0, time.UTC) + timeNow = func() time.Time { + return now + } + os.Exit(m.Run()) +} diff --git a/lib/binary/testdata/ApoFileWrite_test.txt b/lib/binary/testdata/ApoFileWrite_test.txt new file mode 100644 index 00000000..97d9f293 --- /dev/null +++ b/lib/binary/testdata/ApoFileWrite_test.txt @@ -0,0 +1,18 @@ +// SPDX-FileCopyrightText: 2024 M. Shulhan <ms@kilabit.info> +// SPDX-License-Identifier: BSD-3-Clause + +Test writing data to ApoWriter. + +The hex "18 14 98 b5 17 c8 78 00" is time stamp for 2024-12-26T02:21:00Z. + +<<< insert +insert + | 0 1 2 3 4 5 6 7 | 01234567 | 0 1 2 3 4 5 6 7 | + | 8 9 A B C D E F | 89ABCDEF | 8 9 A B C D E F | +0x00000000| 01 00 00 00 00 00 00 00 | ........ | 1 0 0 0 0 0 0 0 |0 +0x00000008| 01 00 00 00 00 00 00 00 | ........ | 1 0 0 0 0 0 0 0 |8 +0x00000010| 26 18 14 98 b5 17 c8 78 | &......x | 38 24 20 152 181 23 200 120 |16 +0x00000018| 00 00 00 00 00 00 00 00 | ........ | 0 0 0 0 0 0 0 0 |24 +0x00000020| 00 00 00 00 00 01 00 00 | ........ | 0 0 0 0 0 1 0 0 |32 +0x00000028| 00 00 00 00 00 01 00 00 | ........ | 0 0 0 0 0 1 0 0 |40 +0x00000030| 00 00 00 00 00 11 | ...... | 0 0 0 0 0 17 |48 diff --git a/lib/binary/testdata/OpenApo_test.txt b/lib/binary/testdata/OpenApo_test.txt new file mode 100644 index 00000000..b1be246f --- /dev/null +++ b/lib/binary/testdata/OpenApo_test.txt @@ -0,0 +1,15 @@ +// SPDX-FileCopyrightText: 2024 M. 
Shulhan <ms@kilabit.info> +// SPDX-License-Identifier: BSD-3-Clause + +Test opening Apo file for writing and reading. + +<<< This is what the file looks like on empty data. + +<<< empty +empty + | 0 1 2 3 4 5 6 7 | 01234567 | 0 1 2 3 4 5 6 7 | + | 8 9 A B C D E F | 89ABCDEF | 8 9 A B C D E F | +0x00000000| 01 00 00 00 00 00 00 00 | ........ | 1 0 0 0 0 0 0 0 |0 +0x00000008| 00 00 00 00 00 00 00 00 | ........ | 0 0 0 0 0 0 0 0 |8 +0x00000010| 11 00 00 00 00 00 00 00 | ........ | 17 0 0 0 0 0 0 0 |16 +0x00000018| 00 | . | 0 |24 |
