From 8041a4c1e7f3c70eb2a1ba5d55136aab80d94949 Mon Sep 17 00:00:00 2001 From: Ignacio Hagopian Date: Mon, 2 Sep 2019 19:19:35 -0300 Subject: [PATCH] Refactor and general tests for codec index (#83) * codec_index: unexport const fields * codec_index: unexport internal functions and doc exported ones * codec_index: rename func & return errors for corruption * codec_index: new test for ReadIndex, WriteIndex, and read corruption * Update internal/codec_index.go Co-Authored-By: James Mills <1290234+prologic@users.noreply.github.com> * Update internal/codec_index.go Co-Authored-By: James Mills <1290234+prologic@users.noreply.github.com> --- internal/codec_index.go | 66 +++++++++++++++----------- internal/codec_index_test.go | 91 ++++++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 27 deletions(-) create mode 100644 internal/codec_index_test.go diff --git a/internal/codec_index.go b/internal/codec_index.go index d639d02..5dfab16 100644 --- a/internal/codec_index.go +++ b/internal/codec_index.go @@ -4,34 +4,44 @@ import ( "encoding/binary" "io" + "github.com/pkg/errors" art "github.com/plar/go-adaptive-radix-tree" ) -const ( - Int32Size = 4 - Int64Size = 8 - FileIDSize = Int32Size - OffsetSize = Int64Size - SizeSize = Int64Size +var ( + errTruncatedKeySize = errors.New("key size is truncated") + errTruncatedKeyData = errors.New("key data is truncated") + errTruncatedData = errors.New("data is truncated") ) -func ReadBytes(r io.Reader) ([]byte, error) { - s := make([]byte, Int32Size) +const ( + int32Size = 4 + int64Size = 8 + fileIDSize = int32Size + offsetSize = int64Size + sizeSize = int64Size +) + +func readKeyBytes(r io.Reader) ([]byte, error) { + s := make([]byte, int32Size) _, err := io.ReadFull(r, s) if err != nil { - return nil, err + if err == io.EOF { + return nil, err + } + return nil, errors.Wrap(errTruncatedKeySize, err.Error()) } size := binary.BigEndian.Uint32(s) b := make([]byte, size) _, err = io.ReadFull(r, b) if err != nil { - return 
nil, err + return nil, errors.Wrap(errTruncatedKeyData, err.Error()) } return b, nil } -func WriteBytes(b []byte, w io.Writer) (int, error) { - s := make([]byte, Int32Size) +func writeBytes(b []byte, w io.Writer) (int, error) { + s := make([]byte, int32Size) binary.BigEndian.PutUint32(s, uint32(len(b))) n, err := w.Write(s) if err != nil { @@ -44,25 +54,25 @@ func WriteBytes(b []byte, w io.Writer) (int, error) { return (n + m), nil } -func ReadItem(r io.Reader) (Item, error) { - buf := make([]byte, (FileIDSize + OffsetSize + SizeSize)) +func readItem(r io.Reader) (Item, error) { + buf := make([]byte, (fileIDSize + offsetSize + sizeSize)) _, err := io.ReadFull(r, buf) if err != nil { - return Item{}, err + return Item{}, errors.Wrap(errTruncatedData, err.Error()) } return Item{ - FileID: int(binary.BigEndian.Uint32(buf[:FileIDSize])), - Offset: int64(binary.BigEndian.Uint64(buf[FileIDSize:(FileIDSize + OffsetSize)])), - Size: int64(binary.BigEndian.Uint64(buf[(FileIDSize + OffsetSize):])), + FileID: int(binary.BigEndian.Uint32(buf[:fileIDSize])), + Offset: int64(binary.BigEndian.Uint64(buf[fileIDSize:(fileIDSize + offsetSize)])), + Size: int64(binary.BigEndian.Uint64(buf[(fileIDSize + offsetSize):])), }, nil } -func WriteItem(item Item, w io.Writer) (int, error) { - buf := make([]byte, (FileIDSize + OffsetSize + SizeSize)) - binary.BigEndian.PutUint32(buf[:FileIDSize], uint32(item.FileID)) - binary.BigEndian.PutUint64(buf[FileIDSize:(FileIDSize+OffsetSize)], uint64(item.Offset)) - binary.BigEndian.PutUint64(buf[(FileIDSize+OffsetSize):], uint64(item.Size)) +func writeItem(item Item, w io.Writer) (int, error) { + buf := make([]byte, (fileIDSize + offsetSize + sizeSize)) + binary.BigEndian.PutUint32(buf[:fileIDSize], uint32(item.FileID)) + binary.BigEndian.PutUint64(buf[fileIDSize:(fileIDSize+offsetSize)], uint64(item.Offset)) + binary.BigEndian.PutUint64(buf[(fileIDSize+offsetSize):], uint64(item.Size)) n, err := w.Write(buf) if err != nil { return 0, err @@ -70,9 
+80,10 @@ func WriteItem(item Item, w io.Writer) (int, error) { return n, nil } +// ReadIndex reads a persisted tree from an io.Reader into a Tree. func ReadIndex(r io.Reader, t art.Tree) error { for { - key, err := ReadBytes(r) + key, err := readKeyBytes(r) if err != nil { if err == io.EOF { break @@ -80,7 +91,7 @@ func ReadIndex(r io.Reader, t art.Tree) error { return err } - item, err := ReadItem(r) + item, err := readItem(r) if err != nil { return err } @@ -91,15 +102,16 @@ func ReadIndex(r io.Reader, t art.Tree) error { return nil } +// WriteIndex persists a Tree into an io.Writer. func WriteIndex(t art.Tree, w io.Writer) (err error) { t.ForEach(func(node art.Node) bool { - _, err = WriteBytes(node.Key(), w) + _, err = writeBytes(node.Key(), w) if err != nil { return false } item := node.Value().(Item) - _, err := WriteItem(item, w) + _, err := writeItem(item, w) if err != nil { return false } diff --git a/internal/codec_index_test.go b/internal/codec_index_test.go new file mode 100644 index 0000000..bb48db1 --- /dev/null +++ b/internal/codec_index_test.go @@ -0,0 +1,91 @@ +package internal + +import ( + "bytes" + "encoding/base64" + "testing" + + "github.com/pkg/errors" + art "github.com/plar/go-adaptive-radix-tree" +) + +const ( + base64SampleTree = "AAAABGFiY2QAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAARhYmNlAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAEYWJjZgAAAAIAAAAAAAAAAgAAAAAAAAACAAAABGFiZ2QAAAADAAAAAAAAAAMAAAAAAAAAAw==" +) + +func TestWriteIndex(t *testing.T) { + at, expectedSerializedSize := getSampleTree() + + var b bytes.Buffer + err := WriteIndex(at, &b) + if err != nil { + t.Fatalf("writing index failed: %v", err) + } + if b.Len() != expectedSerializedSize { + t.Fatalf("incorrect size of serialied index: expected %d, got: %d", expectedSerializedSize, b.Len()) + } + sampleTreeBytes, _ := base64.StdEncoding.DecodeString(base64SampleTree) + if !bytes.Equal(b.Bytes(), sampleTreeBytes) { + t.Fatalf("unexpected serialization of the tree") + } +} + +func TestReadIndex(t *testing.T) 
{ + sampleTreeBytes, _ := base64.StdEncoding.DecodeString(base64SampleTree) + b := bytes.NewBuffer(sampleTreeBytes) + + at := art.New() + err := ReadIndex(b, at) + if err != nil { + t.Fatalf("error while deserializing correct sample tree: %v", err) + } + + atsample, _ := getSampleTree() + if atsample.Size() != at.Size() { + t.Fatalf("trees aren't the same size, expected %v, got %v", atsample.Size(), at.Size()) + } + atsample.ForEach(func(node art.Node) bool { + _, found := at.Search(node.Key()) + if !found { + t.Fatalf("expected node wasn't found: %s", node.Key()) + } + return true + }) +} + +func TestReadCorruptedData(t *testing.T) { + sampleBytes, _ := base64.StdEncoding.DecodeString(base64SampleTree) + table := []struct { + name string + err error + data []byte + }{ + {name: "truncated-key-size-first-item", err: errTruncatedKeySize, data: sampleBytes[:2]}, + {name: "truncated-key-data-second-item", err: errTruncatedKeyData, data: sampleBytes[:6]}, + {name: "truncated-key-size-second-item", err: errTruncatedKeySize, data: sampleBytes[:(int32Size+4+fileIDSize+offsetSize+sizeSize)+2]}, + {name: "truncated-key-data-second-item", err: errTruncatedKeyData, data: sampleBytes[:(int32Size+4+fileIDSize+offsetSize+sizeSize)+6]}, + {name: "truncated-data", err: errTruncatedData, data: sampleBytes[:int32Size+4+(fileIDSize+offsetSize+sizeSize-3)]}, + } + + for i := range table { + t.Run(table[i].name, func(t *testing.T) { + bf := bytes.NewBuffer(table[i].data) + + if err := ReadIndex(bf, art.New()); errors.Cause(err) != table[i].err { + t.Fatalf("expected %v, got %v", table[i].err, err) + } + }) + } +} + +func getSampleTree() (art.Tree, int) { + at := art.New() + keys := [][]byte{[]byte("abcd"), []byte("abce"), []byte("abcf"), []byte("abgd")} + expectedSerializedSize := 0 + for i := range keys { + at.Insert(keys[i], Item{FileID: i, Offset: int64(i), Size: int64(i)}) + expectedSerializedSize += int32Size + len(keys[i]) + fileIDSize + offsetSize + sizeSize + } + + return at, 
expectedSerializedSize +}