Refactor and general tests for codec index (#83)

* codec_index: unexport const fields

* codec_index: unexport internal functions and doc exported ones

* codec_index: rename func & return errors for corruption

* codec_index: new test for ReadIndex, WriteIndex, and read corruption

* Update internal/codec_index.go

Co-Authored-By: James Mills <1290234+prologic@users.noreply.github.com>

* Update internal/codec_index.go

Co-Authored-By: James Mills <1290234+prologic@users.noreply.github.com>
This commit is contained in:
Ignacio Hagopian 2019-09-02 19:19:35 -03:00 committed by James Mills
parent 50d3971e86
commit 8041a4c1e7
2 changed files with 130 additions and 27 deletions

@ -4,34 +4,44 @@ import (
"encoding/binary"
"io"
"github.com/pkg/errors"
art "github.com/plar/go-adaptive-radix-tree"
)
const (
Int32Size = 4
Int64Size = 8
FileIDSize = Int32Size
OffsetSize = Int64Size
SizeSize = Int64Size
// Sentinel errors returned when a persisted index ends mid-record;
// callers can match them with errors.Cause (pkg/errors) after unwrapping.
var (
errTruncatedKeySize = errors.New("key size is truncated")
errTruncatedKeyData = errors.New("key data is truncated")
errTruncatedData = errors.New("data is truncated")
)
func ReadBytes(r io.Reader) ([]byte, error) {
s := make([]byte, Int32Size)
// Byte widths of the fixed-size fields in the serialized index format:
// a 4-byte big-endian key length, then per item a file id (4 bytes),
// an offset (8 bytes) and a size (8 bytes).
const (
int32Size = 4
int64Size = 8
fileIDSize = int32Size
offsetSize = int64Size
sizeSize = int64Size
)
func readKeyBytes(r io.Reader) ([]byte, error) {
s := make([]byte, int32Size)
_, err := io.ReadFull(r, s)
if err != nil {
return nil, err
if err == io.EOF {
return nil, err
}
return nil, errors.Wrap(errTruncatedKeySize, err.Error())
}
size := binary.BigEndian.Uint32(s)
b := make([]byte, size)
_, err = io.ReadFull(r, b)
if err != nil {
return nil, err
return nil, errors.Wrap(errTruncatedKeyData, err.Error())
}
return b, nil
}
func WriteBytes(b []byte, w io.Writer) (int, error) {
s := make([]byte, Int32Size)
func writeBytes(b []byte, w io.Writer) (int, error) {
s := make([]byte, int32Size)
binary.BigEndian.PutUint32(s, uint32(len(b)))
n, err := w.Write(s)
if err != nil {
@ -44,25 +54,25 @@ func WriteBytes(b []byte, w io.Writer) (int, error) {
return (n + m), nil
}
func ReadItem(r io.Reader) (Item, error) {
buf := make([]byte, (FileIDSize + OffsetSize + SizeSize))
func readItem(r io.Reader) (Item, error) {
buf := make([]byte, (fileIDSize + offsetSize + sizeSize))
_, err := io.ReadFull(r, buf)
if err != nil {
return Item{}, err
return Item{}, errors.Wrap(errTruncatedData, err.Error())
}
return Item{
FileID: int(binary.BigEndian.Uint32(buf[:FileIDSize])),
Offset: int64(binary.BigEndian.Uint64(buf[FileIDSize:(FileIDSize + OffsetSize)])),
Size: int64(binary.BigEndian.Uint64(buf[(FileIDSize + OffsetSize):])),
FileID: int(binary.BigEndian.Uint32(buf[:fileIDSize])),
Offset: int64(binary.BigEndian.Uint64(buf[fileIDSize:(fileIDSize + offsetSize)])),
Size: int64(binary.BigEndian.Uint64(buf[(fileIDSize + offsetSize):])),
}, nil
}
func WriteItem(item Item, w io.Writer) (int, error) {
buf := make([]byte, (FileIDSize + OffsetSize + SizeSize))
binary.BigEndian.PutUint32(buf[:FileIDSize], uint32(item.FileID))
binary.BigEndian.PutUint64(buf[FileIDSize:(FileIDSize+OffsetSize)], uint64(item.Offset))
binary.BigEndian.PutUint64(buf[(FileIDSize+OffsetSize):], uint64(item.Size))
func writeItem(item Item, w io.Writer) (int, error) {
buf := make([]byte, (fileIDSize + offsetSize + sizeSize))
binary.BigEndian.PutUint32(buf[:fileIDSize], uint32(item.FileID))
binary.BigEndian.PutUint64(buf[fileIDSize:(fileIDSize+offsetSize)], uint64(item.Offset))
binary.BigEndian.PutUint64(buf[(fileIDSize+offsetSize):], uint64(item.Size))
n, err := w.Write(buf)
if err != nil {
return 0, err
@ -70,9 +80,10 @@ func WriteItem(item Item, w io.Writer) (int, error) {
return n, nil
}
// ReadIndex reads a persisted tree from an io.Reader into a Tree
func ReadIndex(r io.Reader, t art.Tree) error {
for {
key, err := ReadBytes(r)
key, err := readKeyBytes(r)
if err != nil {
if err == io.EOF {
break
@ -80,7 +91,7 @@ func ReadIndex(r io.Reader, t art.Tree) error {
return err
}
item, err := ReadItem(r)
item, err := readItem(r)
if err != nil {
return err
}
@ -91,15 +102,16 @@ func ReadIndex(r io.Reader, t art.Tree) error {
return nil
}
// WriteIndex persists a Tree into an io.Writer
func WriteIndex(t art.Tree, w io.Writer) (err error) {
t.ForEach(func(node art.Node) bool {
_, err = WriteBytes(node.Key(), w)
_, err = writeBytes(node.Key(), w)
if err != nil {
return false
}
item := node.Value().(Item)
_, err := WriteItem(item, w)
_, err := writeItem(item, w)
if err != nil {
return false
}

@ -0,0 +1,91 @@
package internal
import (
"bytes"
"encoding/base64"
"testing"
"github.com/pkg/errors"
art "github.com/plar/go-adaptive-radix-tree"
)
const (
base64SampleTree = "AAAABGFiY2QAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAARhYmNlAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAEYWJjZgAAAAIAAAAAAAAAAgAAAAAAAAACAAAABGFiZ2QAAAADAAAAAAAAAAMAAAAAAAAAAw=="
)
// TestWriteIndex serializes the sample tree and verifies both the total
// serialized size and the exact byte output against the base64 fixture.
func TestWriteIndex(t *testing.T) {
	at, expectedSerializedSize := getSampleTree()

	var b bytes.Buffer
	err := WriteIndex(at, &b)
	if err != nil {
		t.Fatalf("writing index failed: %v", err)
	}
	if b.Len() != expectedSerializedSize {
		// Fixed typo: "serialied" -> "serialized".
		t.Fatalf("incorrect size of serialized index: expected %d, got: %d", expectedSerializedSize, b.Len())
	}
	// Check the fixture decode error instead of silently ignoring it.
	sampleTreeBytes, err := base64.StdEncoding.DecodeString(base64SampleTree)
	if err != nil {
		t.Fatalf("decoding sample tree fixture: %v", err)
	}
	if !bytes.Equal(b.Bytes(), sampleTreeBytes) {
		t.Fatalf("unexpected serialization of the tree")
	}
}
// TestReadIndex deserializes the known-good fixture and verifies the
// resulting tree matches the sample tree in size and key set.
func TestReadIndex(t *testing.T) {
	raw, _ := base64.StdEncoding.DecodeString(base64SampleTree)

	got := art.New()
	if err := ReadIndex(bytes.NewBuffer(raw), got); err != nil {
		t.Fatalf("error while deserializing correct sample tree: %v", err)
	}

	want, _ := getSampleTree()
	if want.Size() != got.Size() {
		t.Fatalf("trees aren't the same size, expected %v, got %v", want.Size(), got.Size())
	}
	// Every key of the sample tree must be present in the decoded tree.
	want.ForEach(func(node art.Node) bool {
		if _, ok := got.Search(node.Key()); !ok {
			t.Fatalf("expected node wasn't found: %s", node.Key())
		}
		return true
	})
}
// TestReadCorruptedData feeds ReadIndex truncated prefixes of a valid
// serialized tree and checks each yields the matching sentinel error.
func TestReadCorruptedData(t *testing.T) {
	// Check the fixture decode error instead of silently ignoring it.
	sampleBytes, err := base64.StdEncoding.DecodeString(base64SampleTree)
	if err != nil {
		t.Fatalf("decoding sample tree fixture: %v", err)
	}

	// One serialized entry: 4-byte key size, 4-byte key ("abcd" etc.),
	// then the fixed-width item (file id + offset + size).
	firstEntrySize := int32Size + 4 + fileIDSize + offsetSize + sizeSize

	table := []struct {
		name string
		err  error
		data []byte
	}{
		{name: "truncated-key-size-first-item", err: errTruncatedKeySize, data: sampleBytes[:2]},
		// Fixed duplicate sub-test name: [:6] cuts into the FIRST item's
		// key data, so this case is "first-item", not "second-item".
		{name: "truncated-key-data-first-item", err: errTruncatedKeyData, data: sampleBytes[:6]},
		{name: "truncated-key-size-second-item", err: errTruncatedKeySize, data: sampleBytes[:firstEntrySize+2]},
		{name: "truncated-key-data-second-item", err: errTruncatedKeyData, data: sampleBytes[:firstEntrySize+6]},
		{name: "truncated-data", err: errTruncatedData, data: sampleBytes[:firstEntrySize-3]},
	}

	for i := range table {
		t.Run(table[i].name, func(t *testing.T) {
			bf := bytes.NewBuffer(table[i].data)
			if err := ReadIndex(bf, art.New()); errors.Cause(err) != table[i].err {
				t.Fatalf("expected %v, got %v", table[i].err, err)
			}
		})
	}
}
// getSampleTree builds a small in-memory tree with four fixed keys and
// returns it along with the byte size its serialized form should occupy.
func getSampleTree() (art.Tree, int) {
	keys := [][]byte{[]byte("abcd"), []byte("abce"), []byte("abcf"), []byte("abgd")}

	tree := art.New()
	total := 0
	for i, key := range keys {
		tree.Insert(key, Item{FileID: i, Offset: int64(i), Size: int64(i)})
		// Each entry costs a length prefix, the key itself, and one item.
		total += int32Size + len(key) + fileIDSize + offsetSize + sizeSize
	}
	return tree, total
}