Makefile setup & key/value coherent datatypes & refactoring (#98)

* internal/data: comment exported functions

* internal/data: make smaller codec exported api surface

* make key and value size serialization bubble up to everything

* Makefile setup & go mod tidy
This commit is contained in:
Ignacio Hagopian 2019-09-12 10:44:26 -03:00 committed by GitHub
parent 7e0fa151f7
commit 5be114adab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 112 additions and 69 deletions

@ -1,4 +1,4 @@
.PHONY: dev build generate install image release profile bench test clean .PHONY: dev build generate install image release profile bench test clean setup
CGO_ENABLED=0 CGO_ENABLED=0
VERSION=$(shell git describe --abbrev=0 --tags) VERSION=$(shell git describe --abbrev=0 --tags)
@ -49,5 +49,8 @@ test: build
-race \ -race \
. .
setup:
@go install github.com/vektra/mockery/.../
clean: clean:
@git clean -f -d -X @git clean -f -d -X

@ -149,10 +149,10 @@ func (b *Bitcask) Has(key []byte) bool {
// Put stores the key and value in the database. // Put stores the key and value in the database.
func (b *Bitcask) Put(key, value []byte) error { func (b *Bitcask) Put(key, value []byte) error {
if len(key) > b.config.MaxKeySize { if uint32(len(key)) > b.config.MaxKeySize {
return ErrKeyTooLarge return ErrKeyTooLarge
} }
if len(value) > b.config.MaxValueSize { if uint64(len(value)) > b.config.MaxValueSize {
return ErrValueTooLarge return ErrValueTooLarge
} }
@ -261,7 +261,7 @@ func (b *Bitcask) put(key, value []byte) (int64, int64, error) {
id := b.curr.FileID() id := b.curr.FileID()
df, err := data.NewDatafile(b.path, id, true) df, err := data.NewDatafile(b.path, id, true, b.config.MaxKeySize, b.config.MaxValueSize)
if err != nil { if err != nil {
return -1, 0, err return -1, 0, err
} }
@ -269,7 +269,7 @@ func (b *Bitcask) put(key, value []byte) (int64, int64, error) {
b.datafiles[id] = df b.datafiles[id] = df
id = b.curr.FileID() + 1 id = b.curr.FileID() + 1
curr, err := data.NewDatafile(b.path, id, false) curr, err := data.NewDatafile(b.path, id, false, b.config.MaxKeySize, b.config.MaxValueSize)
if err != nil { if err != nil {
return -1, 0, err return -1, 0, err
} }
@ -297,7 +297,7 @@ func (b *Bitcask) reopen() error {
datafiles := make(map[int]data.Datafile, len(ids)) datafiles := make(map[int]data.Datafile, len(ids))
for _, id := range ids { for _, id := range ids {
df, err := data.NewDatafile(b.path, id, true) df, err := data.NewDatafile(b.path, id, true, b.config.MaxKeySize, b.config.MaxValueSize)
if err != nil { if err != nil {
return err return err
} }
@ -338,7 +338,7 @@ func (b *Bitcask) reopen() error {
id = ids[(len(ids) - 1)] id = ids[(len(ids) - 1)]
} }
curr, err := data.NewDatafile(b.path, id, false) curr, err := data.NewDatafile(b.path, id, false, b.config.MaxKeySize, b.config.MaxValueSize)
if err != nil { if err != nil {
return err return err
} }

@ -1126,8 +1126,8 @@ func BenchmarkGet(b *testing.B) {
value := []byte(strings.Repeat(" ", tt.size)) value := []byte(strings.Repeat(" ", tt.size))
options := []Option{ options := []Option{
WithMaxKeySize(len(key)), WithMaxKeySize(uint32(len(key))),
WithMaxValueSize(tt.size), WithMaxValueSize(uint64(tt.size)),
} }
db, err := Open(testdir, options...) db, err := Open(testdir, options...)
if err != nil { if err != nil {

@ -50,11 +50,11 @@ func init() {
"with-max-datafile-size", "", bitcask.DefaultMaxDatafileSize, "with-max-datafile-size", "", bitcask.DefaultMaxDatafileSize,
"Maximum size of each datafile", "Maximum size of each datafile",
) )
exportCmd.PersistentFlags().IntP( exportCmd.PersistentFlags().Uint32P(
"with-max-key-size", "", bitcask.DefaultMaxKeySize, "with-max-key-size", "", bitcask.DefaultMaxKeySize,
"Maximum size of each key", "Maximum size of each key",
) )
exportCmd.PersistentFlags().IntP( exportCmd.PersistentFlags().Uint64P(
"with-max-value-size", "", bitcask.DefaultMaxValueSize, "with-max-value-size", "", bitcask.DefaultMaxValueSize,
"Maximum size of each value", "Maximum size of each value",
) )

@ -30,8 +30,8 @@ var initdbCmd = &cobra.Command{
path := viper.GetString("path") path := viper.GetString("path")
maxDatafileSize := viper.GetInt("with-max-datafile-size") maxDatafileSize := viper.GetInt("with-max-datafile-size")
maxKeySize := viper.GetInt("with-max-key-size") maxKeySize := viper.GetUint32("with-max-key-size")
maxValueSize := viper.GetInt("with-max-value-size") maxValueSize := viper.GetUint64("with-max-value-size")
db, err := bitcask.Open( db, err := bitcask.Open(
path, path,
@ -56,11 +56,11 @@ func init() {
"with-max-datafile-size", "", bitcask.DefaultMaxDatafileSize, "with-max-datafile-size", "", bitcask.DefaultMaxDatafileSize,
"Maximum size of each datafile", "Maximum size of each datafile",
) )
initdbCmd.PersistentFlags().IntP( initdbCmd.PersistentFlags().Uint32P(
"with-max-key-size", "", bitcask.DefaultMaxKeySize, "with-max-key-size", "", bitcask.DefaultMaxKeySize,
"Maximum size of each key", "Maximum size of each key",
) )
initdbCmd.PersistentFlags().IntP( initdbCmd.PersistentFlags().Uint64P(
"with-max-value-size", "", bitcask.DefaultMaxValueSize, "with-max-value-size", "", bitcask.DefaultMaxValueSize,
"Maximum size of each value", "Maximum size of each value",
) )

1
go.sum

@ -173,6 +173,7 @@ golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGm
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846 h1:0oJP+9s5Z3MT6dym56c4f7nVeujVpL1QyD2Vp/bTql0=
golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=

@ -8,10 +8,10 @@ import (
// Config contains the bitcask configuration parameters // Config contains the bitcask configuration parameters
type Config struct { type Config struct {
MaxDatafileSize int `json:"max_datafile_size"` MaxDatafileSize int `json:"max_datafile_size"`
MaxKeySize int `json:"max_key_size"` MaxKeySize uint32 `json:"max_key_size"`
MaxValueSize int `json:"max_value_size"` MaxValueSize uint64 `json:"max_value_size"`
Sync bool `json:"sync"` Sync bool `json:"sync"`
} }
// Load loads a configuration from the given path // Load loads a configuration from the given path

@ -10,11 +10,17 @@ import (
) )
const ( const (
KeySize = 4 keySize = 4
ValueSize = 8 valueSize = 8
checksumSize = 4 checksumSize = 4
) )
var (
// ErrInvalidKeyOrValueSize indicates a serialized key/value size
// which is greater than specified limit
ErrInvalidKeyOrValueSize = errors.New("key/value size is invalid")
)
// NewEncoder creates a streaming Entry encoder. // NewEncoder creates a streaming Entry encoder.
func NewEncoder(w io.Writer) *Encoder { func NewEncoder(w io.Writer) *Encoder {
return &Encoder{w: bufio.NewWriter(w)} return &Encoder{w: bufio.NewWriter(w)}
@ -29,9 +35,9 @@ type Encoder struct {
// Encode takes any Entry and streams it to the underlying writer. // Encode takes any Entry and streams it to the underlying writer.
// Messages are framed with a key-length and value-length prefix. // Messages are framed with a key-length and value-length prefix.
func (e *Encoder) Encode(msg internal.Entry) (int64, error) { func (e *Encoder) Encode(msg internal.Entry) (int64, error) {
var bufKeyValue = make([]byte, KeySize+ValueSize) var bufKeyValue = make([]byte, keySize+valueSize)
binary.BigEndian.PutUint32(bufKeyValue[:KeySize], uint32(len(msg.Key))) binary.BigEndian.PutUint32(bufKeyValue[:keySize], uint32(len(msg.Key)))
binary.BigEndian.PutUint64(bufKeyValue[KeySize:KeySize+ValueSize], uint64(len(msg.Value))) binary.BigEndian.PutUint64(bufKeyValue[keySize:keySize+valueSize], uint64(len(msg.Value)))
if _, err := e.w.Write(bufKeyValue); err != nil { if _, err := e.w.Write(bufKeyValue); err != nil {
return 0, errors.Wrap(err, "failed writing key & value length prefix") return 0, errors.Wrap(err, "failed writing key & value length prefix")
} }
@ -53,46 +59,73 @@ func (e *Encoder) Encode(msg internal.Entry) (int64, error) {
return 0, errors.Wrap(err, "failed flushing data") return 0, errors.Wrap(err, "failed flushing data")
} }
return int64(KeySize + ValueSize + len(msg.Key) + len(msg.Value) + checksumSize), nil return int64(keySize + valueSize + len(msg.Key) + len(msg.Value) + checksumSize), nil
} }
// NewDecoder creates a streaming Entry decoder. // NewDecoder creates a streaming Entry decoder.
func NewDecoder(r io.Reader) *Decoder { func NewDecoder(r io.Reader, maxKeySize uint32, maxValueSize uint64) *Decoder {
return &Decoder{r: r} return &Decoder{
r: r,
maxKeySize: maxKeySize,
maxValueSize: maxValueSize,
}
} }
// Decoder wraps an underlying io.Reader and allows you to stream // Decoder wraps an underlying io.Reader and allows you to stream
// Entry decodings on it. // Entry decodings on it.
type Decoder struct { type Decoder struct {
r io.Reader r io.Reader
maxKeySize uint32
maxValueSize uint64
} }
// Decode decodes the next Entry from the current stream
func (d *Decoder) Decode(v *internal.Entry) (int64, error) { func (d *Decoder) Decode(v *internal.Entry) (int64, error) {
prefixBuf := make([]byte, KeySize+ValueSize) prefixBuf := make([]byte, keySize+valueSize)
_, err := io.ReadFull(d.r, prefixBuf) _, err := io.ReadFull(d.r, prefixBuf)
if err != nil { if err != nil {
return 0, err return 0, err
} }
actualKeySize, actualValueSize := GetKeyValueSizes(prefixBuf) actualKeySize, actualValueSize, err := getKeyValueSizes(prefixBuf, d.maxKeySize, d.maxValueSize)
buf := make([]byte, actualKeySize+actualValueSize+checksumSize) if err != nil {
return 0, errors.Wrap(err, "error while getting key/value serialized sizes")
}
buf := make([]byte, uint64(actualKeySize)+actualValueSize+checksumSize)
if _, err = io.ReadFull(d.r, buf); err != nil { if _, err = io.ReadFull(d.r, buf); err != nil {
return 0, errors.Wrap(translateError(err), "failed reading saved data") return 0, errors.Wrap(translateError(err), "failed reading saved data")
} }
DecodeWithoutPrefix(buf, actualKeySize, v) decodeWithoutPrefix(buf, actualKeySize, v)
return int64(KeySize + ValueSize + actualKeySize + actualValueSize + checksumSize), nil return int64(keySize + valueSize + uint64(actualKeySize) + actualValueSize + checksumSize), nil
} }
func GetKeyValueSizes(buf []byte) (uint64, uint64) { // DecodeEntry decodes a serialized entry
actualKeySize := binary.BigEndian.Uint32(buf[:KeySize]) func DecodeEntry(b []byte, e *internal.Entry, maxKeySize uint32, maxValueSize uint64) error {
actualValueSize := binary.BigEndian.Uint64(buf[KeySize:]) valueOffset, _, err := getKeyValueSizes(b, maxKeySize, maxValueSize)
if err != nil {
return errors.Wrap(err, "key/value sizes are invalid")
}
return uint64(actualKeySize), actualValueSize decodeWithoutPrefix(b[keySize+valueSize:], valueOffset, e)
return nil
} }
func DecodeWithoutPrefix(buf []byte, valueOffset uint64, v *internal.Entry) { func getKeyValueSizes(buf []byte, maxKeySize uint32, maxValueSize uint64) (uint32, uint64, error) {
actualKeySize := binary.BigEndian.Uint32(buf[:keySize])
actualValueSize := binary.BigEndian.Uint64(buf[keySize:])
if actualKeySize > maxKeySize || actualValueSize > maxValueSize {
return 0, 0, ErrInvalidKeyOrValueSize
}
return actualKeySize, actualValueSize, nil
}
func decodeWithoutPrefix(buf []byte, valueOffset uint32, v *internal.Entry) {
v.Key = buf[:valueOffset] v.Key = buf[:valueOffset]
v.Value = buf[valueOffset : len(buf)-checksumSize] v.Value = buf[valueOffset : len(buf)-checksumSize]
v.Checksum = binary.BigEndian.Uint32(buf[len(buf)-checksumSize:]) v.Checksum = binary.BigEndian.Uint32(buf[len(buf)-checksumSize:])

@ -36,16 +36,19 @@ type Datafile interface {
type datafile struct { type datafile struct {
sync.RWMutex sync.RWMutex
id int id int
r *os.File r *os.File
ra *mmap.ReaderAt ra *mmap.ReaderAt
w *os.File w *os.File
offset int64 offset int64
dec *Decoder dec *Decoder
enc *Encoder enc *Encoder
maxKeySize uint32
maxValueSize uint64
} }
func NewDatafile(path string, id int, readonly bool) (Datafile, error) { // NewDatafile opens an existing datafile
func NewDatafile(path string, id int, readonly bool, maxKeySize uint32, maxValueSize uint64) (Datafile, error) {
var ( var (
r *os.File r *os.File
ra *mmap.ReaderAt ra *mmap.ReaderAt
@ -78,17 +81,19 @@ func NewDatafile(path string, id int, readonly bool) (Datafile, error) {
offset := stat.Size() offset := stat.Size()
dec := NewDecoder(r) dec := NewDecoder(r, maxKeySize, maxValueSize)
enc := NewEncoder(w) enc := NewEncoder(w)
return &datafile{ return &datafile{
id: id, id: id,
r: r, r: r,
ra: ra, ra: ra,
w: w, w: w,
offset: offset, offset: offset,
dec: dec, dec: dec,
enc: enc, enc: enc,
maxKeySize: maxKeySize,
maxValueSize: maxValueSize,
}, nil }, nil
} }
@ -131,6 +136,7 @@ func (df *datafile) Size() int64 {
return df.offset return df.offset
} }
// Read reads the next entry from the datafile
func (df *datafile) Read() (e internal.Entry, n int64, err error) { func (df *datafile) Read() (e internal.Entry, n int64, err error) {
df.Lock() df.Lock()
defer df.Unlock() defer df.Unlock()
@ -143,6 +149,7 @@ func (df *datafile) Read() (e internal.Entry, n int64, err error) {
return return
} }
// ReadAt the entry located at index offset with expected serialized size
func (df *datafile) ReadAt(index, size int64) (e internal.Entry, err error) { func (df *datafile) ReadAt(index, size int64) (e internal.Entry, err error) {
var n int var n int
@ -161,8 +168,7 @@ func (df *datafile) ReadAt(index, size int64) (e internal.Entry, err error) {
return return
} }
valueOffset, _ := GetKeyValueSizes(b) DecodeEntry(b, &e, df.maxKeySize, df.maxValueSize)
DecodeWithoutPrefix(b[KeySize+ValueSize:], valueOffset, &e)
return return
} }

@ -24,7 +24,7 @@ const (
sizeSize = int64Size sizeSize = int64Size
) )
func readKeyBytes(r io.Reader, maxKeySize int) ([]byte, error) { func readKeyBytes(r io.Reader, maxKeySize uint32) ([]byte, error) {
s := make([]byte, int32Size) s := make([]byte, int32Size)
_, err := io.ReadFull(r, s) _, err := io.ReadFull(r, s)
if err != nil { if err != nil {
@ -87,7 +87,7 @@ func writeItem(item internal.Item, w io.Writer) error {
} }
// ReadIndex reads a persisted from a io.Reader into a Tree // ReadIndex reads a persisted from a io.Reader into a Tree
func readIndex(r io.Reader, t art.Tree, maxKeySize int) error { func readIndex(r io.Reader, t art.Tree, maxKeySize uint32) error {
for { for {
key, err := readKeyBytes(r, maxKeySize) key, err := readKeyBytes(r, maxKeySize)
if err != nil { if err != nil {

@ -94,7 +94,7 @@ func TestReadCorruptedData(t *testing.T) {
table := []struct { table := []struct {
name string name string
err error err error
maxKeySize int maxKeySize uint32
data []byte data []byte
}{ }{
{name: "key-data-overflow", err: errKeySizeTooLarge, maxKeySize: 1024, data: overflowKeySize}, {name: "key-data-overflow", err: errKeySizeTooLarge, maxKeySize: 1024, data: overflowKeySize},

@ -8,7 +8,7 @@ import (
) )
type Indexer interface { type Indexer interface {
Load(path string, maxkeySize int) (art.Tree, bool, error) Load(path string, maxkeySize uint32) (art.Tree, bool, error)
Save(t art.Tree, path string) error Save(t art.Tree, path string) error
} }
@ -18,7 +18,7 @@ func NewIndexer() Indexer {
type indexer struct{} type indexer struct{}
func (i *indexer) Load(path string, maxKeySize int) (art.Tree, bool, error) { func (i *indexer) Load(path string, maxKeySize uint32) (art.Tree, bool, error) {
t := art.New() t := art.New()
if !internal.Exists(path) { if !internal.Exists(path) {

@ -12,11 +12,11 @@ type Indexer struct {
} }
// Load provides a mock function with given fields: path, maxkeySize // Load provides a mock function with given fields: path, maxkeySize
func (_m *Indexer) Load(path string, maxkeySize int) (art.Tree, bool, error) { func (_m *Indexer) Load(path string, maxkeySize uint32) (art.Tree, bool, error) {
ret := _m.Called(path, maxkeySize) ret := _m.Called(path, maxkeySize)
var r0 art.Tree var r0 art.Tree
if rf, ok := ret.Get(0).(func(string, int) art.Tree); ok { if rf, ok := ret.Get(0).(func(string, uint32) art.Tree); ok {
r0 = rf(path, maxkeySize) r0 = rf(path, maxkeySize)
} else { } else {
if ret.Get(0) != nil { if ret.Get(0) != nil {
@ -25,14 +25,14 @@ func (_m *Indexer) Load(path string, maxkeySize int) (art.Tree, bool, error) {
} }
var r1 bool var r1 bool
if rf, ok := ret.Get(1).(func(string, int) bool); ok { if rf, ok := ret.Get(1).(func(string, uint32) bool); ok {
r1 = rf(path, maxkeySize) r1 = rf(path, maxkeySize)
} else { } else {
r1 = ret.Get(1).(bool) r1 = ret.Get(1).(bool)
} }
var r2 error var r2 error
if rf, ok := ret.Get(2).(func(string, int) error); ok { if rf, ok := ret.Get(2).(func(string, uint32) error); ok {
r2 = rf(path, maxkeySize) r2 = rf(path, maxkeySize)
} else { } else {
r2 = ret.Error(2) r2 = ret.Error(2)

@ -7,10 +7,10 @@ const (
DefaultMaxDatafileSize = 1 << 20 // 1MB DefaultMaxDatafileSize = 1 << 20 // 1MB
// DefaultMaxKeySize is the default maximum key size in bytes // DefaultMaxKeySize is the default maximum key size in bytes
DefaultMaxKeySize = 64 // 64 bytes DefaultMaxKeySize = uint32(64) // 64 bytes
// DefaultMaxValueSize is the default value size in bytes // DefaultMaxValueSize is the default value size in bytes
DefaultMaxValueSize = 1 << 16 // 65KB DefaultMaxValueSize = uint64(1 << 16) // 65KB
// DefaultSync is the default file synchronization action // DefaultSync is the default file synchronization action
DefaultSync = false DefaultSync = false
@ -28,7 +28,7 @@ func WithMaxDatafileSize(size int) Option {
} }
// WithMaxKeySize sets the maximum key size option // WithMaxKeySize sets the maximum key size option
func WithMaxKeySize(size int) Option { func WithMaxKeySize(size uint32) Option {
return func(cfg *config.Config) error { return func(cfg *config.Config) error {
cfg.MaxKeySize = size cfg.MaxKeySize = size
return nil return nil
@ -36,7 +36,7 @@ func WithMaxKeySize(size int) Option {
} }
// WithMaxValueSize sets the maximum value size option // WithMaxValueSize sets the maximum value size option
func WithMaxValueSize(size int) Option { func WithMaxValueSize(size uint64) Option {
return func(cfg *config.Config) error { return func(cfg *config.Config) error {
cfg.MaxValueSize = size cfg.MaxValueSize = size
return nil return nil