-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathdb.go
More file actions
283 lines (253 loc) · 8.39 KB
/
db.go
File metadata and controls
283 lines (253 loc) · 8.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
// billy: Simple datastorage
// Copyright 2021 billy authors
// SPDX-License-Identifier: BSD-3-Clause
package billy
import (
"fmt"
"io"
"slices"
"sort"
)
// Database represents a `billy` storage.
type Database interface {
io.Closer
// Put stores the data to the underlying database, and returns the key needed
// for later accessing the data.
// The data is copied by the database, and is safe to modify after the method returns
Put(data []byte) (uint64, error)
// Get retrieves the data stored at the given key.
Get(key uint64) ([]byte, error)
// Delete marks the data for deletion, which means it will (eventually) be
// overwritten by other data. After calling Delete with a given key, the results
// from doing Get(key) is undefined -- it may return the same data, or some other
// data, or fail with an error.
Delete(key uint64) error
// Size returns the storage size of the value belonging to the given key.
Size(key uint64) uint32
// Limits returns the smallest and largest slot size.
Limits() (uint32, uint32)
// Infos retrieves various internal statistics about the database.
Infos() *Infos
// Iterate iterates through all the data in the database, and invokes the
// given onData method for every element
Iterate(onData OnDataFn) error
}
// OnDataFn is used to iterate the entire dataset in the database.
// After the method returns, the content of 'data' will be modified by
// the iterator, so it needs to be copied if it is to be used later.
type OnDataFn func(key uint64, size uint32, data []byte)
// SlotSizeFn is a method that acts as a "generator": a closure which, at each
// invocation, should spit out the next slot-size. In order to create a database with three
// shelves invocation of the method should return e.g.
//
// 10, false
// 20, false
// 30, true
//
// OBS! The slot size must take item header size (4 bytes) into account. So if you
// plan to store 120 bytes, then the slot needs to be at least 124 bytes large.
type SlotSizeFn func() (size uint32, done bool)
// SlotSizePowerOfTwo is a SlotSizeFn which arranges the slots in shelves which
// double in size for each level.
func SlotSizePowerOfTwo(min, max uint32) SlotSizeFn {
v := min
return func() (uint32, bool) {
ret := v
v += v
return ret, ret >= max
}
}
// SlotSizeLinear is a SlotSizeFn which arranges the slots in shelves which
// increase linearly.
func SlotSizeLinear(size, count int) SlotSizeFn {
i := 0
return func() (uint32, bool) {
i++
ret := size * i
return uint32(ret), i >= count
}
}
type database struct {
shelves []*shelf
}
type Options struct {
Path string
Readonly bool
Repair bool
Snappy bool // unused for now
}
// Open opens a (new or existing) database, with configurable limits. The given
// slotSizeFn will be used to determine both the shelf sizes and the number of
// shelves. The function must yield values in increasing order.
//
// If shelf already exists, they are opened and read, in order to populate the
// internal gap-list. While doing so, it's a good opportunity for the caller to
// read the data out, (which is probably desirable), which can be done using the
// optional onData callback.
func Open(opts Options, slotSizeFn SlotSizeFn, onData OnDataFn) (Database, error) {
slotSizes, err := collectSlotSizes(slotSizeFn)
if err != nil {
return nil, err
}
var db = &database{}
for _, slotSize := range slotSizes {
shelf, err := openShelf(opts.Path, slotSize, wrapShelfDataFn(len(db.shelves), slotSize, onData), opts.Readonly, opts.Repair)
if err != nil {
_ = db.Close() // Close shelves
return nil, err
}
db.shelves = append(db.shelves, shelf)
}
return db, nil
}
func collectSlotSizes(slotSizeFn SlotSizeFn) ([]uint32, error) {
var (
prevSlotSize uint32
prevId int
slotSize uint32
slotSizes []uint32
done bool
)
for !done {
slotSize, done = slotSizeFn()
if slotSize <= prevSlotSize {
return nil, fmt.Errorf("slot sizes must be in increasing order")
}
prevSlotSize = slotSize
slotSizes = append(slotSizes, slotSize)
if id := len(slotSizes) & 0xfff; id < prevId {
return nil, fmt.Errorf("too many shelves (%d)", len(slotSizes))
} else {
prevId = id
}
}
return slotSizes, nil
}
// Migrate migrates data from shelves generated by one slot size function
// over to shelves generated by another.
// Matching shelves are ignored, migrated shelves are removed.
func Migrate(opts Options, old, new SlotSizeFn) error {
oldSlotSizes, err := collectSlotSizes(old)
if err != nil {
return err
}
newSlotSizes, err := collectSlotSizes(new)
if err != nil {
return err
}
var newDB = &database{}
for _, slotSize := range newSlotSizes {
shelf, err := openShelf(opts.Path, slotSize, wrapShelfDataFn(len(newDB.shelves), slotSize, nil), opts.Readonly, opts.Repair)
if err != nil {
_ = newDB.Close() // Close shelves
return err
}
newDB.shelves = append(newDB.shelves, shelf)
}
var indexingErr error
migrateData := func(key uint64, size uint32, data []byte) {
_, indexingErr = newDB.Put(data)
}
for idx, slotSize := range oldSlotSizes {
if slices.Contains(newSlotSizes, slotSize) {
continue // do not migrate shelves that have not changed
}
// migrate shelves
shelf, err := openShelf(opts.Path, slotSize, wrapShelfDataFn(idx, slotSize, migrateData), opts.Readonly, opts.Repair)
if err != nil {
return err
}
if indexingErr != nil {
return err
}
if err := shelf.Close(); err != nil {
return err
}
// remove emptied shelves
if err := removeShelf(opts.Path, slotSize); err != nil {
return err
}
}
return newDB.Close()
}
// Put stores the data to the underlying database, and returns the key needed
// for later accessing the data.
// The data is copied by the database, and is safe to modify after the method returns
func (db *database) Put(data []byte) (uint64, error) {
// Search uses binary search to find and return the smallest index i
// in [0, n) at which f(i) is true,
index := sort.Search(len(db.shelves), func(i int) bool {
return len(data)+itemHeaderSize <= int(db.shelves[i].slotSize)
})
if index == len(db.shelves) {
return 0, fmt.Errorf("no shelf found for size %d", len(data))
}
if slot, err := db.shelves[index].Put(data); err != nil {
return 0, err
} else {
return slot | uint64(index)<<28, nil
}
}
// Get retrieves the data stored at the given key.
//
// The key is assumed to be one returned by Put or Iterate (potentially on Open).
// Attempting to access a different key is undefined behavior and may panic.
func (db *database) Get(key uint64) ([]byte, error) {
id := int(key>>28) & 0xfff
return db.shelves[id].Get(key & 0x0FFFFFFF)
}
// Delete marks the data for deletion, which means it will (eventually) be
// overwritten by other data. After calling Delete with a given key, the results
// from doing Get(key) is undefined -- it may return the same data, or some other
// data, or fail with an error.
//
// The key is assumed to be one returned by Put or Iterate (potentially on Open).
// Attempting to access a different key is undefined behavior and may panic.
func (db *database) Delete(key uint64) error {
id := int(key>>28) & 0xfff
return db.shelves[id].Delete(key & 0x0FFFFFFF)
}
// Size returns the storage size (padding included) of a database entry belonging
// to a key.
//
// The key is assumed to be one returned by Put or Iterate (potentially on Open).
// Attempting to access a different key is undefined behavior and may panic.
func (db *database) Size(key uint64) uint32 {
id := int(key>>28) & 0xfff
return db.shelves[id].slotSize
}
func wrapShelfDataFn(shelfId int, shelfSlotSize uint32, onData OnDataFn) onShelfDataFn {
if onData == nil {
return nil
}
return func(slot uint64, data []byte) {
key := slot | uint64(shelfId)<<28
onData(key, shelfSlotSize, data)
}
}
// Iterate iterates through all the data in the database, and invokes the
// given onData method for every element
func (db *database) Iterate(onData OnDataFn) error {
var err error
for i, shelf := range db.shelves {
if e := shelf.Iterate(wrapShelfDataFn(i, shelf.slotSize, onData)); e != nil {
err = fmt.Errorf("shelf %d: %w", i, e)
}
}
return err
}
func (db *database) Limits() (uint32, uint32) {
smallest := db.shelves[0].slotSize
largest := db.shelves[len(db.shelves)-1].slotSize
return smallest, largest
}
// Close implements io.Closer
func (db *database) Close() error {
var err error
for _, shelf := range db.shelves {
if e := shelf.Close(); e != nil {
err = e
}
}
return err
}