|
- package bbolt
-
- import (
- "errors"
- "fmt"
- "hash/fnv"
- "log"
- "os"
- "runtime"
- "sort"
- "sync"
- "time"
- "unsafe"
- )
-
- // The largest step that can be taken when remapping the mmap.
- const maxMmapStep = 1 << 30 // 1GB
-
- // The data file format version.
- const version = 2
-
- // Represents a marker value to indicate that a file is a Bolt DB.
- const magic uint32 = 0xED0CDAED
-
- const pgidNoFreelist pgid = 0xffffffffffffffff
-
- // IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
- // syncing changes to a file. This is required as some operating systems,
- // such as OpenBSD, do not have a unified buffer cache (UBC) and writes
- // must be synchronized using the msync(2) syscall.
- const IgnoreNoSync = runtime.GOOS == "openbsd"
-
- // Default values if not set in a DB instance.
- const (
- DefaultMaxBatchSize int = 1000
- DefaultMaxBatchDelay = 10 * time.Millisecond
- DefaultAllocSize = 16 * 1024 * 1024
- )
-
- // default page size for db is set to the OS page size.
- var defaultPageSize = os.Getpagesize()
-
- // The time elapsed between consecutive file locking attempts.
- const flockRetryTimeout = 50 * time.Millisecond
-
- // FreelistType is the type of the freelist backend
- type FreelistType string
-
- const (
- // FreelistArrayType indicates backend freelist type is array
- FreelistArrayType = FreelistType("array")
- // FreelistMapType indicates backend freelist type is hashmap
- FreelistMapType = FreelistType("hashmap")
- )
-
- // DB represents a collection of buckets persisted to a file on disk.
- // All data access is performed through transactions which can be obtained through the DB.
- // All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called.
- type DB struct {
- // When enabled, the database will perform a Check() after every commit.
- // A panic is issued if the database is in an inconsistent state. This
- // flag has a large performance impact so it should only be used for
- // debugging purposes.
- StrictMode bool
-
- // Setting the NoSync flag will cause the database to skip fsync()
- // calls after each commit. This can be useful when bulk loading data
- // into a database and you can restart the bulk load in the event of
- // a system failure or database corruption. Do not set this flag for
- // normal use.
- //
- // If the package global IgnoreNoSync constant is true, this value is
- // ignored. See the comment on that constant for more details.
- //
- // THIS IS UNSAFE. PLEASE USE WITH CAUTION.
- NoSync bool
-
- // When true, skips syncing freelist to disk. This improves the database
- // write performance under normal operation, but requires a full database
- // re-sync during recovery.
- NoFreelistSync bool
-
- // FreelistType sets the backend freelist type. There are two options. Array which is simple but endures
- // dramatic performance degradation if database is large and framentation in freelist is common.
- // The alternative one is using hashmap, it is faster in almost all circumstances
- // but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe.
- // The default type is array
- FreelistType FreelistType
-
- // When true, skips the truncate call when growing the database.
- // Setting this to true is only safe on non-ext3/ext4 systems.
- // Skipping truncation avoids preallocation of hard drive space and
- // bypasses a truncate() and fsync() syscall on remapping.
- //
- // https://github.com/boltdb/bolt/issues/284
- NoGrowSync bool
-
- // If you want to read the entire database fast, you can set MmapFlag to
- // syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
- MmapFlags int
-
- // MaxBatchSize is the maximum size of a batch. Default value is
- // copied from DefaultMaxBatchSize in Open.
- //
- // If <=0, disables batching.
- //
- // Do not change concurrently with calls to Batch.
- MaxBatchSize int
-
- // MaxBatchDelay is the maximum delay before a batch starts.
- // Default value is copied from DefaultMaxBatchDelay in Open.
- //
- // If <=0, effectively disables batching.
- //
- // Do not change concurrently with calls to Batch.
- MaxBatchDelay time.Duration
-
- // AllocSize is the amount of space allocated when the database
- // needs to create new pages. This is done to amortize the cost
- // of truncate() and fsync() when growing the data file.
- AllocSize int
-
- path string
- file *os.File
- dataref []byte // mmap'ed readonly, write throws SEGV
- data *[maxMapSize]byte
- datasz int
- filesz int // current on disk file size
- meta0 *meta
- meta1 *meta
- pageSize int
- opened bool
- rwtx *Tx
- txs []*Tx
- stats Stats
-
- freelist *freelist
- freelistLoad sync.Once
-
- pagePool sync.Pool
-
- batchMu sync.Mutex
- batch *batch
-
- rwlock sync.Mutex // Allows only one writer at a time.
- metalock sync.Mutex // Protects meta page access.
- mmaplock sync.RWMutex // Protects mmap access during remapping.
- statlock sync.RWMutex // Protects stats access.
-
- ops struct {
- writeAt func(b []byte, off int64) (n int, err error)
- }
-
- // Read only mode.
- // When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
- readOnly bool
- }
-
- // Path returns the path to currently open database file.
- func (db *DB) Path() string {
- return db.path
- }
-
- // GoString returns the Go string representation of the database.
- func (db *DB) GoString() string {
- return fmt.Sprintf("bolt.DB{path:%q}", db.path)
- }
-
- // String returns the string representation of the database.
- func (db *DB) String() string {
- return fmt.Sprintf("DB<%q>", db.path)
- }
-
- // Open creates and opens a database at the given path.
- // If the file does not exist then it will be created automatically.
- // Passing in nil options will cause Bolt to open the database with the default options.
- func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
- db := &DB{
- opened: true,
- }
- // Set default options if no options are provided.
- if options == nil {
- options = DefaultOptions
- }
- db.NoSync = options.NoSync
- db.NoGrowSync = options.NoGrowSync
- db.MmapFlags = options.MmapFlags
- db.NoFreelistSync = options.NoFreelistSync
- db.FreelistType = options.FreelistType
-
- // Set default values for later DB operations.
- db.MaxBatchSize = DefaultMaxBatchSize
- db.MaxBatchDelay = DefaultMaxBatchDelay
- db.AllocSize = DefaultAllocSize
-
- flag := os.O_RDWR
- if options.ReadOnly {
- flag = os.O_RDONLY
- db.readOnly = true
- }
-
- // Open data file and separate sync handler for metadata writes.
- db.path = path
- var err error
- if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
- _ = db.close()
- return nil, err
- }
-
- // Lock file so that other processes using Bolt in read-write mode cannot
- // use the database at the same time. This would cause corruption since
- // the two processes would write meta pages and free pages separately.
- // The database file is locked exclusively (only one process can grab the lock)
- // if !options.ReadOnly.
- // The database file is locked using the shared lock (more than one process may
- // hold a lock at the same time) otherwise (options.ReadOnly is set).
- if err := flock(db, !db.readOnly, options.Timeout); err != nil {
- _ = db.close()
- return nil, err
- }
-
- // Default values for test hooks
- db.ops.writeAt = db.file.WriteAt
-
- if db.pageSize = options.PageSize; db.pageSize == 0 {
- // Set the default page size to the OS page size.
- db.pageSize = defaultPageSize
- }
-
- // Initialize the database if it doesn't exist.
- if info, err := db.file.Stat(); err != nil {
- _ = db.close()
- return nil, err
- } else if info.Size() == 0 {
- // Initialize new files with meta pages.
- if err := db.init(); err != nil {
- // clean up file descriptor on initialization fail
- _ = db.close()
- return nil, err
- }
- } else {
- // Read the first meta page to determine the page size.
- var buf [0x1000]byte
- // If we can't read the page size, but can read a page, assume
- // it's the same as the OS or one given -- since that's how the
- // page size was chosen in the first place.
- //
- // If the first page is invalid and this OS uses a different
- // page size than what the database was created with then we
- // are out of luck and cannot access the database.
- //
- // TODO: scan for next page
- if bw, err := db.file.ReadAt(buf[:], 0); err == nil && bw == len(buf) {
- if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil {
- db.pageSize = int(m.pageSize)
- }
- } else {
- _ = db.close()
- return nil, ErrInvalid
- }
- }
-
- // Initialize page pool.
- db.pagePool = sync.Pool{
- New: func() interface{} {
- return make([]byte, db.pageSize)
- },
- }
-
- // Memory map the data file.
- if err := db.mmap(options.InitialMmapSize); err != nil {
- _ = db.close()
- return nil, err
- }
-
- if db.readOnly {
- return db, nil
- }
-
- db.loadFreelist()
-
- // Flush freelist when transitioning from no sync to sync so
- // NoFreelistSync unaware boltdb can open the db later.
- if !db.NoFreelistSync && !db.hasSyncedFreelist() {
- tx, err := db.Begin(true)
- if tx != nil {
- err = tx.Commit()
- }
- if err != nil {
- _ = db.close()
- return nil, err
- }
- }
-
- // Mark the database as opened and return.
- return db, nil
- }
-
- // loadFreelist reads the freelist if it is synced, or reconstructs it
- // by scanning the DB if it is not synced. It assumes there are no
- // concurrent accesses being made to the freelist.
- func (db *DB) loadFreelist() {
- db.freelistLoad.Do(func() {
- db.freelist = newFreelist(db.FreelistType)
- if !db.hasSyncedFreelist() {
- // Reconstruct free list by scanning the DB.
- db.freelist.readIDs(db.freepages())
- } else {
- // Read free list from freelist page.
- db.freelist.read(db.page(db.meta().freelist))
- }
- db.stats.FreePageN = db.freelist.free_count()
- })
- }
-
- func (db *DB) hasSyncedFreelist() bool {
- return db.meta().freelist != pgidNoFreelist
- }
-
- // mmap opens the underlying memory-mapped file and initializes the meta references.
- // minsz is the minimum size that the new mmap can be.
- func (db *DB) mmap(minsz int) error {
- db.mmaplock.Lock()
- defer db.mmaplock.Unlock()
-
- info, err := db.file.Stat()
- if err != nil {
- return fmt.Errorf("mmap stat error: %s", err)
- } else if int(info.Size()) < db.pageSize*2 {
- return fmt.Errorf("file size too small")
- }
-
- // Ensure the size is at least the minimum size.
- var size = int(info.Size())
- if size < minsz {
- size = minsz
- }
- size, err = db.mmapSize(size)
- if err != nil {
- return err
- }
-
- // Dereference all mmap references before unmapping.
- if db.rwtx != nil {
- db.rwtx.root.dereference()
- }
-
- // Unmap existing data before continuing.
- if err := db.munmap(); err != nil {
- return err
- }
-
- // Memory-map the data file as a byte slice.
- if err := mmap(db, size); err != nil {
- return err
- }
-
- // Save references to the meta pages.
- db.meta0 = db.page(0).meta()
- db.meta1 = db.page(1).meta()
-
- // Validate the meta pages. We only return an error if both meta pages fail
- // validation, since meta0 failing validation means that it wasn't saved
- // properly -- but we can recover using meta1. And vice-versa.
- err0 := db.meta0.validate()
- err1 := db.meta1.validate()
- if err0 != nil && err1 != nil {
- return err0
- }
-
- return nil
- }
-
- // munmap unmaps the data file from memory.
- func (db *DB) munmap() error {
- if err := munmap(db); err != nil {
- return fmt.Errorf("unmap error: " + err.Error())
- }
- return nil
- }
-
- // mmapSize determines the appropriate size for the mmap given the current size
- // of the database. The minimum size is 32KB and doubles until it reaches 1GB.
- // Returns an error if the new mmap size is greater than the max allowed.
- func (db *DB) mmapSize(size int) (int, error) {
- // Double the size from 32KB until 1GB.
- for i := uint(15); i <= 30; i++ {
- if size <= 1<<i {
- return 1 << i, nil
- }
- }
-
- // Verify the requested size is not above the maximum allowed.
- if size > maxMapSize {
- return 0, fmt.Errorf("mmap too large")
- }
-
- // If larger than 1GB then grow by 1GB at a time.
- sz := int64(size)
- if remainder := sz % int64(maxMmapStep); remainder > 0 {
- sz += int64(maxMmapStep) - remainder
- }
-
- // Ensure that the mmap size is a multiple of the page size.
- // This should always be true since we're incrementing in MBs.
- pageSize := int64(db.pageSize)
- if (sz % pageSize) != 0 {
- sz = ((sz / pageSize) + 1) * pageSize
- }
-
- // If we've exceeded the max size then only grow up to the max size.
- if sz > maxMapSize {
- sz = maxMapSize
- }
-
- return int(sz), nil
- }
-
- // init creates a new database file and initializes its meta pages.
- func (db *DB) init() error {
- // Create two meta pages on a buffer.
- buf := make([]byte, db.pageSize*4)
- for i := 0; i < 2; i++ {
- p := db.pageInBuffer(buf[:], pgid(i))
- p.id = pgid(i)
- p.flags = metaPageFlag
-
- // Initialize the meta page.
- m := p.meta()
- m.magic = magic
- m.version = version
- m.pageSize = uint32(db.pageSize)
- m.freelist = 2
- m.root = bucket{root: 3}
- m.pgid = 4
- m.txid = txid(i)
- m.checksum = m.sum64()
- }
-
- // Write an empty freelist at page 3.
- p := db.pageInBuffer(buf[:], pgid(2))
- p.id = pgid(2)
- p.flags = freelistPageFlag
- p.count = 0
-
- // Write an empty leaf page at page 4.
- p = db.pageInBuffer(buf[:], pgid(3))
- p.id = pgid(3)
- p.flags = leafPageFlag
- p.count = 0
-
- // Write the buffer to our data file.
- if _, err := db.ops.writeAt(buf, 0); err != nil {
- return err
- }
- if err := fdatasync(db); err != nil {
- return err
- }
-
- return nil
- }
-
- // Close releases all database resources.
- // It will block waiting for any open transactions to finish
- // before closing the database and returning.
- func (db *DB) Close() error {
- db.rwlock.Lock()
- defer db.rwlock.Unlock()
-
- db.metalock.Lock()
- defer db.metalock.Unlock()
-
- db.mmaplock.Lock()
- defer db.mmaplock.Unlock()
-
- return db.close()
- }
-
- func (db *DB) close() error {
- if !db.opened {
- return nil
- }
-
- db.opened = false
-
- db.freelist = nil
-
- // Clear ops.
- db.ops.writeAt = nil
-
- // Close the mmap.
- if err := db.munmap(); err != nil {
- return err
- }
-
- // Close file handles.
- if db.file != nil {
- // No need to unlock read-only file.
- if !db.readOnly {
- // Unlock the file.
- if err := funlock(db); err != nil {
- log.Printf("bolt.Close(): funlock error: %s", err)
- }
- }
-
- // Close the file descriptor.
- if err := db.file.Close(); err != nil {
- return fmt.Errorf("db file close: %s", err)
- }
- db.file = nil
- }
-
- db.path = ""
- return nil
- }
-
- // Begin starts a new transaction.
- // Multiple read-only transactions can be used concurrently but only one
- // write transaction can be used at a time. Starting multiple write transactions
- // will cause the calls to block and be serialized until the current write
- // transaction finishes.
- //
- // Transactions should not be dependent on one another. Opening a read
- // transaction and a write transaction in the same goroutine can cause the
- // writer to deadlock because the database periodically needs to re-mmap itself
- // as it grows and it cannot do that while a read transaction is open.
- //
- // If a long running read transaction (for example, a snapshot transaction) is
- // needed, you might want to set DB.InitialMmapSize to a large enough value
- // to avoid potential blocking of write transaction.
- //
- // IMPORTANT: You must close read-only transactions after you are finished or
- // else the database will not reclaim old pages.
- func (db *DB) Begin(writable bool) (*Tx, error) {
- if writable {
- return db.beginRWTx()
- }
- return db.beginTx()
- }
-
- func (db *DB) beginTx() (*Tx, error) {
- // Lock the meta pages while we initialize the transaction. We obtain
- // the meta lock before the mmap lock because that's the order that the
- // write transaction will obtain them.
- db.metalock.Lock()
-
- // Obtain a read-only lock on the mmap. When the mmap is remapped it will
- // obtain a write lock so all transactions must finish before it can be
- // remapped.
- db.mmaplock.RLock()
-
- // Exit if the database is not open yet.
- if !db.opened {
- db.mmaplock.RUnlock()
- db.metalock.Unlock()
- return nil, ErrDatabaseNotOpen
- }
-
- // Create a transaction associated with the database.
- t := &Tx{}
- t.init(db)
-
- // Keep track of transaction until it closes.
- db.txs = append(db.txs, t)
- n := len(db.txs)
-
- // Unlock the meta pages.
- db.metalock.Unlock()
-
- // Update the transaction stats.
- db.statlock.Lock()
- db.stats.TxN++
- db.stats.OpenTxN = n
- db.statlock.Unlock()
-
- return t, nil
- }
-
- func (db *DB) beginRWTx() (*Tx, error) {
- // If the database was opened with Options.ReadOnly, return an error.
- if db.readOnly {
- return nil, ErrDatabaseReadOnly
- }
-
- // Obtain writer lock. This is released by the transaction when it closes.
- // This enforces only one writer transaction at a time.
- db.rwlock.Lock()
-
- // Once we have the writer lock then we can lock the meta pages so that
- // we can set up the transaction.
- db.metalock.Lock()
- defer db.metalock.Unlock()
-
- // Exit if the database is not open yet.
- if !db.opened {
- db.rwlock.Unlock()
- return nil, ErrDatabaseNotOpen
- }
-
- // Create a transaction associated with the database.
- t := &Tx{writable: true}
- t.init(db)
- db.rwtx = t
- db.freePages()
- return t, nil
- }
-
- // freePages releases any pages associated with closed read-only transactions.
- func (db *DB) freePages() {
- // Free all pending pages prior to earliest open transaction.
- sort.Sort(txsById(db.txs))
- minid := txid(0xFFFFFFFFFFFFFFFF)
- if len(db.txs) > 0 {
- minid = db.txs[0].meta.txid
- }
- if minid > 0 {
- db.freelist.release(minid - 1)
- }
- // Release unused txid extents.
- for _, t := range db.txs {
- db.freelist.releaseRange(minid, t.meta.txid-1)
- minid = t.meta.txid + 1
- }
- db.freelist.releaseRange(minid, txid(0xFFFFFFFFFFFFFFFF))
- // Any page both allocated and freed in an extent is safe to release.
- }
-
- type txsById []*Tx
-
- func (t txsById) Len() int { return len(t) }
- func (t txsById) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
- func (t txsById) Less(i, j int) bool { return t[i].meta.txid < t[j].meta.txid }
-
- // removeTx removes a transaction from the database.
- func (db *DB) removeTx(tx *Tx) {
- // Release the read lock on the mmap.
- db.mmaplock.RUnlock()
-
- // Use the meta lock to restrict access to the DB object.
- db.metalock.Lock()
-
- // Remove the transaction.
- for i, t := range db.txs {
- if t == tx {
- last := len(db.txs) - 1
- db.txs[i] = db.txs[last]
- db.txs[last] = nil
- db.txs = db.txs[:last]
- break
- }
- }
- n := len(db.txs)
-
- // Unlock the meta pages.
- db.metalock.Unlock()
-
- // Merge statistics.
- db.statlock.Lock()
- db.stats.OpenTxN = n
- db.stats.TxStats.add(&tx.stats)
- db.statlock.Unlock()
- }
-
- // Update executes a function within the context of a read-write managed transaction.
- // If no error is returned from the function then the transaction is committed.
- // If an error is returned then the entire transaction is rolled back.
- // Any error that is returned from the function or returned from the commit is
- // returned from the Update() method.
- //
- // Attempting to manually commit or rollback within the function will cause a panic.
- func (db *DB) Update(fn func(*Tx) error) error {
- t, err := db.Begin(true)
- if err != nil {
- return err
- }
-
- // Make sure the transaction rolls back in the event of a panic.
- defer func() {
- if t.db != nil {
- t.rollback()
- }
- }()
-
- // Mark as a managed tx so that the inner function cannot manually commit.
- t.managed = true
-
- // If an error is returned from the function then rollback and return error.
- err = fn(t)
- t.managed = false
- if err != nil {
- _ = t.Rollback()
- return err
- }
-
- return t.Commit()
- }
-
- // View executes a function within the context of a managed read-only transaction.
- // Any error that is returned from the function is returned from the View() method.
- //
- // Attempting to manually rollback within the function will cause a panic.
- func (db *DB) View(fn func(*Tx) error) error {
- t, err := db.Begin(false)
- if err != nil {
- return err
- }
-
- // Make sure the transaction rolls back in the event of a panic.
- defer func() {
- if t.db != nil {
- t.rollback()
- }
- }()
-
- // Mark as a managed tx so that the inner function cannot manually rollback.
- t.managed = true
-
- // If an error is returned from the function then pass it through.
- err = fn(t)
- t.managed = false
- if err != nil {
- _ = t.Rollback()
- return err
- }
-
- return t.Rollback()
- }
-
- // Batch calls fn as part of a batch. It behaves similar to Update,
- // except:
- //
- // 1. concurrent Batch calls can be combined into a single Bolt
- // transaction.
- //
- // 2. the function passed to Batch may be called multiple times,
- // regardless of whether it returns error or not.
- //
- // This means that Batch function side effects must be idempotent and
- // take permanent effect only after a successful return is seen in
- // caller.
- //
- // The maximum batch size and delay can be adjusted with DB.MaxBatchSize
- // and DB.MaxBatchDelay, respectively.
- //
- // Batch is only useful when there are multiple goroutines calling it.
- func (db *DB) Batch(fn func(*Tx) error) error {
- errCh := make(chan error, 1)
-
- db.batchMu.Lock()
- if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) {
- // There is no existing batch, or the existing batch is full; start a new one.
- db.batch = &batch{
- db: db,
- }
- db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger)
- }
- db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh})
- if len(db.batch.calls) >= db.MaxBatchSize {
- // wake up batch, it's ready to run
- go db.batch.trigger()
- }
- db.batchMu.Unlock()
-
- err := <-errCh
- if err == trySolo {
- err = db.Update(fn)
- }
- return err
- }
-
- type call struct {
- fn func(*Tx) error
- err chan<- error
- }
-
- type batch struct {
- db *DB
- timer *time.Timer
- start sync.Once
- calls []call
- }
-
- // trigger runs the batch if it hasn't already been run.
- func (b *batch) trigger() {
- b.start.Do(b.run)
- }
-
- // run performs the transactions in the batch and communicates results
- // back to DB.Batch.
- func (b *batch) run() {
- b.db.batchMu.Lock()
- b.timer.Stop()
- // Make sure no new work is added to this batch, but don't break
- // other batches.
- if b.db.batch == b {
- b.db.batch = nil
- }
- b.db.batchMu.Unlock()
-
- retry:
- for len(b.calls) > 0 {
- var failIdx = -1
- err := b.db.Update(func(tx *Tx) error {
- for i, c := range b.calls {
- if err := safelyCall(c.fn, tx); err != nil {
- failIdx = i
- return err
- }
- }
- return nil
- })
-
- if failIdx >= 0 {
- // take the failing transaction out of the batch. it's
- // safe to shorten b.calls here because db.batch no longer
- // points to us, and we hold the mutex anyway.
- c := b.calls[failIdx]
- b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1]
- // tell the submitter re-run it solo, continue with the rest of the batch
- c.err <- trySolo
- continue retry
- }
-
- // pass success, or bolt internal errors, to all callers
- for _, c := range b.calls {
- c.err <- err
- }
- break retry
- }
- }
-
- // trySolo is a special sentinel error value used for signaling that a
- // transaction function should be re-run. It should never be seen by
- // callers.
- var trySolo = errors.New("batch function returned an error and should be re-run solo")
-
- type panicked struct {
- reason interface{}
- }
-
- func (p panicked) Error() string {
- if err, ok := p.reason.(error); ok {
- return err.Error()
- }
- return fmt.Sprintf("panic: %v", p.reason)
- }
-
- func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
- defer func() {
- if p := recover(); p != nil {
- err = panicked{p}
- }
- }()
- return fn(tx)
- }
-
- // Sync executes fdatasync() against the database file handle.
- //
- // This is not necessary under normal operation, however, if you use NoSync
- // then it allows you to force the database file to sync against the disk.
- func (db *DB) Sync() error { return fdatasync(db) }
-
- // Stats retrieves ongoing performance stats for the database.
- // This is only updated when a transaction closes.
- func (db *DB) Stats() Stats {
- db.statlock.RLock()
- defer db.statlock.RUnlock()
- return db.stats
- }
-
- // This is for internal access to the raw data bytes from the C cursor, use
- // carefully, or not at all.
- func (db *DB) Info() *Info {
- return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize}
- }
-
- // page retrieves a page reference from the mmap based on the current page size.
- func (db *DB) page(id pgid) *page {
- pos := id * pgid(db.pageSize)
- return (*page)(unsafe.Pointer(&db.data[pos]))
- }
-
- // pageInBuffer retrieves a page reference from a given byte array based on the current page size.
- func (db *DB) pageInBuffer(b []byte, id pgid) *page {
- return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)]))
- }
-
- // meta retrieves the current meta page reference.
- func (db *DB) meta() *meta {
- // We have to return the meta with the highest txid which doesn't fail
- // validation. Otherwise, we can cause errors when in fact the database is
- // in a consistent state. metaA is the one with the higher txid.
- metaA := db.meta0
- metaB := db.meta1
- if db.meta1.txid > db.meta0.txid {
- metaA = db.meta1
- metaB = db.meta0
- }
-
- // Use higher meta page if valid. Otherwise fallback to previous, if valid.
- if err := metaA.validate(); err == nil {
- return metaA
- } else if err := metaB.validate(); err == nil {
- return metaB
- }
-
- // This should never be reached, because both meta1 and meta0 were validated
- // on mmap() and we do fsync() on every write.
- panic("bolt.DB.meta(): invalid meta pages")
- }
-
- // allocate returns a contiguous block of memory starting at a given page.
- func (db *DB) allocate(txid txid, count int) (*page, error) {
- // Allocate a temporary buffer for the page.
- var buf []byte
- if count == 1 {
- buf = db.pagePool.Get().([]byte)
- } else {
- buf = make([]byte, count*db.pageSize)
- }
- p := (*page)(unsafe.Pointer(&buf[0]))
- p.overflow = uint32(count - 1)
-
- // Use pages from the freelist if they are available.
- if p.id = db.freelist.allocate(txid, count); p.id != 0 {
- return p, nil
- }
-
- // Resize mmap() if we're at the end.
- p.id = db.rwtx.meta.pgid
- var minsz = int((p.id+pgid(count))+1) * db.pageSize
- if minsz >= db.datasz {
- if err := db.mmap(minsz); err != nil {
- return nil, fmt.Errorf("mmap allocate error: %s", err)
- }
- }
-
- // Move the page id high water mark.
- db.rwtx.meta.pgid += pgid(count)
-
- return p, nil
- }
-
- // grow grows the size of the database to the given sz.
- func (db *DB) grow(sz int) error {
- // Ignore if the new size is less than available file size.
- if sz <= db.filesz {
- return nil
- }
-
- // If the data is smaller than the alloc size then only allocate what's needed.
- // Once it goes over the allocation size then allocate in chunks.
- if db.datasz < db.AllocSize {
- sz = db.datasz
- } else {
- sz += db.AllocSize
- }
-
- // Truncate and fsync to ensure file size metadata is flushed.
- // https://github.com/boltdb/bolt/issues/284
- if !db.NoGrowSync && !db.readOnly {
- if runtime.GOOS != "windows" {
- if err := db.file.Truncate(int64(sz)); err != nil {
- return fmt.Errorf("file resize error: %s", err)
- }
- }
- if err := db.file.Sync(); err != nil {
- return fmt.Errorf("file sync error: %s", err)
- }
- }
-
- db.filesz = sz
- return nil
- }
-
- func (db *DB) IsReadOnly() bool {
- return db.readOnly
- }
-
- func (db *DB) freepages() []pgid {
- tx, err := db.beginTx()
- defer func() {
- err = tx.Rollback()
- if err != nil {
- panic("freepages: failed to rollback tx")
- }
- }()
- if err != nil {
- panic("freepages: failed to open read only tx")
- }
-
- reachable := make(map[pgid]*page)
- nofreed := make(map[pgid]bool)
- ech := make(chan error)
- go func() {
- for e := range ech {
- panic(fmt.Sprintf("freepages: failed to get all reachable pages (%v)", e))
- }
- }()
- tx.checkBucket(&tx.root, reachable, nofreed, ech)
- close(ech)
-
- var fids []pgid
- for i := pgid(2); i < db.meta().pgid; i++ {
- if _, ok := reachable[i]; !ok {
- fids = append(fids, i)
- }
- }
- return fids
- }
-
- // Options represents the options that can be set when opening a database.
- type Options struct {
- // Timeout is the amount of time to wait to obtain a file lock.
- // When set to zero it will wait indefinitely. This option is only
- // available on Darwin and Linux.
- Timeout time.Duration
-
- // Sets the DB.NoGrowSync flag before memory mapping the file.
- NoGrowSync bool
-
- // Do not sync freelist to disk. This improves the database write performance
- // under normal operation, but requires a full database re-sync during recovery.
- NoFreelistSync bool
-
- // FreelistType sets the backend freelist type. There are two options. Array which is simple but endures
- // dramatic performance degradation if database is large and framentation in freelist is common.
- // The alternative one is using hashmap, it is faster in almost all circumstances
- // but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe.
- // The default type is array
- FreelistType FreelistType
-
- // Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
- // grab a shared lock (UNIX).
- ReadOnly bool
-
- // Sets the DB.MmapFlags flag before memory mapping the file.
- MmapFlags int
-
- // InitialMmapSize is the initial mmap size of the database
- // in bytes. Read transactions won't block write transaction
- // if the InitialMmapSize is large enough to hold database mmap
- // size. (See DB.Begin for more information)
- //
- // If <=0, the initial map size is 0.
- // If initialMmapSize is smaller than the previous database size,
- // it takes no effect.
- InitialMmapSize int
-
- // PageSize overrides the default OS page size.
- PageSize int
-
- // NoSync sets the initial value of DB.NoSync. Normally this can just be
- // set directly on the DB itself when returned from Open(), but this option
- // is useful in APIs which expose Options but not the underlying DB.
- NoSync bool
- }
-
- // DefaultOptions represent the options used if nil options are passed into Open().
- // No timeout is used which will cause Bolt to wait indefinitely for a lock.
- var DefaultOptions = &Options{
- Timeout: 0,
- NoGrowSync: false,
- FreelistType: FreelistArrayType,
- }
-
- // Stats represents statistics about the database.
- type Stats struct {
- // Freelist stats
- FreePageN int // total number of free pages on the freelist
- PendingPageN int // total number of pending pages on the freelist
- FreeAlloc int // total bytes allocated in free pages
- FreelistInuse int // total bytes used by the freelist
-
- // Transaction stats
- TxN int // total number of started read transactions
- OpenTxN int // number of currently open read transactions
-
- TxStats TxStats // global, ongoing stats.
- }
-
- // Sub calculates and returns the difference between two sets of database stats.
- // This is useful when obtaining stats at two different points and time and
- // you need the performance counters that occurred within that time span.
- func (s *Stats) Sub(other *Stats) Stats {
- if other == nil {
- return *s
- }
- var diff Stats
- diff.FreePageN = s.FreePageN
- diff.PendingPageN = s.PendingPageN
- diff.FreeAlloc = s.FreeAlloc
- diff.FreelistInuse = s.FreelistInuse
- diff.TxN = s.TxN - other.TxN
- diff.TxStats = s.TxStats.Sub(&other.TxStats)
- return diff
- }
-
- type Info struct {
- Data uintptr
- PageSize int
- }
-
- type meta struct {
- magic uint32
- version uint32
- pageSize uint32
- flags uint32
- root bucket
- freelist pgid
- pgid pgid
- txid txid
- checksum uint64
- }
-
- // validate checks the marker bytes and version of the meta page to ensure it matches this binary.
- func (m *meta) validate() error {
- if m.magic != magic {
- return ErrInvalid
- } else if m.version != version {
- return ErrVersionMismatch
- } else if m.checksum != 0 && m.checksum != m.sum64() {
- return ErrChecksum
- }
- return nil
- }
-
- // copy copies one meta object to another.
- func (m *meta) copy(dest *meta) {
- *dest = *m
- }
-
- // write writes the meta onto a page.
- func (m *meta) write(p *page) {
- if m.root.root >= m.pgid {
- panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
- } else if m.freelist >= m.pgid && m.freelist != pgidNoFreelist {
- // TODO: reject pgidNoFreeList if !NoFreelistSync
- panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
- }
-
- // Page id is either going to be 0 or 1 which we can determine by the transaction ID.
- p.id = pgid(m.txid % 2)
- p.flags |= metaPageFlag
-
- // Calculate the checksum.
- m.checksum = m.sum64()
-
- m.copy(p.meta())
- }
-
- // generates the checksum for the meta.
- func (m *meta) sum64() uint64 {
- var h = fnv.New64a()
- _, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:])
- return h.Sum64()
- }
-
- // _assert will panic with a given formatted message if the given condition is false.
- func _assert(condition bool, msg string, v ...interface{}) {
- if !condition {
- panic(fmt.Sprintf("assertion failed: "+msg, v...))
- }
- }
|