178 lines
		
	
	
		
			6.7 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			178 lines
		
	
	
		
			6.7 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
 | 
						|
// All rights reserved.
 | 
						|
//
 | 
						|
// Use of this source code is governed by a BSD-style license that can be
 | 
						|
// found in the LICENSE file.
 | 
						|
 | 
						|
// Package table allows reading and writing sorted key/value pairs.
 | 
						|
package table
 | 
						|
 | 
						|
import (
 | 
						|
	"encoding/binary"
 | 
						|
)
 | 
						|
 | 
						|
/*
 | 
						|
Table:
 | 
						|
 | 
						|
A table consists of one or more data blocks, an optional filter block,
 | 
						|
a metaindex block, an index block and a table footer. Metaindex block
 | 
						|
is a special block used to keep parameters of the table, such as filter
 | 
						|
block name and its block handle. Index block is a special block used to
 | 
						|
keep a record of each data block's offset and length; the index block uses one as its
 | 
						|
restart interval. The keys used by the index block are the last key of the preceding
 | 
						|
block, a shorter separator of adjacent blocks, or a shorter successor of the
 | 
						|
last key of the last block. The filter block is an optional block that contains
 | 
						|
a sequence of filter data generated by a filter generator.
 | 
						|
 | 
						|
Table data structure:
 | 
						|
                                                         + optional
 | 
						|
                                                        /
 | 
						|
    +--------------+--------------+--------------+------+-------+-----------------+-------------+--------+
 | 
						|
    | data block 1 |      ...     | data block n | filter block | metaindex block | index block | footer |
 | 
						|
    +--------------+--------------+--------------+--------------+-----------------+-------------+--------+
 | 
						|
 | 
						|
    Each block is followed by a 5-byte trailer that contains the compression type and a checksum.
 | 
						|
 | 
						|
Table block trailer:
 | 
						|
 | 
						|
    +---------------------------+-------------------+
 | 
						|
    | compression type (1-byte) | checksum (4-byte) |
 | 
						|
    +---------------------------+-------------------+
 | 
						|
 | 
						|
    The checksum is a CRC-32 computed using Castagnoli's polynomial. Compression
 | 
						|
    type is also included in the checksum.
 | 
						|
 | 
						|
Table footer:
 | 
						|
 | 
						|
      +------------------- 40-bytes -------------------+
 | 
						|
     /                                                  \
 | 
						|
    +------------------------+--------------------+------+-----------------+
 | 
						|
    | metaindex block handle / index block handle / ---- | magic (8-bytes) |
 | 
						|
    +------------------------+--------------------+------+-----------------+
 | 
						|
 | 
						|
    The magic is the first 64 bits of the SHA-1 sum of "http://code.google.com/p/leveldb/".
 | 
						|
 | 
						|
NOTE: All fixed-length integers are little-endian.
 | 
						|
*/
 | 
						|
 | 
						|
/*
 | 
						|
Block:
 | 
						|
 | 
						|
A block consists of one or more key/value entries and a block trailer.
 | 
						|
Block entry shares key prefix with its preceding key until a restart
 | 
						|
point is reached. A block should contain at least one restart point.
 | 
						|
The first restart point is always zero.
 | 
						|
 | 
						|
Block data structure:
 | 
						|
 | 
						|
      + restart point                 + restart point (depends on restart interval)
 | 
						|
     /                               /
 | 
						|
    +---------------+---------------+---------------+---------------+---------+
 | 
						|
    | block entry 1 | block entry 2 |      ...      | block entry n | trailer |
 | 
						|
    +---------------+---------------+---------------+---------------+---------+
 | 
						|
 | 
						|
Key/value entry:
 | 
						|
 | 
						|
              +---- key len ----+
 | 
						|
             /                   \
 | 
						|
    +-------+---------+-----------+---------+--------------------+--------------+----------------+
 | 
						|
    | shared (varint) | not shared (varint) | value len (varint) | key (varlen) | value (varlen) |
 | 
						|
    +-----------------+---------------------+--------------------+--------------+----------------+
 | 
						|
 | 
						|
    Block entry shares key prefix with its preceding key:
 | 
						|
    Conditions:
 | 
						|
        restart_interval=2
 | 
						|
        entry one  : key=deck,value=v1
 | 
						|
        entry two  : key=dock,value=v2
 | 
						|
        entry three: key=duck,value=v3
 | 
						|
    The entries will be encoded as follows:
 | 
						|
 | 
						|
      + restart point (offset=0)                                                 + restart point (offset=17)
 | 
						|
     /                                                                          /
 | 
						|
    +-----+-----+-----+----------+--------+-----+-----+-----+---------+--------+-----+-----+-----+----------+--------+
 | 
						|
    |  0  |  4  |  2  |  "deck"  |  "v1"  |  1  |  3  |  2  |  "ock"  |  "v2"  |  0  |  4  |  2  |  "duck"  |  "v3"  |
 | 
						|
    +-----+-----+-----+----------+--------+-----+-----+-----+---------+--------+-----+-----+-----+----------+--------+
 | 
						|
     \                                   / \                                  / \                                   /
 | 
						|
      +----------- entry one -----------+   +----------- entry two ----------+   +---------- entry three ----------+
 | 
						|
 | 
						|
    The block trailer will contain two restart points:
 | 
						|
 | 
						|
    +------------+-----------+--------+
 | 
						|
    |     0      |    17     |   2    |
 | 
						|
    +------------+-----------+---+----+
 | 
						|
     \                      /     \
 | 
						|
      +-- restart points --+       + restart points length
 | 
						|
 | 
						|
Block trailer:
 | 
						|
 | 
						|
      +-- 4-bytes --+
 | 
						|
     /               \
 | 
						|
    +-----------------+-----------------+-----------------+------------------------------+
 | 
						|
    | restart point 1 |       ....      | restart point n | restart points len (4-bytes) |
 | 
						|
    +-----------------+-----------------+-----------------+------------------------------+
 | 
						|
 | 
						|
 | 
						|
NOTE: All fixed-length integers are little-endian.
 | 
						|
*/
 | 
						|
 | 
						|
/*
 | 
						|
Filter block:
 | 
						|
 | 
						|
A filter block consists of one or more filter data and a filter block trailer.
 | 
						|
The trailer contains filter data offsets, a trailer offset and a 1-byte base Lg.
 | 
						|
 | 
						|
Filter block data structure:
 | 
						|
 | 
						|
      + offset 1      + offset 2      + offset n      + trailer offset
 | 
						|
     /               /               /               /
 | 
						|
    +---------------+---------------+---------------+---------+
 | 
						|
    | filter data 1 |      ...      | filter data n | trailer |
 | 
						|
    +---------------+---------------+---------------+---------+
 | 
						|
 | 
						|
Filter block trailer:
 | 
						|
 | 
						|
      +- 4-bytes -+
 | 
						|
     /             \
 | 
						|
    +---------------+---------------+---------------+-------------------------------+------------------+
 | 
						|
    | data 1 offset |      ....     | data n offset | data-offsets offset (4-bytes) | base Lg (1-byte) |
 | 
						|
    +---------------+---------------+---------------+-------------------------------+------------------+
 | 
						|
 | 
						|
 | 
						|
NOTE: All fixed-length integers are little-endian.
 | 
						|
*/
 | 
						|
 | 
						|
const (
	// blockTrailerLen is the size in bytes of the trailer that follows every
	// block: a 1-byte compression type plus a 4-byte checksum.
	blockTrailerLen = 5

	// footerLen is the size in bytes of the table footer: 40 bytes reserved
	// for the metaindex and index block handles, followed by the 8-byte magic.
	footerLen = 48

	// magic is the 8-byte marker that terminates the table footer.
	magic = "\x57\xfb\x80\x8b\x24\x75\x47\xdb"

	// The block type gives the per-block compression format.
	// These constants are part of the file format and should not be changed.
	blockTypeNoCompression     = 0
	blockTypeSnappyCompression = 1

	// Generate a new filter every 2KB of data: filterBase is 1<<filterBaseLg
	// bytes, and filterBaseLg is its base-2 logarithm.
	filterBaseLg = 11
	filterBase   = 1 << filterBaseLg
)
 | 
						|
 | 
						|
// blockHandle records where a block lives in a table: its byte offset and
// its length.
type blockHandle struct {
	offset, length uint64
}

// decodeBlockHandle parses a block handle from the front of src. A handle is
// encoded as two consecutive unsigned varints: the offset, then the length.
//
// It returns the decoded handle and the number of bytes consumed. If either
// varint is truncated or overflows 64 bits, it returns the zero handle and 0.
func decodeBlockHandle(src []byte) (blockHandle, int) {
	offset, n := binary.Uvarint(src)
	// binary.Uvarint reports a truncated buffer with n == 0 and an overflow
	// with n < 0; check before slicing, because src[n:] panics on negative n.
	if n <= 0 {
		return blockHandle{}, 0
	}
	length, m := binary.Uvarint(src[n:])
	if m <= 0 {
		return blockHandle{}, 0
	}
	return blockHandle{offset: offset, length: length}, n + m
}

// encodeBlockHandle writes b into dst as two consecutive unsigned varints
// (offset, then length) and returns the number of bytes written.
//
// dst must have room for the encoding, which needs at most
// 2*binary.MaxVarintLen64 bytes; binary.PutUvarint panics if dst is too small.
func encodeBlockHandle(dst []byte, b blockHandle) int {
	n := binary.PutUvarint(dst, b.offset)
	m := binary.PutUvarint(dst[n:], b.length)
	return n + m
}
 |