From caa6e32751244a83f9123ded85d196ac4e23dc74 Mon Sep 17 00:00:00 2001 From: real_nowhereman Date: Thu, 4 Sep 2008 15:17:50 +0000 Subject: [PATCH] first commit of bplustree class --- fp-includes/core/core.bplustree.class.php | 2578 +++++++++++++++++++++ 1 file changed, 2578 insertions(+) create mode 100755 fp-includes/core/core.bplustree.class.php diff --git a/fp-includes/core/core.bplustree.class.php b/fp-includes/core/core.bplustree.class.php new file mode 100755 index 0000000..668c9f5 --- /dev/null +++ b/fp-includes/core/core.bplustree.class.php @@ -0,0 +1,2578 @@ + + * Based on the original work by Aaron Watters (bplustree.py) + * + * Classes: + * + * - BPlustTree + * Mapping keys, integers + * + * - caching_BPT + * Subclass of BPlusTree, caching + * key,value pairs + * read-only: create using BPlusTree, read + * using caching_BPT + * + * + * Usage + * ===== + * + * # creation + * $f = fopen('myfile', 'w+'); + * $o = new BPlusTree($f, $seek_start, $node_size, $keylen); + * $o->startup(); + * + * $o->setitem('my-key', 123); + * $o->setitem('my-key-2', 456); + * + * $o->delitem('my-key-2'); + * + * + * # read-only + * $f = open('myfile', 'r'); + * $o = caching_BPT($f); + * + * $o->open(); + * echo $o->getitem('my-key'); + * + * Methods: + * + * void setitem($key, $val) + * int getitem($key) + * void delitem($key) + * bool has_key($key) + * object walker() : returns an iterator + * + * + * Walking (iterate) + * ================= + * + * $walker = $tree->walker( + * string $key_lower, bool $include_lower, + * string $key_upper, bool $include_upper); + * while ($walker->valid) { + * echo $walker->current_key(), + * $walker->current_value(); + * $walker->next(); + * } + * $walker->first(); #resets internal pointer + * + * + * Internal FIFO + * ============= + * + * $tree->enable_fifo(); + * do_some_processing(); + * $tree->disable_fifo(); + * + * This should make the processing (slightly) faster + * when key accesses are localized. Don't use it in walking + * (no need for it) or for single write operations. + * + * enable_fifo() takes an optional parameter $length + * (defaults to 33) remember that larger fifos will consume + * more memory. + * + * + * Other options + * ============= + * + * This PHP implementation slightly differs from the Python + * one, because you can choose a constant (affecting all of the + * instanced objects) defining the order relation of the keys. + * + * Usually you would like your keys to be sorted ascending + * (SORT_ASC, the default), but sometimes you might want + * to create a btree where keys are kept in reverse order. + * + * In this case, you just have to define somewhere in your code + * + * define('BPT_SORT', SORT_DESC); + * + * and the include the library. + * + * This somehow weird approach should however make the computation + * slightly faster: the setting is evaluated only once, + * when including the library; the compare routine is then defined + * accordingly, and never checked again; otherwise the compare + * routine would have to check the setting each time it's called. + * + */ + + +function d($s) { + if (is_array($s)) { $s = '{ '.implode(", ", $s) . ' }'; } + + $x = debug_backtrace(); + $f = @$x[1]['function']; + $l = $x[0]['line']; + + echo "[{$f}:{$l}]\t", $s, "\n"; + #echo "---[{$x[2]['function']}:{$x[2]['line']}]\n"; +} + +error_reporting(E_ALL); + + +if (!defined('BPT_SORT')) { +/** + * @const int type of sorting, defaults to SORT_ASC (ascending); + * SORT_DESC (descending) is also possibile + */ +define('BPT_SORT', SORT_ASC); +} + + +/** + * @const int no room error + */ +define('NOROOMERROR', -100); + +/** + * @const int null node + */ +define('BPT_NULL', -1); +/** + * @const int null seek position + */ +define('BPT_NULLSEEK', -1); +/** + * @const string magic string for bplustree + */ +define('BPT_VERSION_MAGIC', 'BPT01'); + + +#define('BPT_INT_SIZE', 4); + + +/** + * @const int root bit flag + */ +define('BPT_FLAG_ROOT_BIT', 1); +/** + * @const int interior node flag + */ +define('BPT_FLAG_INTERIOR', 2); +/** + * @const int root flag, shorthand for BPT_FLAG_ROOT_BIT | BPT_FLAG_INTERIOR + */ +define('BPT_FLAG_ROOT', BPT_FLAG_ROOT_BIT | BPT_FLAG_INTERIOR); +/** + * @const int free node flag + */ +define('BPT_FLAG_FREE', 4); +/** + * @const int leaf flag + */ +define('BPT_FLAG_LEAF', 8); +/** + * @const int leaf+root flag, shorthand for BPT_FLAG_ROOT_BIT | BPT_FLAG_LEAF + */ +define('BPT_FLAG_LEAFANDROOT', BPT_FLAG_ROOT_BIT | BPT_FLAG_LEAF); + +/** + * Abstraction for array of pairs + * (meaning with "pair" an array containing two elements) + * works only read-only + * + */ +class pairs { + /** + * @var array of the first elements of each pair (private) + * + */ + var $a; + /** + * @var array of the second elements of each pair (private) + * + */ + var $b; + /** + * @var integer current size of the array of pairs + * + */ + var $count; + + /* + * Constructor + * @param array $a array of the first elements of each pair + * @parma array $b array of the second elements of each pair + * + */ + function pairs(array $a, array $b) { + if (($v=count($a))!=count($b)) + trigger_error("Size of params must match", E_USER_ERROR); + $this->a=$a; $this->b=$b; + $this->count = $v; + } + /* + * returns a slice of the current Couplets object as a new Couplets object + * (works like array_slice()) + * @param integer $offset offset from the start of the array (count starting from zero) + * @param integer|null $count number of elements to return starting from $offset + * + * @returns pairs object + * + */ + function &slice($offset, $count=null) { + if (is_null($count)) $count = $this->count; + + $a =& new pairs( + array_slice($this->a, $offset, $count), + array_slice($this->b, $offset, $count) + ); + + return $a; + } + /** + * inserts a pair ($a, $b) at the offset $offset eventually + * pushing other elements to the right + * @param int $offset offset at which insert + * @param mixed $a first element in the pair + * @param mixed $b second element in the pair + */ + function insert($offset, $a, $b) { + array_splice($this->a, $offset, 0, $a); + array_splice($this->b, $offset, 0, $b); + $this->count++; + } + /** + * inserts a pair ($a, $b) in the sub-array of pairs + * between $lo and $hi, assuming the array is ordered, + * comparing only the first elements of each pair + * (assumes there aren't duplicates) + * uses {@link BPT_keycmp} for comparing + * + * @param mixed $a first element of the pair + * @param mixed $b second element of the pair + * @param int $lo starting offset of the sub-array + * @param int|nul $hi ending offset of the sub-array + */ + function insort($a, $b, $lo=0, $hi=null) { + if (is_null($hi)) $hi=$this->count; + $A = $this->a; + $X = $a; + while($lo<$hi) { + $mid = (int)(($lo+$hi)/2); + if (BPT_keycmp($X,$A[$mid])<0) $hi=$mid; + else $lo=$mid+1; + } + $this->insert($lo, $a, $b); + } + /** + * removes the pair at the offset $offset + * @param int $offset offset of the pair targeted for deletion + */ + + function remove($offset) { + array_splice($this->a, $offset, 1); + array_splice($this->b, $offset, 1); + $this->count--; + } + /** + * append at the end of the current object the contents + * of another pairs object + * + * @param pairs $pairs a pair object of which the contents + * will be appended to this + */ + function append(&$pairs) { + array_splice($this->a, $this->count, 0, $pairs->a); + array_splice($this->b, $this->count, 0, $pairs->b); + $this->count+=$pairs->count; + } + /** + * make the object fields read-only + */ + function __set($x,$y) { + trigger_error("Can't edit pairs directly'", E_USER_ERROR); + } + +} + + +if (BPT_SORT == SORT_ASC) { +/** + * compares key $a and $b using a less-than or greather-than relation + * depending on {@link BPT_SORT} constants + * + * the function is very simple, returns strcmp($a,$b) or -strcmp($a,$b) + * depending on the BPT_SORT constant: to be a little bit faster, no check is done + * by the function itself; instead it is defined at load time, depending + * on the value of the BPT_SORT constant + * + */ +function BPT_keycmp($a,$b) { return strcmp($a,$b); } +} else { +function BPT_keycmp($a,$b) { return -strcmp($a,$b); } +} + +/* +function _BPT_bisect($a, $x, $lo=0, $hi=null) { + if (is_null($hi)) + $hi=count($a); + while($lo<$hi && $a[$lo++]<$x) ; + return $lo; +} + */ +/** + * locate an element $x or the nearest bigger one + * in the array $a, starting from offset $lo + * and limiting to offset $hi, assuming that $a is + * ordered by the relation BPT_keycmp + * + * @param mixed $a source array + * @param mixed $x element to find + * @param int $lo leftmost offset + * @param int|null $hi rightmost offset + * + * @returns integer + * + */ + +function BPT_bisect($a, $x, $lo=0, $hi=null) { + if (is_null($hi)) { + $hi = count($a); + } + while ($lo < $hi) { + $mid = (int)(($lo+$hi)/2); + #if ($x < $a[$mid]) + if (BPT_keycmp($x,$a[$mid])<0) + $hi = $mid; + else + $lo = $mid+1; + } + return $lo; +} + +/* +function BPT_insort(&$a, $x, $lo=0, $hi=null) { + if (is_null($hi)) + $hi = count($a); + while ($lo<$hi) { + $mid = (int) (($lo+$hi)/2); + if ($x < $a[$mid]) + $hi = $mid; + else + $lo = $mid+1; + } + array_splice($a, $lo, 0, array($x)); +} +*/ + +/* + * fifo of bplustree nodes + */ +class BPlusTree_Node_Fifo { + /** + * @var array array of elements + */ + var $fifo = array(); + /** + * @var array dictionary (associative array) of elements + */ + var $fifo_dict = array(); + /** + * var int size of the fifo + */ + var $size; + + /** + * constructor + * @param int $size specifies size (defaults to 30) + */ + function BPlusTree_Node_Fifo($size=30) { + $this->fifosize=$size; + } + + /** + * flushes all of the contents of the fifo + * to disk + */ + function flush_fifo(){ + reset($this->fifo); + while(list(,$node)=each($this->fifo)){ + if ($node->dirty) { + $node->store(1); + } + } + $this->fifo = array(); + $this->fifo_dict = array(); + } +} + +/** + * defines structure and methods of the node + * of a bplustree + */ + +class BPlusTree_Node { + + /** + * @var integer flags (defined as BPT_* constants) + * specifying the nature of the node (leaf, interior, and combos) + * + */ + var $flag; + /** + * @var integer number of child elements (or values, if a leaf) + * + */ + var $size; + /** + * @var int seek position in the file + * + */ + var $position; + /** + * @var resource stream where to output the data + * (typically a file open with fopen()) + */ + var $infile; + /** + * @var int maximum lenght of a string key + */ + var $keylen; + /** + * @var array array of strings, containing keys, of size $size + */ + var $keys; + /** + * @var array array of longs, of size $size+1 + * if leaf, elements in [0,$size] are the values of each key in $keys: + * at offset $size - ($size+1)-th element - there's the seek + * position of the next leaf (or BPT_NULLSEEK if rightmost leaf) + * + * if interior, + * + * - offset 0 points to the child node where keys are + * are all LESS than those in this node (actually, to $keys[0]), + * + * - offset 1 points to the child node where keys are GREATER or EQUAL to $keys[0] + * but LESS than $keys[1], + * + * - offset 2 points to the child node where keys are >= $keys[1] but < $keys[2], etc... + * + * + * with LESS, GREATER we always mean by the relation {@link BPT_keycmp} + * + * + */ + var $indices; + /** + * @var bool controls deferred writes (using fifo) + * + */ + var $dirty= false; + + /** + * @var BPlusTree_Node_Fifo object of type {@link BPlusTree_Node_Fifo} + */ + var $fifo = null; + /** + * @var int number of valid keys in $keys + */ + var $validkeys; + + + /** + * constructor + * @param int $flag flag of current node + * @param int $size size of node + * @param int $keylen max key length + * @param long $position seek position in file + * @param resource resource stream (opened file) + * @param BPlusTree_Node object from which cloning properties + */ + function BPlusTree_Node($flag, + $size, + $keylen, + $position, + $infile, + $cloner = null) { + + $this->flag = $flag; + + if ($size < 0) { + trigger_error('size must be positive', E_USER_ERROR); + } + + $this->size = $size; + + $this->keylen = $keylen; + $this->position = $position; + $this->infile = $infile; + // last (+1) is successor seek TODO move to its own! + $this->indices = array_fill(0, $size+1, BPT_NULL); + $this->keys = array_fill(0, $size, ''); + + if (is_null($cloner)) { + $this->storage = 2 + + /* 2 chars for flag, validkeys */ + $size*4+4 + /* n 4B-long indices + 1 4B-long next pointer*/ + $size*$keylen ; /* n keylen-bytes long keys */ + } else { + $this->storage = $cloner->storage; + $this->fifo = $cloner->fifo; + } + + if ($flag == BPT_FLAG_INTERIOR || $flag == BPT_FLAG_ROOT) { + $this->validkeys = -1; + } else { + $this->validkeys = 0; + } + + } + + /** + * + * reinitialize keys + * + */ + + function clear() { + $size = $this->size; + // re-init keys + + $this->keys = array_fill(0, $size, ''); + $this->validkeys = 0; + if (($this->flag & BPT_FLAG_INTERIOR) == BPT_FLAG_INTERIOR) { + // re-init all indices + $this->indices = array_fill(0, $size+1, BPT_NULL); + $this->validkeys = -1; + } else { + $fwd = $this->indices[$size]; // forward pointer + $this->indices = array_fill(0, $size, BPT_NULL); + $this->keys = array_fill(0, $size, ''); + $this->indices[] = $fwd; + } + } + + /** + * returns clone of the obect at position $position + * @param long $position seek position + */ + function getclone($position) { + + if ($this->fifo) { + $dict = $this->fifo->fifo_dict; + if (isset($dict[$position])) { + return $dict[$position]; + } + } + + + return new BPlusTree_Node( + $this->flag, + $this->size, + $this->keylen, + $position, + $this->infile, + $this + ); + } + + /** + * put first index (seek position for less-than child) + * + * @param int $index seek position + */ + function putfirstindex($index) { + if ($this->validkeys>=0) + trigger_error("Can't putfirstindex on full node", E_USER_ERROR); + $this->indices[0] = $index; + $this->validkeys = 0; + } + + /** + * links node $node to this node as a child, using key $key + * (this node must be interior) + * + * @param string $key key string + * @param object $node node to link + * + */ + function putnode($key, &$node) { + $position = $node->position; + return $this->putposition($key, $position); + # if ($x == NOROOMERROR) {print_r(debug_backtrace());fail();} + } + + /* + * + * links a seek position $position to the key $key + * + * @param string $key key string + * @param int $position seek position (pointer to the new child node) + * + */ + + function putposition($key, $position) { + + if (($this->flag & BPT_FLAG_INTERIOR) != BPT_FLAG_INTERIOR) { + trigger_error("Can't insert into leaf node", E_USER_ERROR); + } + + $validkeys = $this->validkeys; + $last = $this->validkeys+1; + + if ($this->validkeys>=$this->size) { + #trigger_error('No room error', E_USER_WARNING); + return NOROOMERROR; + } + + // store the key + if ($validkeys<0) { // no nodes currently + d("no keys"); + $this->validkeys = 0; + $this->indices[0] = $position; + } else { + // there are nodes + $keys =& $this->keys; + // is the key there already? + if (in_array($key, $keys)) { + if (array_search($key, $keys) < $validkeys) + trigger_error("reinsert of node for existing key ($key)", + E_USER_ERROR); + } + + $place = BPT_bisect($keys, $key, 0, $validkeys); + // insert at position $place + array_splice($keys, $place, 0, $key); + // delete last element + unset($keys[$last]); + $keys = array_values($keys); # reset array indices + #array_splice($keys, $last, 1); + + // store the index + $indices =& $this->indices; + #echo "inserting $position before ", var_dump($indices,1), "\n"; + array_splice($indices, $place+1, 0, $position); + unset($indices[$last+1]); + $indices = array_values($indices); + #array_splice($indices, $last+1, 1); + $this->validkeys = $last; + } + } + + + /** + * deletes from interior nodes + * + * @param string $key target key + */ + + function delnode($key) { + // {{{ + if (($this->flag & BPT_FLAG_INTERIOR) != BPT_FLAG_INTERIOR) { + trigger_error("Can't delete node from leaf node"); + } + if ($this->validkeys < 0) { + trigger_error("No such key (empty)"); + } + + $validkeys = $this->validkeys; + $indices =& $this->indices; + $keys =& $this->keys; + if (is_null($key)) { + $place = 0; + $indexplace = 0; + } else { + $place = array_search($key, $keys); + $indexplace = $place+1; + } + + #unset($indices[$indexplace]); + array_splice($indices, $indexplace, 1); + $indices[] = BPT_NULLSEEK; + #$indices = array_values($indices); + + #unset($keys[$place]); + array_splice($keys, $place, 1); + $keys[] = ''; + #$keys = array_values($keys); + + $this->validkeys = $validkeys - 1; + } + // }}} + + /** + * slices the $this->keys array to the number of valid keys + * in $this->validkeys + * + * @returns array array of valid keys + */ + + function get_keys() { + $validkeys = $this->validkeys; + if ($validkeys<=0) { + return array(); + } + + return array_slice($this->keys, 0, $validkeys); + } + + + /* + * mimics python's map(None, a, b) + * returns the list of (a,b) pairs + * where a is in list $a and b is in list $b + * + * + + function _oldpairs($a, $b) { + $c = array(); + reset($a); + reset($b); + while((list(,$v1) = each($a)) && + (list(,$v2) = each($b))) { + $c[] = array($v1, $v2); + } + return $c; + } + */ + + + /** + * mimic's python's map(None, a, b); + * a, b must be of the same size + * + * @param array $a first array + * @param array $b second array + * + * @returns object {@link pairs} + */ + function &_pairs($a, $b) { + $x =& new pairs($a,$b); + return $x; + + } + + + /** + * returns an object containing pairs (key, index) + * for all of the valid keys and indices + * + * @param string $leftmost leftmost key corresponding + * to first index (seek) in interior nodes; ignored in leaves + * + * @returns object pairs + * + */ + function keys_indices($leftmost) { + $keys = $this->get_keys(); + if (($this->flag & BPT_FLAG_INTERIOR) == BPT_FLAG_INTERIOR) { + // interior nodes start with + // the pointer to the "less than key[0]" subtree: + // we need pairs (key, indices) so we add the leftmost key + // on top + array_unshift($keys, $leftmost); + } + $indices = array_slice($this->indices, 0, count($keys)); + return $this->_pairs($keys, $indices); + } + + + /** + * returns child, searching for $key in an interior node + * + * @param string $key target $key + * @returns object BPlusTree_Node + * + */ + function &getnode($key) { + if (($this->flag & BPT_FLAG_INTERIOR) != BPT_FLAG_INTERIOR) { + trigger_error("cannot getnode from leaf node", E_USER_ERROR); + } + if (is_null($key)) + $index = 0; + else + $index = array_search($key, $this->keys)+1; + + $place = $this->indices[$index]; + if ($place<0) { + debug_print_backtrace(); + trigger_error("Invalid position! ($place, $key)", E_USER_ERROR); + } + + // fifo + + $fifo =& $this->fifo; + if ($fifo) { + $ff =& $fifo->fifo; + $fd =& $fifo->fifo_dict; + if (isset($fd[$place])) { + $node =& $fd[$place]; + #unset($ff[$place]); + $idx = array_search($node, $ff, true); + array_splice($ff, $idx, 1); + array_unshift($ff, $node); + return $node; + } + } + + $node =& $this->getclone($place); + $node =& $node->materialize(); + return $node; + } + + /***** leaf mode operations *****/ + + /** + * if leaf returns the next leaf on the right + * + */ + + function &next() { + if (($this->flag & BPT_LEAF_FLAG) != BPT_FLAG_LEAF) { + trigger_error("cannot get next for non-leaf", E_USER_ERROR); + } + $place = $this->indices[$this->size]; + if ($place == BPT_NULLSEEK) + return null; + else { + $node =& $this->getclone($place); + $node =& $node->materialize(); + return $node; + } + + } + + /* + function &prev() { + if (($this->flag & BPT_LEAF_FLAG) != BPT_FLAG_LEAF) { + trigger_error("cannot get next for non-leaf", E_USER_ERROR); + } + $place = $this->prev; + if ($place == BPT_NULLSEEK) + return null; + else { + $node =& $this->getclone($place); + $node =& $node->materialize(); + return $node; + } + + } + */ + + /** + * put ($key, $val) in a leaf + * + * @param string $key target string + * @param int $val value for $key + */ + function putvalue($key, $val) { + if (!is_string($key)) + trigger_error("$key must be string", E_USER_ERROR); + + if (($this->flag & BPT_FLAG_LEAF) != BPT_FLAG_LEAF) { + #print_r($this); + trigger_error("cannot get next for non-leaf ($key)", E_USER_ERROR); + } + $validkeys = $this->validkeys; + $indices =& $this->indices; + $keys =& $this->keys; + + if ($validkeys<=0) { // empty + // first entry + $indices[0] = $val; + $keys[0] = $key; + $this->validkeys = 1; + } else { + $place = null; + if (in_array($key, $keys)) { + $place = array_search($key, $keys); + if ($place >= $validkeys) { + $place = null; + } + } + if (!is_null($place)) { + $keys[$place] = $key; + $indices[$place] = $val; + } else { + + if ($validkeys >= $this->size) { + #trigger_error("no room", E_USER_WARNING); + return NOROOMERROR; + } + + $place = BPT_bisect($keys, $key, 0, $validkeys); + $last = $validkeys+1; + + # del keys[validkeys] + # del indices[validkeys] + #array_splice($keys, $validkeys, 1); + unset($keys[$validkeys]); + $keys = array_values($keys); + #array_splice($indices, $validkeys, 1); + unset($indices[$validkeys]); + $indices = array_values($indices); + + array_splice($keys, $place, 0, $key); + array_splice($indices, $place, 0, $val); + + #echo implode(', ', $keys), " ::: $place \n"; + $this->validkeys = $last; + + } + } + } + + /** + * for each $key, $index in $keys_indices + * put the correspoding values (assumes this is a leaf) + * + * @param object $keys_indices object of type {@link pairs} + */ + function put_all_values($keys_indices) { + $this->clear(); + $indices =& $this->indices; + $keys =& $this->keys; + $length = $this->validkeys = $keys_indices->count;#count($keys_indices); + if ($length > $this->size) + trigger_error("bad length $length", E_USER_ERROR); + + for ($i=0; $i<$length; $i++) { + #list($keys[$i], $indices[$i]) = $keys_indices[$i]; + $keys[$i] = $keys_indices->a[$i]; + $indices[$i] = $keys_indices->b[$i]; + } + } + + + /** + * for each $key, $index in $keys_indices + * put the correspoding seek positions (assumes this is an interior node) + * + * @param int $first_position leftmost pointer (to less-than child) + * @param object $keys_indices object of type {@link pairs} + * + */ + + function put_all_positions($first_position, $keys_positions) { + $this->clear(); + $indices =& $this->indices; + $keys =& $this->keys; + $length = $this->validkeys = $keys_positions->count;#count($keys_positions); + if ($length > $this->size) { + trigger_error("bad length $length", E_USER_ERROR); + } + $indices[0] = $first_position; + for ($i=0; $i<$length; $i++) { + #list($keys[$i], $indices[$i+1]) = $keys_positions[$i]; + $keys[$i] = $keys_positions->a[$i]; + $indices[$i+1] = $keys_positions->b[$i]; + } + } + + + /** + * assuming this is a leaf, returns value for $key + * @param $key string target key + * @returns int|false corresponding integer or false if key is missing + * + */ + function getvalue($key) { + + #d(implode(",",$this->keys)); + $place = array_search($key, $this->keys); + if ($place!==false) { + return $this->indices[$place]; + } else { + trigger_error("key '$key' not found", E_USER_WARNING); + return false; + } + + } + + /** + * if leaf, creates a neighbor for this node: a new leaf + * linked to this + * + * @param int $position seek position for the new neighborù + * @returns object BPlusTree_Node + * + */ + function &newneighbour($position) { + if (($this->flag & BPT_FLAG_LEAF) != BPT_FLAG_LEAF) + trigger_error('cannot make leaf neighbour for non-leaf'); + + // create clone + $neighbour =& $this->getclone($position); + $size = $this->size; + $indices =& $this->indices; + + // linking siblings + $neighbour->indices[$size] = $indices[$size]; + $indices[$size] = $position; + return $neighbour; + } + + /** + * if leaf, returns the leaf next to this + * @return object BPlusTree_Node + */ + function &nextneighbour() { + if (($this->flag & BPT_FLAG_LEAF) != BPT_FLAG_LEAF) + trigger_error('cannot get leaf neighbour for non-leaf'); + + $size = $this->size; + $position = $this->indices[$size]; + if ($position == BPT_NULLSEEK) { + return null; + } else { + $neighbour = $this->getclone($position); + $neighbour = $neighbour->materialize(); + return $neighbour; + } + + } + + /* + function &prevneighbour() { + if (($this->flag & BPT_FLAG_LEAF) != BPT_FLAG_LEAF) + trigger_error('cannot get leaf neighbour for non-leaf'); + + #$size = $this->size; + $position = $this->prev; # $this->indices[$size]; + if ($position == BPT_NULLSEEK) { + return null; + } else { + $neighbour = $this->getclone($position); + $neighbour = $neighbour->materialize(); + return $neighbour; + } + + }*/ + + /** + * if leaf, deletes neighbor on the right, and re-link + * with the following + * + * @param object $next target for deletion + * @param free $free seek position of last free node in free list + * + * @returns int new free position + */ + function delnext(&$next, $free) { + d("delnext called:"); + #print_r($this); + $size = $this->size; + if ($this->indices[$size]!=$next->position) { + trigger_error("invalid next pointer ". + "{$this->indices[$size]}!={$next->position})", E_USER_ERROR); + } + $this->indices[$size] = $next->indices[$size]; + return $next->free($free); + } + + /** + * if leaf, deletes corresponding value + * + * @param string $key target key + */ + function delvalue($key) { + $keys =& $this->keys; + $indices =& $this->indices; + if (!in_array($key, $keys)) { + d($keys); + trigger_error ("missing key, can't delete", E_USER_ERROR); + } + $place = array_search($key, $keys); + $validkeys = $this->validkeys; + $prev = $validkeys-1; + + # delete + array_splice($keys, $place, 1); + array_splice($indices, $place, 1); + #unset($keys[$place]); + #$keys[]=''; + #$keys = array_values($keys); + #unset($indices[$place]); + #$indices[] = BPT_NULL; + #$indices = array_values($indices); + + # insert NULLs/empties + array_splice($keys, $prev, 0, ''); + array_splice($indices, $prev, 0, BPT_NULL); + + $this->validkeys=$prev;//validkeys-1 + + } + + /* + * add self to free list, retunr position as new free position + * + * @param int $freenodeposition current last free node + * + */ + function free($freenodeposition) { + $this->flag = BPT_FLAG_FREE; + $this->indices[0] = $freenodeposition; + $this->store(); + return $this->position; + } + + /* + * assuming self is head of free list, + * pop self off freelist, return next free position; + * does not update file + * + * @param integer $flag flag for new node + * @return object new node + * + function unfree($flag) { + $next = $this->indices[0]; + $this->flag = $flag; + $this->validkeys = 0; + $this->indices[0] = BPT_NULLSEEK; + $this->clear(); + return $next; + } + */ + + /** + * get free node of same shape as self from $this->file; + * make one if none exist; + * assume $freeposition is seek position of next free node + * + * @param int $freeposition seek position of next freenode + * @param callback $freenode_callback is specified it is a func to call + * with a new free list head, if needed + * + * @returns array(&$node, $newfreeposition) + * + * + * + * + */ + function getfreenode($freeposition, $freenode_callback=null) { + d("GETTING FREE AT $freeposition"); + if ($freeposition == BPT_NULLSEEK) { + $file = $this->infile; + fseek($file, 0, SEEK_END); + $position = ftell($file); + d("ALLOCATING SPACE..."); + $thenode =& $this->getclone($position); + $thenode->store(); + return array(&$thenode, BPT_NULLSEEK); + } else { + $position = $freeposition; + $thenode = $this->getclone($position); + // get old node + $thenode = $thenode->materialize(); + // ptr to next + $next = $thenode->indices[0]; + if (!is_null($freenode_callback)) { + call_user_func($freenode_callback, $next); + } + + $thenode->BplusTree_Node( + $this->flag, + $this->size, + $this->keylen, + $position, + $this->infile + ); + + $thenode->store(); // save reinit'ed node + return array(&$thenode, $next); + } + } + + + /** + * + * write this to file + * + * @param bool $force forces write back if fifo is enabled, defaults to false + * + */ + + function store($force = false) { + // {{{ + $position = $this->position; + if (is_null($position)) + trigger_error("position cannot be null",E_USER_ERROR); + + $fifo =& $this->fifo; + if (!$force && $fifo) { + $fd =& $fifo->fifo_dict; + if (isset($fd[$this->position]) && $fd[$position] === $this) { + $this->dirty = true; + return; // defer processing + } + } + + $f = $this->infile; + fseek($f, $position); + $data = $this->linearize(); + fwrite($f, $data); + $last = ftell($f); + $this->dirty = false; + + if (!$force && $this->fifo) { + $this->add_to_fifo(); + } + + return $last; + + } + //}}} + + /** + * load node from file + * + * @returns object BPlusTree_Node + * + */ + function &materialize() { + $position = $this->position; + + if ($this->fifo) { + $fifo =& $this->fifo; + $dict =& $fifo->fifo_dict; + $ff =& $fifo->fifo; + if (isset($dict[$position])) { + $node =& $dict[$position]; + if ($node !== $ff[0]) { + $nidx = array_search($node, $ff, true); + unset($ff[$nidx]); + array_unshift($ff, $node); + } + return $node; + } + } + + $f = $this->infile; + fseek($f, $position); + $data = fread($f, $this->storage); + $this->delinearize($data); + + + if ($this->fifo) { + $this->add_to_fifo(); + } + + + + return $this; + } + + /** + * @returns string binary string encoding this node + */ + function linearize() { + $params = array( + 'C2L'.($this->size+1), + $this->flag, + $this->validkeys + ); + + foreach($this->indices as $i) + $params[] = $i; + + $s = call_user_func_array('pack', $params); + + + $x = ''; + for($i = 0; $i<$this->validkeys; $i++) { + $k = $this->keys[$i]; + if (strlen($k)>$this->keylen) + trigger_error("Invalid keylen for '$k'", E_USER_ERROR); + $x .= str_pad($k, $this->keylen, chr(0)); + } + + $x = str_pad($x, $this->size*$this->keylen, chr(0)); + + + $s .= $x; + $l = strlen($s); + if (strlen($s) != $this->storage) { + trigger_error("bad storage $l != {$this->storage}", E_USER_ERROR); + } + + return $s; + } + + /** + * get properties of this node from the string $s encoded via {@link BPlusTree_Node::linearize} + * + * @param string $s binary string + * + */ + + function delinearize($s) { + //{{{ + if (strlen($s)!=$this->storage) + trigger_error("bad storage", E_USER_ERROR); + + + $x = 'Cflag/Cvalidkeys/'; + $n = $this->size+1; + for ($i = 0; $i<$n; $i++) { + $x .= "Lindices{$i}/"; + } + $arr = unpack($x, $s); + + $this->flag = $arr['flag']; + $this->validkeys = $arr['validkeys']; + + for ($i = 0; $i<$n; $i++) { + $this->indices[$i] = $arr["indices{$i}"]; + } + + for ($i = 0, $j = ($n*4+2); $i<$this->validkeys; $i++, $j+=$this->keylen) { + + $this->keys[$i] = rtrim(substr($s, $j, $this->keylen)); + } + + } + //}}} + + // foo dump + /** + * + * prints a dump of the tree on scree + * @param string $indent custom indentation + * + */ + function dump($indent='') { + //{{{ + $flag = $this->flag; + if ($flag == BPT_FLAG_FREE) { + echo "free->", $this->position, "\n"; + $nextp = $this->indices[0]; + if ($nextp!=BPT_NULLSEEK) { + $next =& $this->getclone($nextp); + $next =& $next->materialize(); + $next->dump(); + } else { + echo "!last\n"; + } + return; + } + $nextindent = $indent . " "; + echo $indent; + switch ($flag) { + case BPT_FLAG_ROOT: echo "root"; break; + case BPT_FLAG_INTERIOR: echo "interior"; break; + case BPT_FLAG_LEAF: echo "leaf"; break; + case BPT_FLAG_LEAFANDROOT: echo "root&leaf"; break; + default : echo "invalid flag??? ", $flag; + } + + echo "($flag) "; + echo " ", $this->position, " valid=", $this->validkeys, "\n"; + echo $indent, "keys {", implode(', ', $this->keys), "}\n"; + echo $indent, "seeks {", implode(", ", $this->indices),"}\n"; + if (($flag & BPT_FLAG_INTERIOR) == BPT_FLAG_INTERIOR) { + reset($this->indices); + while(list(,$i) = each($this->indices)) { + if ($i!=BPT_NULLSEEK) { + // interior + $n =& $this->getclone($i); + $n =& $n->materialize(); + $n->dump($nextindent); + } else { + //leaf + continue; + } + } + } + echo $indent, "*****\n"; + + }//}}}*/ + + + /** + * adds this node to fifo + */ + function add_to_fifo() { + $fifo =& $this->fifo; + $ff =& $fifo->fifo; + $dict =& $fifo->fifo_dict; + + $position = $this->position; + if(isset($dict[$position])) { + $old =& $dict[$position]; + unset($dict[$position]); + # ff.remove(old) + array_splice($ff, array_search($old, $ff, true), 1); + } + $dict[$this->position] =& $this; + array_splice($ff, 0, 0, array(&$this)); + if (count($ff)>$this->fifo->fifosize) { + $lastidx = count($ff)-1; + $last = $ff[$lastidx]; + unset($ff[$lastidx]); + unset($dict[$last->position]); + if ($last->dirty) { + $last->store(true); + } + } + $is_o=true; + while((list(,$v)=each($ff)) && $is_o=is_object($v)); + if (!$is_o) {d("CAPUTT");f();} + } + + /** + * @param int $size defaults to 33 + * + */ + + function enable_fifo($size = 33) { + if ($size<5 || $size>1000000) { + trigger_error("size not valid $size"); + } + $this->fifo = new BPlusTree_Node_Fifo($size); + } + + /** + * disables fifo (first flushes to disk) + * + */ + + function disable_fifo() { + if ($this->fifo) { + $this->fifo->flush_fifo(); + $this->fifo = null; + } + } + + +} + +/** + * main class BPlusTree + * creates a B+Tree with string keys and integer values + * + * public methods are only {@link BPlusTree::getitem} + * {@link BPlusTree::setitem} {@link BPlusTree::delitem} + * {@link BPlusTree::walker} + * + * + */ + +class BPlusTree { + /** + * @var int number of values + */ + var $length = null; + /** + * @var bool used for deferred writes (if fifo is enabled + */ + var $dirty = false; + # var $headerformat = "%10d %10d %10d %10d %10d\n"; + /** + * @var int seek position of root in file + */ + var $root_seek = BPT_NULLSEEK; + /** + * @var int seek position of the start of the freelist + * + */ + var $free = BPT_NULLSEEK; + /** + * @var object BPlusTree_Node root node + */ + var $root = null; /* */ + /** + * @var int length of the file header in bytes + */ + var $headersize; + /** + * @var bool true if fifo is enabled + */ + var $fifo_enabled = false; + + /** + * constructor + * @param resource $infile resource of open file + * @param int $position offset from the beginning of the file (usually 0) + * @param int $nodesize size of the node + * @param int $keylen maximum lenght of a key in bytes (unicode extended chars evaluate to two chars) + */ + + function BplusTree($infile, $pos=null, $nodesize=null, $keylen=10) { + if (!is_null($keylen) && $keylen<=2) { + trigger_error("$keylen must be greater than 2", E_USER_ERROR); + } + $this->root_seek = BPT_NULLSEEK; + $this->free = BPT_NULLSEEK; + $this->root = null; + $this->file = $infile; + #if ($nodesize<6) trigger_error("nodesize must be >= 6", E_USER_ERROR); + $this->nodesize = $nodesize; + $this->keylen = $keylen; + if (is_null($pos)) { + $pos = 0; + } + $this->position = $pos; + $this->headersize = 4*4+6; /* 4 4-byte longs, 1 char, 5-byte magic string*/ + } + + /** + * returns an iterator for the tree + * @param string $keylower key lower limit of the iterator + * @param bool $includelower if true $keylower is included in the iterator + * @param string $keyupper key upper bound of the iterator + * @param bool $includeupper if true $keyupper is included in the iterator + */ + function walker( + $keylower =null, + $includelower =null, + $keyupper =null, + $includeupper =null + ) { + + return new BPlusWalker($this, $keylower, $includelower, $keyupper, $includeupper); + + } + + /** + * @returns array array of properties of this object + */ + function init_params() { + return array( + $this->file, + $this->position, + $this->nodesize, + $this->keylen + ); + } + + /** + * @returns object BPlusTree_Node of the root + */ + function get_root() { + return $this->root; + } + + /** + * updates the head of the freelist and writes back to file + * @param int $position seek position of the head of the freelist + */ + function update_freelist($pos) { + if ($this->free!=$pos) { + $this->free = $pos; + $this->reset_header(); + } + } + + /** + * action to perform to setup a bplustree, header is reset, length truncated + * and a new root node is created + */ + function startup() { + if (is_null($this->nodesize) || is_null($this->keylen)) { + trigger_error("cannot initialize without nodesize, keylen specified\n") ; + } + $this->length = 0; + $this->reset_header(); + $file = $this->file; + fseek($file, 0, SEEK_END); + $this->root_seek = ftell($file); + $this->root =& new BplusTree_Node( + BPT_FLAG_LEAFANDROOT, + $this->nodesize, $this->keylen, $this->root_seek, $file + ); + $this->root->store(); + } + + /** + * reload the bplustree from file and setup for use + */ + + function open() { + $file = $this->file; + if ($this->get_parameters()===false) + return false; + $this->root =& new BplusTree_Node( + BPT_FLAG_LEAFANDROOT, + $this->nodesize, + $this->keylen, + $this->root_seek, + $file + ); + $this->root =& $this->root->materialize(); + return true; + } + + /** + * enable fifo + * @param int $size defaults to 33 + */ + + function enable_fifo($size=33) { + $this->fifo_enabled = true; + $this->root->enable_fifo($size); + } + + /** + * disables fifo (writes back header to file if needed) + * + */ + function disable_fifo() { + $this->fifo_enabled = false; + if ($this->dirty) { + $this->reset_header(); + $this->dirty = false; + } + $this->root->disable_fifo(); + } + + /** + * + * @returns string header string + */ + + function _makeheader() { + return pack('a5LCL3', BPT_VERSION_MAGIC, + $this->length, $this->keylen, + $this->nodesize, $this->root_seek, $this->free); + } + + /** + * writes back header to file (if fifo is enabled write is deferred until + * fifo is again disabled + */ + function reset_header() { + + if ($this->fifo_enabled) { + $this->dirty = true; + d("[FIFO]: deferring header reset"); + return; + } + $file = $this->file; + fseek($file, $this->position); + + $s = $this->_makeheader(); + + fwrite($file, $s); + } + + /** + * reads back properties/parameters of this tree from file; + * raises an error if version magic is wrong + * + * @returns bool false on failure, true on success + */ + function get_parameters() { + $file = $this->file; + fseek($file, $this->position); + $data = fread($file, $this->headersize); + $hdr = unpack('a5magic/Llength/Ckeylen/Lnodesize/Lroot_seek/Lfree', $data); + if ($hdr['magic']!=BPT_VERSION_MAGIC) { + trigger_error("Version magic mismatch ({$hdr['magic']}!=" + .BPT_VERSION_MAGIC.')', E_USER_WARNING); + return false; + } + $this->length = $hdr['length']; + $this->keylen = $hdr['keylen']; + $this->nodesize = $hdr['nodesize']; + $this->root_seek = $hdr['root_seek']; + $this->free = $hdr['free']; + return true; + } + + /** + * @returns length of the tree (number of values) + */ + function length() { + if (is_null($this->length)) { + if (false===$this->get_parameters()) return false; + } + return $this->length; + } + + /** + * @param string $key key to find + * @returns int associated value + * + */ + function getitem($key) { + if (is_null($this->root)) + trigger_error("not open!", E_USER_ERROR); + return $this->find($key, $this->root); + } + + /** + * traverses tree starting from $node, searching for $key + * @param string $key target key + * @param object BPlusTree_Node starting node + * + * @returns int|bool value at the leaf node containing key or false if key is missing + * + */ + function find($key, &$node) { + + while (($node->flag & BPT_FLAG_INTERIOR) == BPT_FLAG_INTERIOR) { + + $thesekeys = $node->keys; + $validkeys = $node->validkeys; + + #d(array_slice($thesekeys, 0, $validkeys)); + + $place = BPT_bisect($thesekeys, $key, 0, $validkeys); + if ($place>=$validkeys || BPT_keycmp($thesekeys[$place],$key)>0) { + #$thesekeys[$place]>$key) { + if ($place == 0) + $nodekey = null; + else + $nodekey=$thesekeys[$place-1]; + } else { + $nodekey = $key; + } + + + $node =& $node->getnode($nodekey); + } + + return $node->getvalue($key); + } + + /** + * @param $key target key + * @returns bool false if key does not exists, true otherwise + */ + function has_key($key) { + if ($this->getitem($key)!==false) { + return true; + } else { + return false; + } + } + + + /** + * sets an item in the tree with key $key and value $val + * + * @param string $key + * @param integer $val (internally stored as a 4byte long: keep it in mind!) + * + * + */ + + function setitem($key, $val) { + $curr_length = $this->length; + $root =& $this->root; + if (is_null($root)) trigger_error("not open", E_USER_ERROR); + if (!is_string($key)) trigger_error("$key must be string", E_USER_ERROR); + if (strlen($key)>$this->keylen) + trigger_error("$key is too long: MAX is {$this->keylen}", E_USER_ERROR); + + + d( "STARTING FROM ROOT..." ); + + $test1 = $this->set($key, $val, $this->root); + if (!is_null($test1)) { + d("SPLITTING ROOT"); + + // getting new rightmost interior node + list($leftmost, $node) = $test1; + #print_r($test1); + d("LEFTMOST [$leftmost]"); + + // getting new non-leaf root + list($newroot, $this->free) = $root->getfreenode($this->free); + $newroot->flag = BPT_FLAG_ROOT; + + /* + if ($root->flag == BPT_FLAG_LEAFANDROOT) { + $root->flag = BPT_FLAG_LEAF; + } else { + $root->flag = BPT_FLAG_INTERIOR; + }*/ + + // zero-ing root flag (makes an interior or leaf node + // respectively from a normal root or a leaf-root) + $root->flag &= ~BPT_FLAG_ROOT_BIT; + + $newroot->clear(); + $newroot->putfirstindex($root->position); + $newroot->putnode($leftmost, $node); + $this->root =& $newroot; + $this->root_seek = $newroot->position; + $newroot->store(); + $root->store(); + $this->reset_header(); + d("root split."); + } else { + if ($this->length!=$curr_length) { + // length changed: updating header + $this->reset_header(); + } + } + } + + + /** + * traverses subtree starting at $node, searching a place for $key + * and associates $val; split nodes if needed + * + * This function is not meant to be called outside the class, it is a + * support method for {@link BPlusTree::setitem} + * + * @param string $key + * @param int $val value associated to $key + * @param object BPlusTree_Node starting node + * + * @returns array|null a pair (leftmost, newnode) where "leftmost" is + * the leftmost key in newnode, and newnode is the split node; + * returns null if no split took place + */ + + function set($key, $val, &$node) { + //{{{ + $keys =& $node->keys; + $validkeys = $node->validkeys; + if (($node->flag & BPT_FLAG_INTERIOR) == BPT_FLAG_INTERIOR) { + d("NON LEAF: FIND DESCENDANT"); + // non-leaf: find descendant to insert + d($keys); + $place = BPT_bisect($keys, $key, 0, $validkeys); + + if ($place >= $validkeys || BPT_keycmp($keys[$place],$key)>=0) { + #$keys[$place]>=$key) { + // insert at previous node + $index = $place; + } else { + $index = $place +1 ; + } + + if ($index == 0) + $nodekey = null; + else + $nodekey =$keys[$place-1]; + + $nextnode =$node->getnode($nodekey); + $test = $this->set($key, $val, $nextnode); + // split ? + if (!is_null($test)) { + list($leftmost, $insertnode) = $test; + + // TRY + $TRY = $node->putnode($leftmost, $insertnode); + if ($TRY == NOROOMERROR) { + + d( "$key::SPLIT!" ); + // EXCEPT + + $insertindex = $insertnode->position; + + list($newnode, $this->free) = + $node->getfreenode( + $this->free, + array(&$this, 'update_freelist') + ); + + $newnode->flag = BPT_FLAG_INTERIOR; + + $ki = $node->keys_indices("dummy"); + + #list($dummy, $firstindex) = $ki[0]; #each($ki); + $firstindex = $ki->b[0]; + + #$ki = array_slice($ki, 1); + $ki->remove(0); + print_r($ki); + // insert new pair + #BPT_insort($ki, array($leftmost, $insertindex)); + $ki->insort($leftmost, $insertindex); + + $newleftmost = $this->divide_entries( + $firstindex, + $node, + $newnode, + $ki + ); + + $node->store(); + $newnode->store(); + return array($newleftmost, &$newnode); + + } else { + + d( "$key::NO SPLIT" ); + d($node->keys); + $node->store(); + return null; // no split + } + } + } else { + // leaf + d("FOUND LEAF:"); + d($keys); + if (!in_array($key, $keys) + || array_search($key, $keys) >= $validkeys) { + $newlength = $this->length +1; + } else { + $newlength = $this->length; + } + + d("[LEAF] TRYING TO PUT $key=>$val"); + if ($node->putvalue($key, $val)==NOROOMERROR) { + d("GOT NOROOMERROR"); + + $ki = $node->keys_indices("dummy"); + #BPT_insort($ki, array($key, $val)); + $ki->insort($key, $val); + list($newnode, $this->free) = + $node->getfreenode( + $this->free, + array(&$this, 'update_freelist') + ); + d("CREATE NEW NEIGHBOUR"); + $newnode =& $node->newneighbour($newnode->position); + $newnode->flag = BPT_FLAG_LEAF; + $newleftmost = $this->divide_entries(0, $node, $newnode, $ki); + $node->store(); + #print_r($node); + #print_r($newnode); + $newnode->store(); + $this->length = $newlength; + return array($newleftmost, &$newnode); + } else { + d("STORING NODE [{$node->position}]") ; + d($node->keys); + + $node->store(); + $this->length = $newlength; + return null; + } + } + } + //}}} + + /** + * + * removes key from tree at node $node; + * triggers an error if $key does not exists + * + * not meant to be called outside the class, it is a support method + * for {@link BPlusTree::delitem} + * + * @param $key target key + * @param $node node from which start + * + * @returns array a pair(&$leftmost, $size): if leftmost changes it is a string with the new leftmost + * of $node otherwise returns array(null, $size)- caller will restructure node, if needed + * size is the new size of $node + * + */ + function remove($key, &$node, $NESTING=0) { + $newnodekey = null; + d("NESTING LEVEL $NESTING"); + d("($NESTING) current size = {$this->nodesize}"); + + // first of all we check if it is non-leaf + if (($node->flag & BPT_FLAG_INTERIOR) == BPT_FLAG_INTERIOR) { + // non-leaf + + $keys =& $node->keys; + $validkeys =$node->validkeys; + $place = BPT_bisect($keys, $key, 0, $validkeys); + + if ($place>=$validkeys || BPT_keycmp($keys[$place],$key)>=0) { + #$keys[$place]>=$key) { + // delete occurs before $place + // (remember that indices are [i_0,i_1,...,i_n] + // where i_0 points to the node where all keys are < K_search + // and i_1 points to the node where keys are k_1<=K_searchgetnode($nodekey); + + // RECURSION! remove from nextnode; + // returns new leftmost if changed, otherwise null, + // and new size of the child node + list($lm, $size) = $this->remove($key, $nextnode, $NESTING+1); + + // check now for size of nodesize: is it too small? + // (less than half) + $nodesize = $this->nodesize; + $half = (int)($nodesize/2); + + # if($size==0) trigger_error("SIZE==0", E_USER_WARNING); + + if ($size < $half) { + d("($NESTING) node too small ($size<$nodesize/2), redistribute children"); + + // node is too small, need to redistribute + // children + + if (is_null($nodekey) && $validkeys == 0) { + #print_r($node); + trigger_error( + "invalid node, only one child", + E_USER_ERROR + ); + } + + + if ($place >= $validkeys) { + // final node in row, get previous + $rightnode =& $nextnode; + $rightkey = $nodekey; + if ($validkeys<=1) { + $leftkey = null; + } else { + $leftkey = $keys[$place-2]; + } + $leftnode =& $node->getnode($leftkey); + } else { + // non-final, get next + $leftnode =& $nextnode; + $leftkey = $nodekey; + + if ($index == 0) { + $rightkey = $keys[0]; + } else { + $rightkey = $keys[$place]; + } + $rightnode = $node->getnode($rightkey); + } + // get all keys and indices + $rightki = $rightnode->keys_indices($rightkey); + $leftki = $leftnode->keys_indices($leftkey); + + #$ki = array_merge($leftki, $rightki); + $leftki->append($rightki); + $ki =& $leftki; + + #array_splice ($leftki, count($leftki), 0, $rightki); + + $lki = $ki->count;#count($ki); + + // merging? + if (($lki>$nodesize) || ( + ($leftnode->flag & BPT_FLAG_LEAF)!=BPT_FLAG_LEAF + && + ($lki>=$nodesize) + )) { + // redistribute + #list($newleftkey, $firstindex) = $ki[0]; + $newleftkey = $ki->a[0]; + $firstindex = $ki->b[0]; + if (is_null($leftkey)) { + $newleftkey = $lm; + } + if (($leftnode->flag&BPT_FLAG_LEAF)!=BPT_FLAG_LEAF) { + // kill first pair + #$ki = array_slice($ki, 1); + $ki->remove(0); + } + $newrightkey = $this->divide_entries( + $firstindex, + $leftnode, + $rightnode, + $ki + ); + + // delete, reinsert right + $node->delnode($rightkey); + $node->putnode($newrightkey, $rightnode); + + // same for left if first changed + if (!is_null($leftkey) && $leftkey!=$newleftkey) { + $node->delnode($leftkey); + $node->putnode($newleftkey, $leftnode); + } + $node->store(); + $leftnode->store(); + $rightnode->store(); + } else { + d("($NESTING) node too small, need merge left<-right"); + // merge into left, free right + d($leftnode->keys); + d($leftnode->indices); + d($rightnode->indices); + #list($newleftkey, $firstindex) = $ki[0]; + $newleftkey = $ki->a[0]; + $firstindex = $ki->b[0]; + + if (($leftnode->flag&BPT_FLAG_LEAF)!=BPT_FLAG_LEAF) { + $leftnode->put_all_positions( + $firstindex, + $ki->slice(1) + #array_slice($ki, 1) + ); + } else { + $leftnode->put_all_values($ki); + } + + if ($rightnode->flag==BPT_FLAG_LEAF) { + $this->free = $leftnode->delnext( + $rightnode, $this->free + ); + } else { + $this->free = $rightnode->free($this->free); + } + if (!is_null($leftkey) && $newleftkey!=$leftkey) { + d("$newleftkey!=$leftkey"); + $node->delnode($leftkey); + $node->putnode($newleftkey, $leftnode); + } + $node->delnode($rightkey); + $node->store(); + $leftnode->store(); + d('redist:'); + d($node->keys); + d($leftnode->keys); + + $this->reset_header(); + } + if (is_null($leftkey)) + $newnodekey = $lm; + } else { + // no restructuring, + // update leftmost if needed + if (is_null($nodekey)) { + // we changed leftmost child, + // we return a new leftmost key to update parent + // ($lm is null if no update is needed) + $newnodekey = $lm; + } elseif(!is_null($lm)) { + // child's leftmost has changed: + // delete old reference + $node->delnode($nodekey); + // change reference with new key + $node->putnode($lm, $nextnode); + } + } // end restructuring if + + } else { + //leaf, base case: just delete. + if ($node->validkeys<1) { + // only for empty root + trigger_error("No such key $key", E_USER_ERROR); + } + $first=$node->keys[0]; + d($node->keys); + $node->delvalue($key); + d($node->keys); + $rest = $node->keys[0]; + if ($first!=$rest) { + $newnodekey = $rest; + } + $node->store(); + $this->length--; + + d("NEWNODEKEY: $newnodekey"); + d("VALIDKEYS: {$node->validkeys}"); + } + d($node->keys); + return array($newnodekey, $node->validkeys); + } + + + /** + * + * equally divides $entries ("array" of pairs (key,index) - + * implemented with a pair object) between two nodes $node1 and $node2 + * + * @param int $firstindex if interior node, leftmost index (pointer to less-than sub-tree) for $node1 + * @param object $node1 BPlusTree_Node first destination node + * @param object $node2 BplusTree_Node second destination node + * @param object $entries {@link pairs} object + * + * @returns string leftmost key of $node1 + */ + + function divide_entries($firstindex, &$node1, &$node2, &$entries) { +//{{{ + #$middle = (int)(count($entries)/2); + $middle = (int)($entries->count/2); + d("divide entries at $middle"); + + #$left = array_slice($entries, 0, $middle); + #$right = array_slice($entries, $middle); + $left = $entries->slice(0, $middle); + $right = $entries->slice($middle); + + + if (($node1->flag & BPT_FLAG_INTERIOR) == BPT_FLAG_INTERIOR) { + d("DIVIDING INTERIOR\n"); + #list($leftmost, $midindex) = $right[0]; + $leftmost = $right->a[0]; + $midindex = $right->b[0]; + + $node1->put_all_positions($firstindex, $left); + #$node2->put_all_positions($midindex, array_slice($right, 1)); + $node2->put_all_positions($midindex, $right->slice(1)); + d($node1->keys); + d($node2->keys); + if (in_array( + array_fill(0,$node1->size,''), + array($node1->keys,$node2->keys)) + ) { + trigger_error("splitting an empty node!", E_USER_ERROR); + } + return $leftmost; + } else { + d("DIVIDING non-INTERIOR"); + $node1->put_all_values($left); + $node2->put_all_values($right); + d($node1->keys); + d($node2->keys); + // returns right leftmost + #return $right[0][0]; + return $right->a[0]; + } + } +// }}} + + + /** + * delete item $key + * @param string $key the key to delete + * + */ + + function delitem($key) { + $root = $this->root; + $currentlength = $this->length; + + $this->remove($key, $root, $NESTING=0); + + if ($root->flag == BPT_FLAG_ROOT) { + + $validkeys = $root->validkeys; + + if ($validkeys <1) { + + + if ($validkeys<0) { + trigger_error( + "invalid empty non-leaf root", + E_USER_ERROR + ); + } + + + $this->root =& $root->getnode(null); + $newroot =& $this->root; + $this->root_seek = $newroot->position; + $this->free = $root->free($this->free); + $this->reset_header(); + /* + if ($newroot->flag == BPT_FLAG_LEAF) { + $newroot->flag = BPT_FLAG_LEAFANDROOT; + } else { + $newroot->flag = BPT_FLAG_ROOT; + } + */ + $newroot->flag |= BPT_FLAG_ROOT_BIT; + $newroot->store(); + } elseif ($this->length != $currentlength) { + $this->reset_header(); + } + } elseif($root->flag != BPT_FLAG_LEAFANDROOT) { + trigger_error("invalid flag for root", E_USER_ERROR); + } elseif ($this->length != $currentlength) { + $this->reset_header(); + } + } + + function _dump() { + $free =& $this->root->getclone($this->free); + for ($i=$this->headersize; + !feof($this->file); + fseek($this->file, $i), $i+=$free->storage) { + $s = fread($this->file, $free->storage); + $free->delinearize($s); + #print_r($free); + } + } + + /** + * dumps contents of the tree to screen + */ + + function dump() { + $this->root->dump() ; + if ($this->free != BPT_NULLSEEK) { + $free =& $this->root->getclone($this->free); + $free =& $free->materialize(); + $free->dump(); + } + } + + +} + +class BPlusWalker { + + var $tree; + var $keylower; + var $includelower; + var $keyupper; + var $includeupper; + + + function BPlusWalker( + &$tree, + $keylower=null, + $includelower=null, + $keyupper=null, + $includeupper=null){ + + $this->tree =& $tree; + $this->keylower = $keylower; + $this->includelower = $includelower; + $this->keyupper = $keyupper; + $this->includeupper = $includeupper; + if ($this->tree->get_root()==null) { + $this->tree->open(); + } + $node = $this->tree->get_root(); + while(BPT_FLAG_INTERIOR == ($node->flag & BPT_FLAG_INTERIOR)) { + + if (is_null($keylower)) { + $nkey = null; + } else { + $keys =& $node->get_keys(); + $n_keys = count($keys); + $place = BPT_bisect($keys, $keylower); + if ($place==0) { + $nkey = null; + } elseif ($place>$n_keys) { + $nkey = $keys[$n_keys-1]; + } else { + $nkey = $keys[$place-1]; + } + } + + $node =& $node->getnode($nkey); + } + + $this->startnode =& $node; + $this->node =& $node; + + $this->node_index = null; + $this->valid = 0; + $this->first(); + } + + function first() { + $this->node =& $this->startnode; + $node =& $this->node; + + $keys =& $node->keys; + $keylower = $this->keylower; + $keyupper = $this->keyupper; + $validkeys= $node->validkeys; + $this->valid=0; + if ($keylower==null) { + $this->node_index = 0; + $this->valid=1; + } elseif (in_array($keylower, $keys) && $this->includelower) { + $this->node_index = array_search($keylower, $keys); + $index = $this->node_index; + if ($Index<$validkeys) { + $this->valid = 1; + } + } + if (!$this->valid) { + $place = BPT_bisect($keys, $keylower, 0, $validkeys); + if ($place < $validkeys) { + $index = $this->node_index = $place; + $testk = $keys[$index]; + /* + if ($testk>$keylower || + ($this->includelower && $testk==$keylower)) { + $this->valid = true; + } else { + $this->valid = false; + } + */ + $this->valid = BPT_keycmp($testk,$keylower)<0||#$testk>$keylower || + ($this->includelower && $testk==$keylower); + } else { + $next =& $node->nextneighbour(); + if (!is_null($next)) { + $this->startnode =& $next; + $this->first(); + return; + } else { + $this->valid = 0; + } + } + if ($this->valid && !is_null($keyupper)) { + $key = $this->current_key(); + $this->valid= ( + BPT_keycmp($key,$keyupper)<0 #$key<$keyupper + ||($this->includeupper && $key==$keyupper)); + } + } + } + + function current_key() { + if ($this->valid) return $this->node->keys[$this->node_index]; + else trigger_error("Not a valid index ({$this->node_index})"); + } + + function current_value() { + if ($this->valid) return $this->node->indices[$this->node_index]; + else trigger_error("Not a valid index ({$this->node_index})"); + } + + function current() { + if ($this->valid) { + return array( + $this->node->keys[$this->node_index], + $this->node->indices[$this->node_index] + ); + } else { + trigger_error("Not a valid index ({$this->node_index})"); + } + } + + function next() { + $nextp = $this->node_index+1; + $node =& $this->node; + if ($nextp>=$node->validkeys) { + $next =& $node->nextneighbour(); + if (is_null($next)) { + $this->valid = 0; + return; + } + $this->node =& $next; + $node =& $next; + $nextp = 0; + } + if($node->validkeys <= $nextp) { + $this->valid = 0; + } else { + $testkey = $node->keys[$nextp]; + $keyupper = $this->keyupper; + $this->valid =( is_null($keyupper) || + BPT_keycmp($testkey,$upper)<0|| + #$testkey < $keyupper || + ($this->includeupper && $testkey == keyupper) ); + if ($this->valid) $this->node_index = $nextp; + } + + return $this->valid; + + } + +} + +class caching_BPT extends BPlusTree { + + var $cache = array(); + + function getitem($key) { + if (isset($cache[$key])) + return $cache[$key]; + else return ($cache[$key] = parent::getitem($key)); + } + + function resetcache() { + $this->cache = array(); + } + + function nope() { + trigger_error("operation not permitted in caching_BPT", E_USER_WARNING); + } + + function setitem() { $this->nope(); } + + function delitem() { $this->nope(); } + +} + +class SBPlusTree extends BPlusTree { + + var $maxstring; var $stringfile; + + function SBPlusTree($infile, $stringfile, + $maxstring = 256, + $pos=null, $nodesize=null, $keylen=null) { + parent::BPlusTree(); + $this->stringfile = $stringfile; + $this->maxstring = $maxstring; + } + + function getstring($seek) { + fseek($this->stringfile, $seek); + $s = fread($this->stringfile, $this->maxstring); + return rtrim($s); + } + + function setstring($s) { + $seek = fseek($this->stringfile, 0, SEEK_END); + // nul-pad string + $x = str_pad($s, $this->maxstring, chr(0)); + fwrite($this->stringfile, $x); + return $seek; + } + + function getitem($key) { + $seek = parent::getitem($key); + return $this->getstring($seek); + } + + function setitem($key, $val) { + $seek = $this->setstring($val); + parent::setitem($key, $seek); + } + + function walker( + $keylower =null, + $includelower =null, + $keyupper =null, + $includeupper =null + ) { + + return new SBPlusWalker($this, $keylower, $includelower, $keyupper, $includeupper); + } +} + +class SBPlusWalker extends BPlusWalker { + + function current_value() { + $id = parent::current_value(); + return $this->tree->GETSTRING($id); + } + +} + +class BPlusUtils { + + function recopy_bplus($fromfile, $tofile, $class='BPlusTree') { + $fromtree = new $class($fromfile); + $fromtree->open; + list($f, $p, $n, $k) = $fromtree->init_params(); + $totree = new $class($tofile, $p, $n, $k); + $totree->startup(); + return BPlusUtils::recopy_tree($fromtree,$totree); + } + + function recopy_tree($fromtree, $totree) { + list($f, $p, $n, $k) = $totree->init_params(); + // .... + } + +} + +