问题描述
我不明白为什么我的 B 树(btree)有时会产生乱序的遍历结果,而且这种情况只在插入的数据是随机生成时才会出现。当数据按顺序插入时,“遍历结果有序”的断言总是能通过。
我怀疑问题出在我查找插入点的逻辑上。但我无法弄清楚它到底错在哪里,因为它有时又能正常工作。我想我需要找到满足“插入键 >= 节点键”的最深节点。目前,我觉得它可能会跳过最深的那个节点,但我不确定。我需要其他人帮忙看看代码。
注意下面的两个断言:第一个针对按顺序插入,第二个针对随机插入。断言错误上方打印的是遍历顺序,其中“部分有序”的结果让我感到困惑。
import bisect
import random
class BTree():
    """Node of a B-tree-like ordered index.

    Leaf nodes (``leaf=True``) each carry one ``key``/``value`` entry and have
    no children. Internal nodes hold up to ``M`` children sorted by key and
    copy the key/value of their first child when they are created by a split.

    NOTE(review): this is the listing the surrounding write-up is asking
    about. The search loop in :meth:`insert` is intentionally left as
    written; it is the part the write-up later replaces.
    """

    def __init__(self, leaf, M, key=None, value=None, parent=None):
        """Create a node.

        Args:
            leaf: True for a key/value entry, False for an internal node.
            M: number of children at which a node is split.
            key: key of the entry (or smallest key of an internal node).
            value: payload stored alongside ``key``.
            parent: node that holds this node, or None for the root.
        """
        self.leaf = leaf
        self.children = []
        self.M = M
        self.key = key
        self.value = value
        self.parent = parent

    def walk(self):
        """Yield every leaf entry below this node, depth first, in child order."""
        for child in self.children:
            if child.leaf:
                yield child
            yield from child.walk()

    def __repr__(self):
        return "{}:{}".format(self.key, self.value)

    __str__ = __repr__

    def insert(self, key, value, height=1, parent=None):
        """Insert ``key``/``value`` and return the (possibly new) root.

        Callers must rebind their root to the return value because a split
        that reaches the root creates a new root node.

        ``height`` and ``parent`` are accepted for signature compatibility
        and are not used.
        """
        next_children = self.children
        leaf = self
        found = False
        parents = [self]
        while not found:
            next_children_changed = False
            # NOTE(review): front-to-back scan with no break. Every matching
            # child rebinds ``next_children`` (the loop itself keeps iterating
            # the list it started with), so a later matching sibling can
            # change the next level after ``leaf`` was already chosen. The
            # accompanying write-up replaces this with a reversed scan that
            # stops at the first match.
            for child in next_children:
                if key >= child.key:
                    print("Inspecting {} <= {} ".format(child.key, key))
                    next_children = child.children
                    # found one stage before the end
                    if len(next_children) == 0:
                        found = True
                    else:
                        parents.append(child)
                        leaf = child
                        next_children_changed = True
            if not next_children_changed:
                found = True
        print("Trying to insert {} at Found insertion leaf {}".format(key, leaf))
        # Debug dump through the module-level walk() helper defined later in
        # this file.
        walk(leaf)
        if len(leaf.children) < leaf.M:
            leaf.insert_non_full(key, value, parents[-1])
            return self

        # The chosen node is full: split it, then keep splitting ancestors
        # that became full because they received an extra child.
        current = leaf
        new_root = None
        while current is not None:
            original_parent = current.parent
            if len(current.children) >= current.M:
                new_left, new_right, _ = current.split()
                if original_parent is None:
                    # Splitting the root: grow the tree by one level.
                    new_root = BTree(False, self.M)
                    new_root.children.append(new_left)
                    new_root.children.append(new_right)
                    new_root.key = new_left.key
                    new_root.value = new_left.value
                    holder = new_root
                else:
                    original_parent.children.remove(current)
                    original_parent.children.append(new_left)
                    original_parent.children.append(new_right)
                    original_parent.sort()
                    holder = original_parent
                new_left.parent = holder
                new_right.parent = holder
                assert new_right.key > new_left.key
            current = original_parent
        if new_root is not None:
            # split went to root
            print("Split went to root")
            return new_root.insert(key, value)
        # NOTE(review): the result of the retry is discarded here, so if the
        # retry itself creates a new root the caller keeps the old one.
        self.insert(key, value)
        return self

    def split(self):
        """Split this node's children into two new internal nodes.

        Requires at least two children (otherwise the right half is empty and
        an IndexError is raised).

        Returns:
            ``(new_left, new_sibling, separation_key)`` where
            ``separation_key`` is the first key of the right half.
        """
        new_left = BTree(False, self.M)
        new_sibling = BTree(False, self.M)
        midpoint = (len(self.children) + 1) // 2
        left_children = []
        if self.leaf:
            # A leaf being split keeps its own entry as the first left child.
            new_self = BTree(True, self.M, self.key, self.value)
            left_children = [new_self]
        left_children = left_children + self.children[0:midpoint]
        right_children = self.children[midpoint:]
        for child in left_children:
            child.parent = new_left
        for child in right_children:
            child.parent = new_sibling
        new_sibling.key = right_children[0].key
        new_sibling.value = right_children[0].value
        new_left.children = left_children
        new_sibling.children = right_children
        new_left.key = left_children[0].key
        new_left.value = left_children[0].value
        return new_left, new_sibling, right_children[0].key

    def insert_after_split(self, key, value, height=1, parent=None):
        """Insert below the child located by :meth:`find_location_for_key`.

        Falls back to inserting directly into this node when no child key is
        <= ``key``. Not called by any code visible in this file.
        """
        insertion_point, _ = self.find_location_for_key(key)
        if insertion_point is None:
            self.insert_non_full(key, value, parent)
            return self
        return insertion_point.insert(key, value, height + 1, parent=self)

    def insert_non_full(self, key, value, parent):
        """Add a new leaf entry to this node's children, keeping key order."""
        values = [child.key for child in self.children]
        new_pos = bisect.bisect(values, key)
        self.children.insert(new_pos, BTree(True, self.M, key, value, parent))
        return self

    def sort(self):
        """Sort the direct children by key, in place."""
        self.children.sort(key=lambda x: x.key)

    def find_location_for_key(self, key):
        """Return ``(child, index)`` for the last child whose key is <= ``key``.

        Returns ``(None, -1)`` when no child qualifies.
        """
        found = None
        for position, child in enumerate(self.children):
            if key >= child.key:
                found = (child, position)
        if found is None:
            return None, -1
        return found

    def search(self, greater_than_equal, less_than):
        """Yield leaf entries with ``greater_than_equal <= key < less_than``.

        Every subtree is visited; only leaf keys are tested against the range.
        """
        for child in self.children:
            if child.leaf and greater_than_equal <= child.key < less_than:
                yield child
            yield from child.search(greater_than_equal, less_than)

    def delete(self, key):
        """Remove the leaf entry with ``key``; return True if one was removed.

        Only leaf entries are removed. An internal node that merely carries
        the same key as its smallest entry is descended into, never dropped
        together with its subtree.
        """
        deletion_point, _ = self.find_location_for_key(key)
        if deletion_point is None:
            return False
        if deletion_point.leaf and deletion_point.key == key:
            self.children.remove(deletion_point)
            return True
        return deletion_point.delete(key)
def walk(item, spaces=0):
    """Print ``item`` and its whole subtree, one node per line.

    Each level is indented by one extra space. A line shows ``key=value``,
    the word ``leaf`` for leaf nodes, and the node's parent.
    """
    indent = " " * spaces
    marker = "leaf" if item.leaf else ""
    print("{}{}={} {} {}".format(indent, item.key, item.value, marker, item.parent))
    deeper = spaces + 1
    for child in item.children:
        walk(child, deeper)
# Sequential insertion: keys 1..99 in ascending order. insert() returns the
# current root (a split may create a new one), so the result is rebound on
# every call.
root = BTree(False, 3, None, None)
for i in range(1, 100):
    root = root.insert(i, str(i))
walk(root)
print(root.children)
def keysonly(items):
    """Lazily yield the ``key`` attribute of each element of ``items``."""
    yield from (entry.key for entry in items)
# The sequentially built tree must be traversed in ascending key order.
sequential_keys = list(keysonly(root.walk()))
assert sorted(sequential_keys) == sequential_keys

# Random insertion: distinct keys drawn from 0..100 in arbitrary order.
root = BTree(False, 3, None, None)
seen = set()
for i in range(1, 100):
    num1 = random.randint(0, 100)
    if num1 not in seen:
        seen.add(num1)
        root = root.insert(num1, str(num1))
walk(root)
for item in root.walk():
    print(item.key, item.value)
random_keys = list(keysonly(root.walk()))
assert sorted(random_keys) == random_keys
解决方法
我重写了插入位置的搜索逻辑,现在它应该总能找到正确的位置:我反向遍历子节点,并在遇到第一个键小于等于插入键的子节点时中断(break)。
执行插入时,我总是把新键插入到 last_child 所指的节点中,这样会先填满已有的节点,而不是新建一个子节点。
下面是改动的部分,差别很细微。
def insert(self, key, value, height=1, parent=None):
    """Search step of insert (excerpt: only the part that changed is shown).

    Descends from ``self``; at each level the children are scanned from last
    to first and the scan stops at the first child whose key is <= ``key``.
    When a level has no such child, ``leaf`` is set to ``last_child``, the
    node that was current before the last descent.
    """
    next_children = self.children
    leaf = self
    last_child = self
    parents = [self]
    child = None
    found = False
    while not found:
        next_children_changed = False
        for child in reversed(next_children):
            if key >= child.key:
                print("Inspecting {} <= {} ".format(child.key, key))
                next_children = child.children
                last_child = leaf
                parents.append(child)
                leaf = child
                next_children_changed = True
                # The right-most match is the only candidate; stop scanning.
                break
        found = not next_children_changed
        if found:
            leaf = last_child
完整的工作代码:
import bisect
import random
class BTree():
    """Node of a B-tree-like ordered index.

    Leaf nodes (``leaf=True``) each carry one ``key``/``value`` entry and have
    no children. Internal nodes hold children sorted by key and copy the
    key/value of their first child when they are created by a split. A node
    is split once it holds ``M`` or more children and another entry has to go
    into it.

    Keys are expected to be mutually comparable and unique (the split step
    asserts that the right half starts with a strictly larger key).
    """

    def __init__(self, leaf, M, parent=None, key=None, value=None):
        """Create a node.

        Args:
            leaf: True for a key/value entry, False for an internal node.
            M: number of children at which a node is split.
            parent: node that holds this node, or None for the root.
            key: key of the entry (or smallest key of an internal node).
            value: payload stored alongside ``key``.
        """
        self.leaf = leaf
        self.children = []
        self.M = M
        self.key = key
        self.value = value
        self.parent = parent

    def walk(self):
        """Yield every leaf entry below this node, depth first, in child order."""
        for child in self.children:
            if child.leaf:
                yield child
            yield from child.walk()

    def __repr__(self):
        return "{}:{}".format(self.key, self.value)

    __str__ = __repr__

    def insert(self, key, value, height=1, parent=None):
        """Insert ``key``/``value`` and return the (possibly new) root.

        Must be called on the root. Callers rebind their root to the return
        value because a split that reaches the root creates a new root node.

        ``height`` and ``parent`` are accepted for signature compatibility
        and are not used.
        """
        node = self        # deepest node matched so far
        container = self   # node whose children list held ``node``
        while True:
            match = None
            # Right-to-left scan: the first child with key <= ``key`` is the
            # right-most candidate, so the scan stops there.
            for child in reversed(node.children):
                if key >= child.key:
                    print("Inspecting {} <= {} ".format(child.key, key))
                    match = child
                    break
            if match is None:
                break
            container = node
            node = match
        if not node.leaf:
            # The descent ended on an internal node none of whose children
            # match (the root, or a node whose separator key went stale after
            # a delete). The new entry belongs at the front of that node.
            container = node
        leaf = container
        print("Trying to insert {} at Found insertion leaf {}".format(key, leaf))
        if len(leaf.children) < leaf.M:
            leaf.insert_non_full(key, value, leaf)
            return self

        # The target node is full: split it, then keep splitting ancestors
        # that became full because they received an extra child.
        new_root = None
        current = leaf
        while current is not None:
            original_parent = current.parent
            if len(current.children) >= current.M:
                new_left, new_right, _ = current.split()
                if original_parent is None:
                    # Splitting the root: grow the tree by one level.
                    new_root = BTree(False, self.M)
                    new_root.children.append(new_left)
                    new_root.children.append(new_right)
                    new_root.key = new_left.key
                    new_root.value = new_left.value
                    holder = new_root
                else:
                    original_parent.children.remove(current)
                    original_parent.children.append(new_left)
                    original_parent.children.append(new_right)
                    original_parent.sort()
                    holder = original_parent
                new_left.parent = holder
                new_right.parent = holder
                assert new_right.key > new_left.key
            current = original_parent
        if new_root is not None:
            # split went to root
            print("Split went to root")
            return new_root.insert(key, value)
        # Room was made below the existing root; retry from the top.
        return self.insert(key, value)

    def split(self):
        """Split this node's children into two new internal nodes.

        Requires at least two children (otherwise the right half is empty and
        an IndexError is raised).

        Returns:
            ``(new_left, new_sibling, separation_key)`` where
            ``separation_key`` is the first key of the right half.
        """
        new_left = BTree(False, self.M)
        new_sibling = BTree(False, self.M)
        midpoint = (len(self.children) + 1) // 2
        left_children = []
        if self.leaf:
            # A leaf being split keeps its own entry as the first left child.
            new_self = BTree(True, self.M, key=self.key, value=self.value)
            left_children = [new_self]
        left_children = left_children + self.children[0:midpoint]
        right_children = self.children[midpoint:]
        for child in left_children:
            child.parent = new_left
        for child in right_children:
            child.parent = new_sibling
        new_sibling.key = right_children[0].key
        new_sibling.value = right_children[0].value
        new_left.children = left_children
        new_sibling.children = right_children
        new_left.key = left_children[0].key
        new_left.value = left_children[0].value
        return new_left, new_sibling, right_children[0].key

    def insert_after_split(self, key, value, height=1, parent=None):
        """Insert below the child located by :meth:`find_location_for_key`.

        Falls back to inserting directly into this node when no child key is
        <= ``key``. Not called by any code visible in this file.
        """
        insertion_point, _ = self.find_location_for_key(key)
        if insertion_point is None:
            self.insert_non_full(key, value, parent)
            return self
        return insertion_point.insert(key, value, height + 1, parent=self)

    def insert_non_full(self, key, value, parent):
        """Add a new leaf entry to this node's children, keeping key order."""
        values = [child.key for child in self.children]
        new_pos = bisect.bisect(values, key)
        entry = BTree(True, self.M, parent=parent, key=key, value=value)
        self.children.insert(new_pos, entry)
        return self

    def sort(self):
        """Sort the direct children by key, in place."""
        self.children.sort(key=lambda x: x.key)

    def find_location_for_key(self, key):
        """Return ``(child, index)`` for the last child whose key is <= ``key``.

        Returns ``(None, -1)`` when no child qualifies.
        """
        found = None
        for position, child in enumerate(self.children):
            if key >= child.key:
                found = (child, position)
        if found is None:
            return None, -1
        return found

    def search(self, greater_than_equal, less_than):
        """Yield leaf entries with ``greater_than_equal <= key < less_than``.

        Every subtree is visited; only leaf keys are tested against the range.
        """
        for child in self.children:
            if child.leaf and greater_than_equal <= child.key < less_than:
                yield child
            yield from child.search(greater_than_equal, less_than)

    def delete(self, key):
        """Remove the leaf entry with ``key``; return True if one was removed.

        Only leaf entries are removed. An internal node that merely carries
        the same key as its smallest entry is descended into, never dropped
        together with its subtree. Separator keys of internal nodes are not
        refreshed afterwards; :meth:`insert` tolerates such stale keys.
        """
        deletion_point, _ = self.find_location_for_key(key)
        if deletion_point is None:
            return False
        if deletion_point.leaf and deletion_point.key == key:
            self.children.remove(deletion_point)
            return True
        return deletion_point.delete(key)
def walk(item, spaces=0):
    """Dump the subtree rooted at ``item`` to stdout, one node per line.

    Nesting depth is shown by one leading space per level. Each line reads
    ``key=value``, then ``leaf`` for leaf nodes, then the node's parent.
    """
    kind = "leaf" if item.leaf else ""
    padding = " " * spaces
    print("{}{}={} {} {}".format(padding, item.key, item.value, kind, item.parent))
    for node in item.children:
        walk(node, spaces + 1)
# Sequential insertion: keys 1..99 in ascending order. insert() returns the
# current root (a split may create a new one), so the result is rebound on
# every call.
root = BTree(False, 3, None)
for i in range(1, 100):
    root = root.insert(i, str(i))
walk(root)
print(root.children)
def keysonly(items):
    """Generate the ``key`` of every element in ``items``, in order."""
    for element in items:
        entry_key = element.key
        yield entry_key
# The sequentially built tree must be traversed in ascending key order.
sequential_keys = list(keysonly(root.walk()))
assert sorted(sequential_keys) == sequential_keys

# Random insertion: distinct keys drawn from 0..100 in arbitrary order.
root = BTree(False, 3, None)
seen = set()
for i in range(1, 100):
    num1 = random.randint(0, 100)
    if num1 not in seen:
        seen.add(num1)
        root = root.insert(num1, str(num1))
walk(root)
for item in root.walk():
    print(item.key, item.value)
random_keys = list(keysonly(root.walk()))
assert sorted(random_keys) == random_keys