2017-06-24 3 views
0

J'ai assemblé un arbre AVL avec suppression et autorisation pour les clés en double. C'est basé sur quelques exemples que j'ai trouvés en ligne (et qui sont listés dans les commentaires du code). Je voulais comparer les performances d'insertion par rapport à la liste Python standard. J'ai écrit une fonction qui génère une quantité arbitraire de nombres aléatoires et les insère dans l'arbre avl. J'ai encore écrit la même fonction pour l'insertion dans l'index 0 de la liste standard de Python. Puis j'ai utilisé un décorateur timeit pour mesurer combien de temps chaque fonction a pris pour s'exécuter.Performances de l'arbre AVL en Python

Avec 100 000 entiers aléatoires, le temps était de 7693ms pour l'arbre AVL et de 3906ms pour la liste. Avec 500 000 entiers aléatoires, l'arbre AVL a pris 46894ms et la liste a pris 136665ms. Il est logique que l'arbre AVL vaincre de manière asymptotique, mais y a-t-il quelque chose que je puisse faire pour améliorer l'arbre AVL en traitant un plus petit nombre d'inserts?

Voici mon code, désolé si son bâclée:

#!/usr/bin/env python 
import time 
from random import randint 
from typing import Optional 

""" 
Class based AVL balanced binary search tree. 
Based on designs from: 
https://interactivepython.org/runestone/static/pythonds/Trees/AVLTreeImplementation.html 
http://www.geeksforgeeks.org/avl-tree-set-2-deletion/ 

A tree constists of a single AVL_Tree object and 
many Node objects. 

What distinguises AVL_Tree from a plain Binary Search Tree is 
it's self balancing property. Whenever a node is inserted or 
deleted, the balance factors of the affected nodes are checked 
and Nodes are rotated to maintain balance in the tree. This 
ensures O(logN) insertion, deletion, and search performance. 

""" 

class Node: 
    def __init__(self, key, left=None, right=None, parent=None, payload=None): 
     self.key = key 
     self.left = left 
     self.right= right 
     self.parent = parent 
     self.height = 1 
     if payload: 
      self.payload = payload 
     else: 
      self.payload = self.key 
     self.count = 1 


class AVL_Tree: 
    def __init__(self): 
     self.root = None 

    def height(self, node: Node) -> int: 
     if node == None: 
      return 0 
     return node.height 

    def right_rotate(self, y: Node) -> None: 
     x = y.left 
     y.left = x.right 
     if x.right != None: 
      x.right.parent = y 
     x.parent = y.parent 
     if self.root == y: 
      self.root = x 
     else: 
      if y.parent.left == y: 
       y.parent.left = x 
      else: 
       y.parent.right = x 
     x.right = y 
     y.parent = x 

     y.height = max(self.height(y.left), self.height(y.right)) + 1 
     x.height = max(self.height(x.left), self.height(x.right)) + 1 

    def left_rotate(self, x: Node) -> None: 
     y = x.right 
     x.right = y.left 

     if y.left != None: 
      y.left.parent = x 
     y.parent = x.parent 

     if self.root == x: 
      self.root = y 
     else: 
      if x.parent.left == x: 
       x.parent.left = y 
      else: 
       x.parent.right = y 
     y.left = x 
     x.parent = y 

     x.height = max(self.height(x.left), self.height(x.right)) + 1 
     y.height = max(self.height(y.left), self.height(y.right)) + 1 

    def get_balance(self, node: Node) -> int: 
     if node == None: 
      return 0 
     return self.height(node.left) - self.height(node.right) 

    def insert(self, key: int, insertion_point=None, payload=None) -> None: 
     node = Node(key) 
     if payload != None: 
      node.payload = payload 
     # If the tree is empty then assign new node to root 
     if self.root == None: 
      self.root = node 
      return 

     if insertion_point == None: 
      insertion_point = self.root 

     if key == insertion_point.key: 
      insertion_point.count += 1 
     elif key < insertion_point.key: 
      if insertion_point.left: 
       self.insert(key, insertion_point.left, payload) 
      else: 
       insertion_point.left = node 
       node.parent = insertion_point 
     elif key > insertion_point.key: 
      if insertion_point.right: 
       self.insert(key, insertion_point.right, payload) 
      else: 
       insertion_point.right = node 
       node.parent = insertion_point 
     else: 
      return 

     insertion_point.height = 1 + max(self.height(insertion_point.left), self.height(insertion_point.right)) 
     balance = self.get_balance(insertion_point) 

     if balance > 1 and key < insertion_point.left.key: 
      # Left Left 
      self.right_rotate(insertion_point) 
     elif balance < -1 and key > insertion_point.right.key: 
      # Right Right 
      self.left_rotate(insertion_point) 
     elif balance > 1 and key > insertion_point.left.key: 
      # Left Right 
      self.left_rotate(insertion_point.left) 
      self.right_rotate(insertion_point) 
     elif balance < -1 and key < insertion_point.right.key: 
      # Right Left 
      self.right_rotate(insertion_point.right) 
      self.left_rotate(insertion_point) 

    def get(self, key: int) -> Optional[Node]: 
     if self.root: 
      node = self._get(key,self.root) 
      if node: 
       return node 
      else: 
       return None 
     else: 
      return None 

    def _get(self, key: int, currentNode: Node) -> Optional[Node]: 
     if not currentNode 
      return None 
     elif currentNode.key == key: 
      return currentNode 
     elif key < currentNode.key: 
      return self._get(key, currentNode.left) 
     else: 
      return self._get(key,currentNode.right) 

    def __getitem__(self,key: int): 
     """ Overloads [] getter to use get() """ 
     return self.get(key) 

    def __contains__(self,key): 
     if self.get(key): 
      return True 
     else: 
      return False 

    def min_value(self, key: int) -> int: 
     """ Return the lowest value key in subtree with root 'node' """ 
     sub_tree_root = self.get(key) 
     while sub_tree_root.left != None: 
      sub_tree_root = sub_tree_root.left 
     return sub_tree_root.key 

    def delete(self, key: int, starting_node: Node = None) -> None: 
     """ 
     When removing a node there are three cases: 
      1. The node has no children: 
       Delete pointer in parent node and 
       delete node object. 
      2. The node has one child: 
       Promote the child to take node's place 
       then delete node object. 
      3. The node has two children: 
       Search tree for a node that can replace 
       the node and preserve the binary structure 
       This will be the next largest node in 
       the tree and will never have two children. 
       This means it can be removed and swapped 
       in using the first two cases. 
     """ 
     if self.root == None: 
      return 
     if starting_node == None: 
      starting_node = self.root 

     # key < starting_node so we recurse left 
     if key < starting_node.key: 
      self.delete(key, starting_node.left) 
     # key > starting_node so we recurse right 
     elif key > starting_node.key: 
      self.delete(key, starting_node.right) 
     # starting_node is key and we can begin the deletion process. 
     else: 
      if starting_node.count > 1: 
       starting_node.count -= 1 
      # starting_node is a leaf 
      elif starting_node.left == None and starting_node.right == None: 
       if starting_node == starting_node.parent.left: 
        starting_node.parent.left = None 
       else: 
        starting_node.parent.right = None 
      # starting_node has both children 
      elif starting_node.left != None and starting_node.right != None: 
       succ = self.get(self.min_value(starting_node.right.key)) 
       starting_node.key = succ.key 
       starting_node.payload = succ.payload 
       # succ is a leaf 
       # (succ cannot have a left child because it is the min) 
       if succ.right == None: 
        # succ is a left child 
        if succ.parent.left == succ: 
         succ.parent.left = None 
        # succ is a right child 
        else: 
         succ.parent.right = None 
       # succ has a right child 
       else: 
        # succ is a left child 
        if succ.parent.left == succ: 
         succ.parent.left = succ.right 
         succ.right.parent = succ.parent 
        # succ is a right child 
        else: 
         succ.parent.right = succ.right 
         succ.right.parent = succ.parent 
      # starting_node has one child 
      else: 
       if starting_node == self.root: 
        # Child is left 
        if starting_node.left != None: 
         starting_node.left.parent = None 
         self.root = starting_node.left 
        # Child is right 
        else: 
         starting_node.right.parent = None 
         self.root = starting_node.right 
       # starting_node is left child: 
       elif starting_node.parent.left == starting_node: 
        # Child is left 
        if starting_node.left != None: 
         starting_node.left.parent = starting_node.parent 
         starting_node.parent.left = starting_node.left 
        # Child is right 
        else: 
         starting_node.right.parent = starting_node.parent 
         starting_node.parent.left = starting_node.right 
       # starting_node is right child 
       else: 
        # Child is left 
        if starting_node.left != None: 
         starting_node.left.parent = starting_node.parent 
         starting_node.parent.right = starting_node.left 
        else: 
         starting_node.right.parent = starting_node.parent 
         starting_node.parent.right = starting_node.right 

     # Update height of starting_node 
     starting_node.height = max(self.height(starting_node.left), self.height(starting_node.right)) + 1 

     # Get balance factor 
     balance = self.get_balance(starting_node) 
     # Use balance factor to rotate 

     # Left Left 
     if balance > 1 and self.get_balance(starting_node.left) >= 0: 
      self.right_rotate(starting_node) 
     # Left Right 
     if balance > 1 and self.get_balance(starting_node.left) < 0: 
      self.left_rotate(starting_node.left) 
      self.right_rotate(starting_node) 
     # Right Right 
     if balance < -1 and self.get_balance(starting_node.right) <= 0: 
      self.left_rotate(starting_node) 
     # Right Left 
     if balance < -1 and self.get_balance(starting_node.right) > 0: 
      self.right_rotate(starting_node.right) 
      self.left_rotate(starting_node) 

    def __delitem__(self,key): 
     self.delete(key) 


def traverse(rootnode: Node) -> None: 
    thislevel = [rootnode] 
    while thislevel: 
     nextlevel = list() 
     row_string = "" 
     for n in thislevel: 
      if n.parent != None: 
       if n.parent.left == n: 
        relation = "L" 
       elif n.parent.right == n: 
        relation = "R" 
      else: 
       relation = "ro" 
      row_string += str(n.key) + str((relation, n.payload)) + " " 
      if n.left: nextlevel.append(n.left) 
      if n.right: nextlevel.append(n.right) 
     print(row_string) 
     thislevel = nextlevel 


def timeit(method): 
    def timed(*args, **kw): 
     ts = time.time() 
     result = method(*args, **kw) 
     te = time.time() 
     if 'log_time' in kw: 
      name = kw.get('log_name', method.__name__.upper()) 
      kw['log_time'][name] = int((te - ts) * 1000) 
     else: 
      print('%r %2.2f ms' % \ 
        (method.__name__, (te - ts) * 1000)) 
     return result 
    return timed 


@timeit 
def avl_inserter(items): 
    tree = AVL_Tree() 
    for _ in range(1, items): 
     tree.insert(randint(1,items)) 
    return None 

@timeit 
def list_inserter(items): 
    l = [] 
    for _ in range(1, items): 
     l.insert(0, randint(1,items)) 
    return None 

if __name__ == '__main__': 
    avl_inserter(100000) 
    list_inserter(100000) 

Répondre

0

Après beaucoup d'expérimentation, je remplace la méthode d'insertion récursive avec une version itérative. J'ai fait un certain nombre de petits ajustements basés sur la sortie de cProfile et ai eu l'exécution jusqu'à ~ 4100ms avec 100.000 insertions.

Mon insert itératif() utilise une boucle while pour trouver le chemin vers le point d'insertion, puis une boucle for pour remonter le chemin et effectuer les rotations nécessaires. Je pense que cela serait intrinsèquement plus lent qu'une insertion récursive et prendrait au mieux 2 * temps de connexion pour l'insertion. Quelqu'un peut-il expliquer ce qui causait la lenteur de ma version récursive?

#!/usr/bin/env python 
""" 
Class based AVL balanced binary search tree. 
Based on designs from: 
https://interactivepython.org/runestone/static/pythonds/Trees/AVLTreeImplementation.html 
http://www.geeksforgeeks.org/avl-tree-set-2-deletion/ 

A tree constists of a single AVL_Tree object and 
many Node objects. 

What distinguises AVL_Tree from a plain Binary Search Tree is 
it's self balancing property. Whenever a node is inserted or 
deleted, the balance factors of the affected nodes are checked 
and Nodes are rotated to maintain balance in the tree. This 
ensures O(logN) insertion, deletion, and search performance. 
""" 

import time 
import random 
from typing import Optional 

class Node: 
    def __init__(self, key, left=None, right=None, parent=None, payload=None): 
     self.key = key 
     self.left = left 
     self.right = right 
     self.parent = parent 
     self.height = 1 
     if payload: 
      self.payload = payload 
     else: 
      self.payload = self.key 
     self.count = 1 


class AvlTree: 
    def __init__(self): 
     self.root = None 

    def right_rotate(self, current_node: Node) -> None: 
     """ Performs a right rotation for balancing the tree """ 
     left_child = current_node.left 
     current_node.left = left_child.right 
     if left_child.right != None: 
      left_child.right.parent = current_node 
     left_child.parent = current_node.parent 
     if self.root == current_node: 
      self.root = left_child 
     else: 
      if current_node.parent.left == current_node: 
       current_node.parent.left = left_child 
      else: 
       current_node.parent.right = left_child 
     left_child.right = current_node 
     current_node.parent = left_child 

     current_node.height = max(
      current_node.left.height if current_node.left else 0, 
      current_node.right.height if current_node.right else 0) + 1 
     left_child.height = max(
      left_child.left.height if left_child.left else 0, 
      left_child.right.height if left_child.right else 0) + 1 

    def left_rotate(self, current_node: Node) -> None: 
     """ Performs a left rotation for balancing the tree """ 
     right_child = current_node.right 
     current_node.right = right_child.left 

     if right_child.left != None: 
      right_child.left.parent = current_node 
     right_child.parent = current_node.parent 

     if self.root == current_node: 
      self.root = right_child 
     else: 
      if current_node.parent.left == current_node: 
       current_node.parent.left = right_child 
      else: 
       current_node.parent.right = right_child 
     right_child.left = current_node 
     current_node.parent = right_child 

     current_node.height = max(
      current_node.left.height if current_node.left else 0, 
      current_node.right.height if current_node.right else 0) + 1 
     right_child.height = max(
      right_child.left.height if right_child.left else 0, 
      right_child.right.height if right_child.right else 0) + 1 

    def get_balance(self, node: Node) -> int: 
     """ Returns balance factor for a node """ 
     if node is None: 
      return 0 
     return (node.left.height if node.left else 0) - (node.right.height if node.right else 0) 

    def rotate_manager(self, node: Node, inserted_key: int, balance: int) -> None: 

     if balance > 1 and inserted_key < node.left.key: 
      # Left Left 
      self.right_rotate(node) 
     elif balance < -1 and inserted_key > node.right.key: 
      # Right Right 
      self.left_rotate(node) 
     elif balance > 1 and inserted_key > node.left.key: 
      # Left Right 
      self.left_rotate(node.left) 
      self.right_rotate(node) 
     elif balance < -1 and inserted_key < node.right.key: 
      # Right Left 
      self.right_rotate(node.right) 
      self.left_rotate(node) 

    def insert(self, key: int, insertion_point=None, payload=None) -> None: 
     """ Insert new node into the tree """ 
     node = Node(key) 
     if payload is not None: 
      node.payload = payload 
     # If the tree is empty then assign new node to root 
     if self.root is None: 
      self.root = node 
      return 

     if insertion_point is None: 
      insertion_point = self.root 

     search_queue = [insertion_point] 
     index = 0 
     while search_queue: 
      if key < search_queue[index].key: 
       if search_queue[index].left: 
        search_queue.append(search_queue[index].left) 
        index += 1 
       else: 
        search_queue[index].left = node 
        node.parent = search_queue[index] 
        break 
      elif key > search_queue[index].key: 
       if search_queue[index].right: 
        search_queue.append(search_queue[index].right) 
        index += 1 
       else: 
        search_queue[index].right = node 
        node.parent = search_queue[index] 
        break 

     for n in reversed(search_queue): 
      n.height = max(
       n.left.height if n.left else 0, 
       n.right.height if n.right else 0) + 1 
      balance = self.get_balance(n) 
      if balance > 1 or balance < -1: 
       self.rotate_manager(n, key, balance) 


    def get(self, key: int) -> Optional[Node]: 
     """ Returns a node with key if found in tree """ 
     if self.root: 
      node = self._get(key, self.root) 
      if node: 
       return node 
      return None 
     return None 

    def _get(self, key: int, current_node: Node) -> Optional[Node]: 
     """ Recursive search method called by get() """ 
     if not current_node: 
      return None 
     elif current_node.key == key: 
      return current_node 
     elif key < current_node.key: 
      return self._get(key, current_node.left) 
     return self._get(key, current_node.right) 

    def __getitem__(self, key: int): 
     """ Overloads [] getter to use get() """ 
     return self.get(key) 

    def __contains__(self, key): 
     return bool(self.get(key)) 

    def min_value(self, key: int) -> int: 
     """ Return the lowest value key in subtree with root 'node' """ 
     sub_tree_root = self.get(key) 
     while sub_tree_root.left != None: 
      sub_tree_root = sub_tree_root.left 
     return sub_tree_root.key 

    def delete(self, key: int, starting_node: Node = None) -> None: 
     """ 
     When removing a node there are three cases: 
      1. The node has no children: 
       Delete pointer in parent node and 
       delete node object. 
      2. The node has one child: 
       Promote the child to take node's place 
       then delete node object. 
      3. The node has two children: 
       Search tree for a node that can replace 
       the node and preserve the binary structure 
       This will be the next largest node in 
       the tree and will never have two children. 
       This means it can be removed and swapped 
       in using the first two cases. 
     """ 
     if self.root is None: 
      return 
     if starting_node is None: 
      starting_node = self.root 

     # key < starting_node so we recurse left 
     if key < starting_node.key: 
      self.delete(key, starting_node.left) 
     # key > starting_node so we recurse right 
     elif key > starting_node.key: 
      self.delete(key, starting_node.right) 
     # starting_node is key and we can begin the deletion process. 
     else: 
      if starting_node.count > 1: 
       starting_node.count -= 1 
      # starting_node is a leaf 
      elif starting_node.left is None and starting_node.right is None: 
       if starting_node == starting_node.parent.left: 
        starting_node.parent.left = None 
       else: 
        starting_node.parent.right = None 
      # starting_node has both children 
      elif starting_node.left != None and starting_node.right != None: 
       succ = self.get(self.min_value(starting_node.right.key)) 
       starting_node.key = succ.key 
       starting_node.payload = succ.payload 
       # succ is a leaf 
       # (succ cannot have a left child because it is the min) 
       if succ.right is None: 
        # succ is a left child 
        if succ.parent.left == succ: 
         succ.parent.left = None 
        # succ is a right child 
        else: 
         succ.parent.right = None 
       # succ has a right child 
       else: 
        # succ is a left child 
        if succ.parent.left == succ: 
         succ.parent.left = succ.right 
         succ.right.parent = succ.parent 
        # succ is a right child 
        else: 
         succ.parent.right = succ.right 
         succ.right.parent = succ.parent 
      # starting_node has one child 
      else: 
       if starting_node == self.root: 
        # Child is left 
        if starting_node.left != None: 
         starting_node.left.parent = None 
         self.root = starting_node.left 
        # Child is right 
        else: 
         starting_node.right.parent = None 
         self.root = starting_node.right 
       # starting_node is left child: 
       elif starting_node.parent.left == starting_node: 
        # Child is left 
        if starting_node.left != None: 
         starting_node.left.parent = starting_node.parent 
         starting_node.parent.left = starting_node.left 
        # Child is right 
        else: 
         starting_node.right.parent = starting_node.parent 
         starting_node.parent.left = starting_node.right 
       # starting_node is right child 
       else: 
        # Child is left 
        if starting_node.left != None: 
         starting_node.left.parent = starting_node.parent 
         starting_node.parent.right = starting_node.left 
        else: 
         starting_node.right.parent = starting_node.parent 
         starting_node.parent.right = starting_node.right 

     # Update height of starting_node 
     starting_node.height = max(
      self.height(starting_node.left), 
      self.height(starting_node.right)) + 1 

     # Get balance factor 
     balance = self.get_balance(starting_node) 
     # Use balance factor to rotate 

     # Left Left 
     if balance > 1 and self.get_balance(starting_node.left) >= 0: 
      print('L L') 
      self.right_rotate(starting_node) 
     # Left Right 
     if balance > 1 and self.get_balance(starting_node.left) < 0: 
      print('L R') 
      self.left_rotate(starting_node.left) 
      self.right_rotate(starting_node) 
     # Right Right 
     if balance < -1 and self.get_balance(starting_node.right) <= 0: 
      print('R R') 
      self.left_rotate(starting_node) 
     # Right Left 
     if balance < -1 and self.get_balance(starting_node.right) > 0: 
      print('R L') 
      self.right_rotate(starting_node.right) 
      self.left_rotate(starting_node) 

    def __delitem__(self, key): 
     self.delete(key) 


def traverse(rootnode: Node) -> None: 
    """ Prints a map of the tree starting at rootnode """ 
    thislevel = [rootnode] 
    while thislevel: 
     nextlevel = list() 
     row_string = "" 
     for node in thislevel: 
      if node.parent != None: 
       if node.parent.left == node: 
        relation = "L" 
       elif node.parent.right == node: 
        relation = "R" 
      else: 
       relation = "ro" 
      row_string += str(node.key) + str((relation, node.height)) + " " 
      if node.left: 
       nextlevel.append(node.left) 
      if node.right: 
       nextlevel.append(node.right) 
     print(row_string) 
     thislevel = nextlevel 


def timeit(method): 
    """ timeit decorator """ 
    def timed(*args, **kw): 
     """ inner timeit function """ 
     ts = time.time() 
     result = method(*args, **kw) 
     te = time.time() 
     if 'log_time' in kw: 
      name = kw.get('log_name', method.__name__.upper()) 
      kw['log_time'][name] = int((te - ts) * 1000) 
     else: 
      print('%r %2.2f ms' % \ 
        (method.__name__, (te - ts) * 1000)) 
     return result 
    return timed 


@timeit 
def avl_inserter(items): 
    """ Tree insertion speed test """ 
    samples = random.sample(range(1, 1000000), items) 
    sample_tree = AvlTree() 
    for sample in samples: 
     sample_tree.insert(sample) 
    return None 

@timeit 
def list_inserter(items): 
    """ List Insertion speed test """ 
    samples = random.sample(range(1, 1000000), items) 
    sample_list = [] 
    for sample in samples: 
     sample_list.insert(0, sample) 
    return None 

if __name__ == '__main__': 
    avl_inserter(100000) 
    #list_inserter(500000) 
    #tree = AvlTree() 
    #tree.insert(10, payload=5) 
    #tree.insert(15, payload=3) 
    #tree.insert(11, payload=4) 
    #tree.insert(20) 
    #tree.insert(17) 
    #tree.insert(25) 
    #tree.insert(18) 
    #traverse(tree.root)