🧪 Tree Library Benchmark: bigtree vs anytree vs treelib¶
This notebook benchmarks three popular Python libraries used for building and working with tree data structures: bigtree, anytree, and treelib. The core functionality being tested is to create a tree from a nested dictionary.
In [1]:
Copied!
import sys
import os
import importlib.metadata
# Add the project root directory to sys.path
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../"))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Turn off assertions for performance.
# NOTE(review): set before `import bigtree` below; presumably the library reads
# this variable at import time - confirm before reordering.
os.environ["BIGTREE_CONF_ASSERTIONS"] = ""

import anytree
import bigtree

# Report the versions under test, one `name==version` line per library
print(f"anytree=={anytree.__version__}")
print(f"bigtree=={bigtree.__version__}")
print(f"treelib=={importlib.metadata.version('treelib')}")
import sys
import os
import importlib.metadata
# Add the project root directory to sys.path
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../"))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Turn off assertions for performance.
# NOTE(review): set before `import bigtree` below; presumably the library reads
# this variable at import time - confirm before reordering.
os.environ["BIGTREE_CONF_ASSERTIONS"] = ""

import anytree
import bigtree

# Report the versions under test, one `name==version` line per library
print(f"anytree=={anytree.__version__}")
print(f"bigtree=={bigtree.__version__}")
print(f"treelib=={importlib.metadata.version('treelib')}")
anytree==2.12.1 bigtree=1.0.1 treelib==1.8.0
Setup¶
In [2]:
Copied!
import json
# Load the nested-dictionary fixture used by every benchmark below.
# The encoding is passed explicitly so decoding does not depend on the
# platform's default locale encoding.
with open("../../assets/docs/cad.json", "r", encoding="utf-8") as f:
    tree_data = json.load(f)
import json
# Load the nested-dictionary fixture used by every benchmark below.
# The encoding is passed explicitly so decoding does not depend on the
# platform's default locale encoding.
with open("../../assets/docs/cad.json", "r", encoding="utf-8") as f:
    tree_data = json.load(f)
Timing: bigtree¶
In [3]:
Copied!
from bigtree import Tree
def construct_tree_bigtree(data):
    """Build a bigtree ``Tree`` from the nested dictionary ``data``.

    The class is looked up through the ``bigtree`` module instead of the bare
    name ``Tree``: this notebook later rebinds ``Tree`` to treelib's class, so
    re-running this function after that import would otherwise call the wrong
    ``Tree``.

    Args:
        data: Nested dictionary describing the tree.

    Returns:
        The object returned by ``bigtree.Tree.from_nested_dict(data)``.
    """
    return bigtree.Tree.from_nested_dict(data)
from bigtree import Tree
def construct_tree_bigtree(data):
    """Build a bigtree ``Tree`` from the nested dictionary ``data``.

    The class is looked up through the ``bigtree`` module instead of the bare
    name ``Tree``: this notebook later rebinds ``Tree`` to treelib's class, so
    re-running this function after that import would otherwise call the wrong
    ``Tree``.

    Args:
        data: Nested dictionary describing the tree.

    Returns:
        The object returned by ``bigtree.Tree.from_nested_dict(data)``.
    """
    return bigtree.Tree.from_nested_dict(data)
In [4]:
Copied!
%%timeit
# Time one complete bigtree construction from the already-loaded `tree_data`
construct_tree_bigtree(tree_data)
%%timeit
# Time one complete bigtree construction from the already-loaded `tree_data`
construct_tree_bigtree(tree_data)
574 μs ± 2.68 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
In [5]:
Copied!
# Preview tree: build it once more outside the timing cell and print it,
# limiting the printout with max_depth=4
tree_bigtree = construct_tree_bigtree(tree_data)
tree_bigtree.show(max_depth=4)
# Preview tree: build it once more outside the timing cell and print it,
# limiting the printout with max_depth=4
tree_bigtree = construct_tree_bigtree(tree_data)
tree_bigtree.show(max_depth=4)
learning_model_airplane
└── learning_model_airplane
    └── Default
        ├── PartDefinition1
        └── FRMFeatureBasedEntity1
Timing: anytree¶
In [6]:
Copied!
from anytree.importer import DictImporter
def construct_tree_anytree(data):
    """Build an anytree tree from the nested dictionary ``data``.

    A fresh ``DictImporter`` is created on every call.

    Args:
        data: Nested dictionary describing the tree.

    Returns:
        The object returned by ``DictImporter().import_(data)``.
    """
    return DictImporter().import_(data)
from anytree.importer import DictImporter
def construct_tree_anytree(data):
    """Build an anytree tree from the nested dictionary ``data``.

    A fresh ``DictImporter`` is created on every call.

    Args:
        data: Nested dictionary describing the tree.

    Returns:
        The object returned by ``DictImporter().import_(data)``.
    """
    return DictImporter().import_(data)
In [7]:
Copied!
%%timeit
tree = construct_tree_anytree(tree_data)
%%timeit
tree = construct_tree_anytree(tree_data)
617 μs ± 3.74 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
In [8]:
Copied!
# Preview tree: render the rebuilt tree and print only nodes whose depth is
# below 4, each prefixed with the connector text supplied by RenderTree
from anytree import RenderTree

tree = construct_tree_anytree(tree_data)
for prefix, _, node in RenderTree(tree):
    if node.depth >= 4:
        continue
    print("%s%s" % (prefix, node.name))
# Preview tree: render the rebuilt tree and print only nodes whose depth is
# below 4, each prefixed with the connector text supplied by RenderTree
from anytree import RenderTree

tree = construct_tree_anytree(tree_data)
for prefix, _, node in RenderTree(tree):
    if node.depth >= 4:
        continue
    print("%s%s" % (prefix, node.name))
learning_model_airplane
└── learning_model_airplane
    └── Default
        ├── PartDefinition1
        └── FRMFeatureBasedEntity1
Timing: treelib¶
In [9]:
Copied!
from treelib import Tree
import uuid
def construct_from_treelib(data, tree=None, parent=None):
    """Recursively build a treelib tree from a nested dictionary.

    Args:
        data: Mapping describing one node. Its ``"name"`` entry becomes the
            node tag (``"Unnamed"`` when absent) and ``"children"`` holds the
            child mappings in the same format. Every entry except
            ``"children"`` (so ``"name"`` included) is stored as the node's
            ``data`` payload.
        tree: Tree that receives the nodes. A new ``Tree`` is created when
            this is ``None``.
        parent: Identifier of the node to attach to, or ``None`` when ``data``
            describes the root.

    Returns:
        The tree that received the nodes.
    """
    if tree is None:
        tree = Tree()

    # Generate a unique ID to avoid identifier collisions
    node_id = str(uuid.uuid4())

    # Everything except the child list travels with the node as its payload
    attributes = {k: v for k, v in data.items() if k != "children"}

    # Add node to the tree
    tree.create_node(
        tag=data.get("name", "Unnamed"),
        identifier=node_id,
        parent=parent,
        data=attributes,
    )

    # Recurse into children if they exist; `or ()` also covers an explicit
    # `"children": null`, which would otherwise fail when iterated
    for child in data.get("children") or ():
        construct_from_treelib(child, tree, parent=node_id)
    return tree
from treelib import Tree
import uuid
def construct_from_treelib(data, tree=None, parent=None):
    """Recursively build a treelib tree from a nested dictionary.

    Args:
        data: Mapping describing one node. Its ``"name"`` entry becomes the
            node tag (``"Unnamed"`` when absent) and ``"children"`` holds the
            child mappings in the same format. Every entry except
            ``"children"`` (so ``"name"`` included) is stored as the node's
            ``data`` payload.
        tree: Tree that receives the nodes. A new ``Tree`` is created when
            this is ``None``.
        parent: Identifier of the node to attach to, or ``None`` when ``data``
            describes the root.

    Returns:
        The tree that received the nodes.
    """
    if tree is None:
        tree = Tree()

    # Generate a unique ID to avoid identifier collisions
    node_id = str(uuid.uuid4())

    # Everything except the child list travels with the node as its payload
    attributes = {k: v for k, v in data.items() if k != "children"}

    # Add node to the tree
    tree.create_node(
        tag=data.get("name", "Unnamed"),
        identifier=node_id,
        parent=parent,
        data=attributes,
    )

    # Recurse into children if they exist; `or ()` also covers an explicit
    # `"children": null`, which would otherwise fail when iterated
    for child in data.get("children") or ():
        construct_from_treelib(child, tree, parent=node_id)
    return tree
In [10]:
Copied!
%%timeit
# Time one complete treelib construction from the already-loaded `tree_data`
construct_from_treelib(tree_data)
%%timeit
# Time one complete treelib construction from the already-loaded `tree_data`
construct_from_treelib(tree_data)
1.71 ms ± 9.1 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
In [11]:
Copied!
# Preview tree: walk the rebuilt tree in Tree.DEPTH mode and print the tag of
# every entry whose depth is below 4, indented four spaces per level.
# Each value yielded by expand_tree is used as a key into the tree.
tree_treelib = construct_from_treelib(tree_data)
for nid in tree_treelib.expand_tree(mode=Tree.DEPTH):
    if tree_treelib.depth(nid) >= 4:
        continue
    padding = " " * tree_treelib.level(nid) * 4
    print(f"{padding}{tree_treelib[nid].tag}")
# Preview tree: walk the rebuilt tree in Tree.DEPTH mode and print the tag of
# every entry whose depth is below 4, indented four spaces per level.
# Each value yielded by expand_tree is used as a key into the tree.
tree_treelib = construct_from_treelib(tree_data)
for nid in tree_treelib.expand_tree(mode=Tree.DEPTH):
    if tree_treelib.depth(nid) >= 4:
        continue
    padding = " " * tree_treelib.level(nid) * 4
    print(f"{padding}{tree_treelib[nid].tag}")
learning_model_airplane
    learning_model_airplane
        Default
            FRMFeatureBasedEntity1
            PartDefinition1
Closing Words¶
While performance, such as construction speed, is important, it is essential to also consider the following aspects:
🎨 Library Style¶
- API Design & Usability: How intuitive and user-friendly is the library syntax?
- Flexibility: Does the library allow attaching rich metadata, custom objects, or additional attributes to nodes?
- Readability & Visualisation: How easy is it to visualise or print the tree structure?
🧰 Use Cases¶
- Extensibility: Can the library support advanced tree operations like traversal, searching, filtering, or exporting to multiple formats?
- Integration with other tools: Does it integrate smoothly with popular data tools such as pandas or graph libraries? Notably, bigtree stands out here with strong dataframe support, which can be a decisive advantage depending on your workflow.
🔧 Maintenance & Community¶
- Activity & Support: Is the library actively maintained?
- Documentation Quality: Comprehensive and clear documentation is vital for onboarding new users and enabling effective use of all features.