🧪 Tree Library Benchmark: bigtree vs anytree vs treelib¶

This notebook benchmarks three popular Python libraries for building and working with tree data structures: bigtree, anytree, and treelib. The operation being timed is constructing a tree from a nested dictionary.

In [1]:

Copied!





import sys
import os
import importlib.metadata

# Add the project root directory to sys.path
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../"))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Turn off assertions for performance
os.environ["BIGTREE_CONF_ASSERTIONS"] = ""

import anytree
import bigtree

print(f"anytree=={anytree.__version__}")
print(f"bigtree={bigtree.__version__}",)
print(f"treelib=={importlib.metadata.version('treelib')}")
import sys
import os
import importlib.metadata

# Add the project root directory to sys.path
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../"))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Turn off assertions for performance
os.environ["BIGTREE_CONF_ASSERTIONS"] = ""

import anytree
import bigtree

print(f"anytree=={anytree.__version__}")
print(f"bigtree={bigtree.__version__}",)
print(f"treelib=={importlib.metadata.version('treelib')}")

anytree==2.12.1
bigtree=1.0.1
treelib==1.8.0

Setup¶

In [2]:

Copied!

import json

# Load the nested-dictionary fixture that every timing cell below is run on.
# Decode explicitly as UTF-8 instead of relying on the platform default encoding.
with open("../../assets/docs/cad.json", "r", encoding="utf-8") as f:
    tree_data = json.load(f)
import json

# Load the nested-dictionary fixture that every timing cell below is run on.
# Decode explicitly as UTF-8 instead of relying on the platform default encoding.
with open("../../assets/docs/cad.json", "r", encoding="utf-8") as f:
    tree_data = json.load(f)

Timing: bigtree¶

In [3]:

Copied!

from bigtree import Tree

def construct_tree_bigtree(data):
    """Build a bigtree tree from a nested dictionary.

    The class is reached through the ``bigtree`` module (imported in the first
    cell) rather than the bare name ``Tree``: a later cell runs
    ``from treelib import Tree``, which rebinds ``Tree``, so re-running this
    function after that cell would otherwise resolve to treelib's class.

    Args:
        data: Nested dictionary describing the tree; passed unchanged to
            ``Tree.from_nested_dict``.

    Returns:
        The object returned by ``bigtree.Tree.from_nested_dict(data)``.
    """
    return bigtree.Tree.from_nested_dict(data)
from bigtree import Tree

def construct_tree_bigtree(data):
    """Build a bigtree tree from a nested dictionary.

    The class is reached through the ``bigtree`` module (imported in the first
    cell) rather than the bare name ``Tree``: a later cell runs
    ``from treelib import Tree``, which rebinds ``Tree``, so re-running this
    function after that cell would otherwise resolve to treelib's class.

    Args:
        data: Nested dictionary describing the tree; passed unchanged to
            ``Tree.from_nested_dict``.

    Returns:
        The object returned by ``bigtree.Tree.from_nested_dict(data)``.
    """
    return bigtree.Tree.from_nested_dict(data)

In [4]:

Copied!

%%timeit
# Timed statement: build the bigtree tree from the pre-loaded nested dict
construct_tree_bigtree(tree_data)
%%timeit
# Timed statement: build the bigtree tree from the pre-loaded nested dict
construct_tree_bigtree(tree_data)

574 μs ± 2.68 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)

In [5]:

Copied!

# Preview tree
# Build the tree once outside the timing loop and display it with max_depth=4
tree_bigtree = construct_tree_bigtree(tree_data)
tree_bigtree.show(max_depth=4)
# Preview tree
# Build the tree once outside the timing loop and display it with max_depth=4
tree_bigtree = construct_tree_bigtree(tree_data)
tree_bigtree.show(max_depth=4)

learning_model_airplane
└── learning_model_airplane
    └── Default
        ├── PartDefinition1
        └── FRMFeatureBasedEntity1

Timing: anytree¶

In [6]:

Copied!





from anytree.importer import DictImporter

def construct_tree_anytree(data):
    """Build an anytree tree from a nested dictionary.

    Args:
        data: Nested dictionary handed unchanged to ``DictImporter.import_``.

    Returns:
        Whatever ``DictImporter().import_(data)`` returns.
    """
    # A fresh importer is created on every call, so its construction is part of the timed work
    return DictImporter().import_(data)
from anytree.importer import DictImporter

def construct_tree_anytree(data):
    """Build an anytree tree from a nested dictionary.

    Args:
        data: Nested dictionary handed unchanged to ``DictImporter.import_``.

    Returns:
        Whatever ``DictImporter().import_(data)`` returns.
    """
    # A fresh importer is created on every call, so its construction is part of the timed work
    return DictImporter().import_(data)

In [7]:

Copied!

%%timeit
# Timed statement: build the anytree tree from the pre-loaded nested dict.
# NOTE(review): unlike the bigtree and treelib timing cells, the result is bound
# to a name here; the extra assignment is presumably negligible -- confirm.
tree = construct_tree_anytree(tree_data)
%%timeit
# Timed statement: build the anytree tree from the pre-loaded nested dict.
# NOTE(review): unlike the bigtree and treelib timing cells, the result is bound
# to a name here; the extra assignment is presumably negligible -- confirm.
tree = construct_tree_anytree(tree_data)

617 μs ± 3.74 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)

In [8]:

Copied!





# Preview tree
from anytree import RenderTree

# Rebuild the tree outside the timing loop and print only nodes shallower than depth 4
tree = construct_tree_anytree(tree_data)
for pre, _, node in RenderTree(tree):
    if node.depth >= 4:
        continue
    print(f"{pre}{node.name}")
# Preview tree
from anytree import RenderTree

# Rebuild the tree outside the timing loop and print only nodes shallower than depth 4
tree = construct_tree_anytree(tree_data)
for pre, _, node in RenderTree(tree):
    if node.depth >= 4:
        continue
    print(f"{pre}{node.name}")

learning_model_airplane
└── learning_model_airplane
    └── Default
        ├── PartDefinition1
        └── FRMFeatureBasedEntity1

Timing: treelib¶

In [9]:

Copied!





from treelib import Tree
import uuid

def construct_from_treelib(data, tree=None, parent=None):
    """Recursively add ``data`` and its descendants to a treelib tree.

    Args:
        data: Dict for one node. ``"name"`` (default ``"Unnamed"``) becomes the
            node tag, ``"children"`` (default empty) is the iterable of child
            dicts, and every other key/value pair is stored as the node's data.
        tree: Tree to add nodes to; a new ``Tree()`` is created when ``None``.
        parent: Identifier passed as ``parent=`` to ``create_node`` for this
            node; ``None`` on the initial call.

    Returns:
        The tree that the nodes were added to.
    """
    tree = Tree() if tree is None else tree

    # Generate a unique ID to avoid identifier collisions
    node_id = str(uuid.uuid4())

    # The tag comes from "name"; everything except "children" travels along as node data
    tree.create_node(
        tag=data.get("name", "Unnamed"),
        identifier=node_id,
        parent=parent,
        data={key: value for key, value in data.items() if key != "children"},
    )

    # Attach each child (if any) beneath the node just created
    for child in data.get("children", []):
        construct_from_treelib(child, tree, parent=node_id)

    return tree
from treelib import Tree
import uuid

def construct_from_treelib(data, tree=None, parent=None):
    """Recursively add ``data`` and its descendants to a treelib tree.

    Args:
        data: Dict for one node. ``"name"`` (default ``"Unnamed"``) becomes the
            node tag, ``"children"`` (default empty) is the iterable of child
            dicts, and every other key/value pair is stored as the node's data.
        tree: Tree to add nodes to; a new ``Tree()`` is created when ``None``.
        parent: Identifier passed as ``parent=`` to ``create_node`` for this
            node; ``None`` on the initial call.

    Returns:
        The tree that the nodes were added to.
    """
    tree = Tree() if tree is None else tree

    # Generate a unique ID to avoid identifier collisions
    node_id = str(uuid.uuid4())

    # The tag comes from "name"; everything except "children" travels along as node data
    tree.create_node(
        tag=data.get("name", "Unnamed"),
        identifier=node_id,
        parent=parent,
        data={key: value for key, value in data.items() if key != "children"},
    )

    # Attach each child (if any) beneath the node just created
    for child in data.get("children", []):
        construct_from_treelib(child, tree, parent=node_id)

    return tree

In [10]:

Copied!

%%timeit
# Timed statement: build the treelib tree from the pre-loaded nested dict
construct_from_treelib(tree_data)
%%timeit
# Timed statement: build the treelib tree from the pre-loaded nested dict
construct_from_treelib(tree_data)

1.71 ms ± 9.1 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)

In [11]:

Copied!





# Preview tree
# NOTE(review): the recorded output lists siblings in a different order than the
# bigtree/anytree previews -- presumably expand_tree sorts children by default; confirm.
tree_treelib = construct_from_treelib(tree_data)
for node in tree_treelib.expand_tree(mode=Tree.DEPTH):
    if tree_treelib.depth(node) >= 4:
        continue
    # Four spaces of indentation per tree level
    indent = " " * (4 * tree_treelib.level(node))
    print(f"{indent}{tree_treelib[node].tag}")
# Preview tree
# NOTE(review): the recorded output lists siblings in a different order than the
# bigtree/anytree previews -- presumably expand_tree sorts children by default; confirm.
tree_treelib = construct_from_treelib(tree_data)
for node in tree_treelib.expand_tree(mode=Tree.DEPTH):
    if tree_treelib.depth(node) >= 4:
        continue
    # Four spaces of indentation per tree level
    indent = " " * (4 * tree_treelib.level(node))
    print(f"{indent}{tree_treelib[node].tag}")

learning_model_airplane
    learning_model_airplane
        Default
            FRMFeatureBasedEntity1
            PartDefinition1

Closing Words¶

While performance, such as construction speed, is important, it is essential to also consider the following aspects:

🎨 Library Style¶

API Design & Usability: How intuitive and user-friendly is the library syntax?
Flexibility: Does the library allow attaching rich metadata, custom objects, or additional attributes to nodes?
Readability & Visualisation: How easy is it to visualise or print the tree structure?

🧰 Use Cases¶

Extensibility: Can the library support advanced tree operations like traversal, searching, filtering, or exporting to multiple formats?
Integration with other tools: Does it integrate smoothly with popular data tools such as pandas or graph libraries? Notably, bigtree stands out here with strong dataframe support, which can be a decisive advantage depending on your workflow.

🔧 Maintenance & Community¶

Activity & Support: Is the library actively maintained?
Documentation Quality: Comprehensive and clear documentation is vital for onboarding new users and enabling effective use of all features.