Query Cookbook¶

HtmlGraph provides multiple ways to query your graph data. This cookbook covers all query methods with practical examples.

Query Methods Overview¶

Method	Use Case	Example
`query()`	CSS selector queries	`graph.query('[data-status="blocked"]')`
`query_builder()`	Complex conditions with AND/OR/NOT	`graph.query_builder().where("status", "blocked").execute()`
`find()`	Single node lookup	`graph.find(type="feature", status="done")`
`find_all()`	Multiple nodes with filters	`graph.find_all(priority="high")`

CSS Selector Queries¶

The simplest way to query - uses familiar CSS selector syntax.

from htmlgraph import HtmlGraph

graph = HtmlGraph(".htmlgraph")

# Status-based queries
blocked = graph.query('[data-status="blocked"]')
done = graph.query('[data-status="done"]')

# Priority queries
high_priority = graph.query('[data-priority="high"]')
critical = graph.query('[data-priority="critical"]')

# Type queries
features = graph.query('[data-type="feature"]')
sessions = graph.query('[data-type="session"]')

# Combined selectors
urgent = graph.query('[data-status="blocked"][data-priority="high"]')

Limitations: CSS selectors cannot express OR logic, NOT conditions, numeric comparisons, or text search.

Fluent Query Builder¶

Use the fluent query builder for complex queries that CSS selectors can't express.

Basic Conditions¶

# Start each query from a fresh builder. Reusing a single builder for
# unrelated queries may carry conditions over from the previous where()
# call (assumption: the builder accumulates state - verify against the API).

# Simple equality
features = graph.query_builder().where("type", "feature").execute()

# With status
blocked = graph.query_builder().where("status", "blocked").execute()

Chaining Conditions¶

# AND conditions (explicit, via and_())
urgent = (graph.query_builder()
    .where("status", "blocked")
    .and_("priority", "high")
    .execute())

# OR conditions
high_or_critical = (graph.query_builder()
    .where("priority", "high")
    .or_("priority", "critical")
    .execute())

# NOT conditions
not_done = (graph.query_builder()
    .where("type", "feature")
    .not_("status").eq("done")
    .execute())

Comparison Operators¶

# Greater than
large_effort = (graph.query_builder()
    .where("properties.effort").gt(8)
    .execute())

# Less than
quick_tasks = (graph.query_builder()
    .where("properties.effort").lt(4)
    .execute())

# Greater than or equal
medium_plus = (graph.query_builder()
    .where("properties.effort").gte(5)
    .execute())

# Less than or equal
small_tasks = (graph.query_builder()
    .where("properties.effort").lte(2)
    .execute())

# Between (inclusive)
medium_tasks = (graph.query_builder()
    .where("properties.effort").between(3, 6)
    .execute())

Text Search¶

# Contains substring
auth_features = (graph.query_builder()
    .where("title").contains("auth")
    .execute())

# Case-insensitive contains
auth_any_case = (graph.query_builder()
    .where("title").icontains("AUTH")
    .execute())

# Regex matching
api_features = (graph.query_builder()
    .where("title").matches(r"API|REST|GraphQL")
    .execute())

List Operations¶

# In list
high_priorities = (graph.query_builder()
    .where("priority").in_(["high", "critical"])
    .execute())

# Not in list
not_done_or_blocked = (graph.query_builder()
    .where("status").not_in(["done", "blocked"])
    .execute())

Nested Attributes¶

# Access nested properties
high_effort = (graph.query_builder()
    .where("properties.effort").gt(10)
    .execute())

# Multiple levels deep
specific_config = (graph.query_builder()
    .where("properties.config.enabled", True)
    .execute())

Result Methods¶

qb = graph.query_builder().where("status", "blocked")

# Get all results
all_blocked = qb.execute()

# Get first match
first_blocked = qb.first()

# Get count only
blocked_count = qb.count()

Find API (BeautifulSoup-style)¶

Simple, intuitive queries inspired by BeautifulSoup.

Basic Find¶

# Find first match
feature = graph.find(type="feature")
blocked = graph.find(status="blocked")

# Find with multiple criteria
urgent = graph.find(type="feature", status="blocked", priority="high")

Find All¶

# Find all matches
all_features = graph.find_all(type="feature")
all_blocked = graph.find_all(status="blocked")

# With limit
top_5 = graph.find_all(type="feature", limit=5)

Django-style Lookup Suffixes¶

# Contains (case-sensitive)
auth = graph.find_all(title__contains="auth")

# Case-insensitive contains
auth_any = graph.find_all(title__icontains="AUTH")

# Starts with
api_features = graph.find_all(title__startswith="API")

# Ends with
service_features = graph.find_all(title__endswith="Service")

# Regex
pattern_match = graph.find_all(title__regex=r"v\d+")

# Numeric comparisons
high_effort = graph.find_all(properties__effort__gt=8)
low_effort = graph.find_all(properties__effort__lt=4)
medium = graph.find_all(properties__effort__gte=4, properties__effort__lte=8)

# In list
priority_filter = graph.find_all(priority__in=["high", "critical"])

# Not in list
not_completed = graph.find_all(status__not_in=["done", "cancelled"])

# Is null / Is not null
no_assignee = graph.find_all(properties__assignee__isnull=True)
has_assignee = graph.find_all(properties__assignee__isnull=False)

Relationship Queries¶

# Find nodes related to a specific node
related = graph.find_related("feature-001")

# Find by specific relationship type
blockers = graph.find_related("feature-001", relationship="blocked_by")

# Convenience methods for common relationships
blocking = graph.find_blocking("feature-001")  # What this blocks
blocked_by = graph.find_blocked_by("feature-001")  # What blocks this

Graph Traversal¶

Navigate the graph structure.

Ancestors and Descendants¶

# Get all ancestors (nodes this depends on)
ancestors = graph.ancestors("feature-001")

# With depth limit
immediate_deps = graph.ancestors("feature-001", max_depth=1)
two_levels = graph.ancestors("feature-001", max_depth=2)

# Get all descendants (nodes that depend on this)
descendants = graph.descendants("feature-001")

# With depth limit
immediate_dependents = graph.descendants("feature-001", max_depth=1)

Path Finding¶

# Find all paths between two nodes
paths = graph.all_paths("feature-001", "feature-010")

# With max length constraint
short_paths = graph.all_paths("feature-001", "feature-010", max_length=4)

# Shortest path between two nodes
shortest = graph.shortest_path("feature-001", "feature-010")

Subgraph Extraction¶

# Extract a subgraph with specific nodes
subset = graph.subgraph(["feature-001", "feature-002", "feature-003"])

# Without internal edges
nodes_only = graph.subgraph(["feature-001", "feature-002"], include_edges=False)

Connected Components¶

# Get all nodes in the same connected component
component = graph.connected_component("feature-001")

# Filter by relationship type
blocking_component = graph.connected_component("feature-001", relationship="blocked_by")

Edge Index (O(1) Lookups)¶

Efficient reverse edge lookups.

# Get edges pointing TO a node
incoming = graph.get_incoming_edges("feature-001")

# Filter by relationship
blockers = graph.get_incoming_edges("feature-001", relationship="blocked_by")

# Get edges pointing FROM a node
outgoing = graph.get_outgoing_edges("feature-001")

# Get all connected neighbors
neighbors = graph.get_neighbors("feature-001")

Common Patterns¶

Finding Bottlenecks¶

# Nodes that block the most others
def find_top_blockers(graph, limit=5):
    """Rank feature nodes by how many "blocked_by" descendants they have.

    Args:
        graph: Object providing ``find_all()`` and ``descendants()``.
        limit: Maximum number of entries to return.

    Returns:
        A list of ``(node, count)`` tuples, highest count first, containing
        only nodes whose count is greater than zero.
    """
    counted = [
        (feature, len(graph.descendants(feature.id, relationship="blocked_by")))
        for feature in graph.find_all(type="feature")
    ]
    # sorted() is stable, so nodes with equal counts keep their find_all() order.
    ranked = sorted(
        (entry for entry in counted if entry[1] > 0),
        key=lambda entry: entry[1],
        reverse=True,
    )
    return ranked[:limit]

Finding Leaf Nodes¶

# Nodes with no dependencies
def find_leaf_nodes(graph):
    """Return the feature nodes that have no dependencies.

    A node counts as a leaf when ``graph.ancestors(node.id)`` is empty.

    Args:
        graph: Object providing ``find_all()`` and ``ancestors()``.

    Returns:
        A list of feature nodes, in ``find_all()`` order, with no ancestors.
    """
    # The previous version returned every feature from an early `return`,
    # which left this ancestors filter unreachable.
    return [node for node in graph.find_all(type="feature")
            if not graph.ancestors(node.id)]

Finding Ready Tasks¶

# Tasks where all dependencies are done
def find_ready_tasks(graph):
    """Return "todo" feature nodes whose blockers are all done.

    Args:
        graph: Object providing ``find_all()`` and ``find_blocked_by()``.

    Returns:
        A list of nodes, in ``find_all()`` order, for which every node from
        ``graph.find_blocked_by(node.id)`` has ``status == "done"``. A node
        with no blockers qualifies, because ``all()`` of nothing is True.
    """
    candidates = graph.find_all(type="feature", status="todo")
    return [
        task
        for task in candidates
        if all(dep.status == "done" for dep in graph.find_blocked_by(task.id))
    ]

Dependency Chain Analysis¶

# Find the longest dependency chain
def find_critical_path(graph):
    """Return the longest "blocked_by" path between any two feature nodes.

    Every ordered pair of distinct features is checked via
    ``graph.all_paths()``, so cost grows quadratically with the number of
    features.

    Args:
        graph: Object providing ``find_all()`` and ``all_paths()``.

    Returns:
        The longest path found, or an empty list if there is none. When
        several paths share the maximum length, the first one found wins.
    """
    features = graph.find_all(type="feature")
    longest = []
    for source in features:
        for target in features:
            if source.id == target.id:
                continue
            candidates = graph.all_paths(
                source.id, target.id, relationship="blocked_by"
            )
            for candidate in candidates:
                # Strict comparison keeps the earliest path on ties.
                if len(candidate) > len(longest):
                    longest = candidate
    return longest

Performance Tips¶

Use EdgeIndex for reverse lookups: get_incoming_edges() is O(1) vs O(V*E) for scanning
Limit traversal depth: Use max_depth parameter when you don't need full transitive closure
Use first() when you only need one result: avoids iterating the entire graph
Prefer find_all() with filters: More efficient than filtering after get_nodes()
Cache frequently accessed subgraphs: Use subgraph() to create smaller working sets