diff --git a/.gitignore b/.gitignore index 5f899ff8d..fd42dac32 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,9 @@ +# Benchmark generated output +benchmarks/results/ +benchmarks/figures/ + ################# ## Eclipse ################# diff --git a/benchmarks/data_generator.py b/benchmarks/data_generator.py new file mode 100644 index 000000000..2cc646b3a --- /dev/null +++ b/benchmarks/data_generator.py @@ -0,0 +1,230 @@ +"""Generate synthetic and real HED strings/Series for benchmarking. + +Usage:: + + from data_generator import DataGenerator + gen = DataGenerator() # loads schema 8.4.0 + s = gen.make_string(n_tags=10, n_groups=2, depth=1) + series = gen.make_series(n_rows=1000, n_tags=10, n_groups=2, depth=1) + real = gen.load_real_data(tile_to=5000) +""" + +from __future__ import annotations + +import os +import random + +import pandas as pd + +from hed.schema import load_schema_version +from hed.models.schema_lookup import generate_schema_lookup +from hed.models.tabular_input import TabularInput +from hed.models.df_util import convert_to_form + + +class DataGenerator: + """Build synthetic and real HED data for benchmarking.""" + + def __init__(self, schema_version="8.4.0", seed=42): + self.schema = load_schema_version(schema_version) + self.lookup = generate_schema_lookup(self.schema) + self._rng = random.Random(seed) + + # Collect real tag short names from the schema for realistic generation + self._all_tags = [] + for name, entry in self.schema.tags.items(): + if name.endswith("/#"): + continue + short = getattr(entry, "short_tag_name", name.rsplit("/", 1)[-1]) + self._all_tags.append(short) + + # Separate leaf vs non-leaf for variety + self._tags = list(self._all_tags) + + # ------------------------------------------------------------------ + # Single string generation + # ------------------------------------------------------------------ + + def _pick_tags(self, n, repeats=0): + """Pick *n* unique tags, then append *repeats* duplicates of the first.""" 
+ chosen = self._rng.sample(self._tags, min(n, len(self._tags))) + if repeats and chosen: + chosen.extend([chosen[0]] * repeats) + return chosen + + def make_string(self, n_tags=5, n_groups=0, depth=0, repeats=0, form="short"): + """Build a single synthetic HED string. + + Parameters: + n_tags: Total number of tag tokens (spread across top-level and groups). + n_groups: Number of parenthesised groups to create. + depth: Maximum nesting depth inside groups. + repeats: Number of duplicate copies of the first tag to append. + form: 'short' | 'long' — tag form. + + Returns: + str: A raw HED string. + """ + tags = self._pick_tags(n_tags, repeats=repeats) + if form == "long": + tags = self._to_long(tags) + + if n_groups == 0 or depth == 0: + return ", ".join(tags) + + # Distribute tags across top-level and groups + top_count = max(1, n_tags - n_groups * 2) + top_tags = tags[:top_count] + remaining = tags[top_count:] + + parts = list(top_tags) + for i in range(n_groups): + group_tags = remaining[i * 2 : i * 2 + 2] if i * 2 + 2 <= len(remaining) else remaining[i * 2 :] + if not group_tags: + group_tags = [self._rng.choice(self._tags)] + parts.append(self._wrap_group(group_tags, depth)) + + return ", ".join(parts) + + def _wrap_group(self, tags, depth): + """Recursively nest *tags* to the given *depth*.""" + inner = ", ".join(tags) + result = f"({inner})" + for _ in range(depth - 1): + extra = self._rng.choice(self._tags) + result = f"({extra}, {result})" + return result + + def make_deeply_nested_string(self, depth, tags_per_level=2): + """Build a string with deep nesting: (A, (B, (C, ...))). + + Parameters: + depth: Number of nesting levels. + tags_per_level: Tags at each level. + + Returns: + str: Deeply nested HED string. 
+ """ + tags = self._pick_tags(depth * tags_per_level + 2) + # Build inside-out + inner = ", ".join(tags[:tags_per_level]) + for i in range(depth): + level_tags = tags[tags_per_level + i * tags_per_level : tags_per_level + (i + 1) * tags_per_level] + if not level_tags: + level_tags = [self._rng.choice(self._tags)] + inner = f"({', '.join(level_tags)}, ({inner}))" + return f"Event, Action, {inner}" + + def make_string_with_specific_tags(self, target_tags, n_extra=5, n_groups=2, depth=1, repeats=0): + """Build a string guaranteed to contain specific tags. + + Parameters: + target_tags: List of tag names to include. + n_extra: Number of random extra tags. + n_groups: Number of groups. + depth: Nesting depth. + repeats: How many times to repeat the first target tag. + + Returns: + str: HED string containing the target tags. + """ + extra = self._pick_tags(n_extra) + all_tags = list(target_tags) + extra + [target_tags[0]] * repeats + self._rng.shuffle(all_tags) + + if n_groups == 0 or depth == 0: + return ", ".join(all_tags) + + top_count = max(1, len(all_tags) - n_groups * 2) + top_tags = all_tags[:top_count] + remaining = all_tags[top_count:] + + parts = list(top_tags) + for i in range(n_groups): + group_tags = remaining[i * 2 : i * 2 + 2] if i * 2 + 2 <= len(remaining) else remaining[i * 2 :] + if not group_tags: + group_tags = [self._rng.choice(self._tags)] + parts.append(self._wrap_group(group_tags, depth)) + + return ", ".join(parts) + + def _to_long(self, short_tags): + """Convert short tag names to long form via the schema.""" + from hed.models.hed_tag import HedTag + + out = [] + for t in short_tags: + try: + out.append(HedTag(t, self.schema).long_tag) + except Exception: + out.append(t) + return out + + # ------------------------------------------------------------------ + # Series generation + # ------------------------------------------------------------------ + + def make_series(self, n_rows, *, n_tags=5, n_groups=0, depth=0, repeats=0, form="short", 
heterogeneous=False): + """Build a pd.Series of HED strings. + + Parameters: + n_rows: Number of rows. + n_tags, n_groups, depth, repeats, form: Passed to make_string. + heterogeneous: If True, randomise parameters per row. + """ + if heterogeneous: + rows = [] + for _ in range(n_rows): + nt = self._rng.choice([3, 5, 10, 15, 25]) + ng = self._rng.choice([0, 1, 2, 5]) + d = self._rng.choice([0, 1, 2]) + rows.append(self.make_string(n_tags=nt, n_groups=ng, depth=d, form=form)) + return pd.Series(rows) + else: + # Homogeneous: one template, tiled + template = self.make_string(n_tags=n_tags, n_groups=n_groups, depth=depth, repeats=repeats, form=form) + return pd.Series([template] * n_rows) + + # ------------------------------------------------------------------ + # Real data + # ------------------------------------------------------------------ + + def load_real_data(self, tile_to=None, form="short"): + """Load the FacePerception BIDS events and return a HED Series. + + Parameters: + tile_to: If set, tile the series up to this many rows. + form: 'short' | 'long'. + + Returns: + pd.Series of HED strings. 
+ """ + bids_root = os.path.realpath( + os.path.join(os.path.dirname(__file__), "..", "tests", "data", "bids_tests", "eeg_ds003645s_hed") + ) + sidecar = os.path.join(bids_root, "task-FacePerception_events.json") + events = os.path.join(bids_root, "sub-002", "eeg", "sub-002_task-FacePerception_run-1_events.tsv") + tab = TabularInput(events, sidecar) + series = tab.series_filtered + + if form == "long": + df = series.copy() + convert_to_form(df, self.schema, "long_tag") + series = df + + if tile_to and tile_to > len(series): + reps = (tile_to // len(series)) + 1 + series = pd.Series(list(series) * reps).iloc[:tile_to].reset_index(drop=True) + + return series + + +# Quick self-test +if __name__ == "__main__": + gen = DataGenerator() + print(f"Schema tags available: {len(gen._tags)}") + print(f"Sample string (5 tags): {gen.make_string(5)}") + print(f"Sample string (10 tags, 2 groups, depth 2): {gen.make_string(10, 2, 2)}") + print(f"Sample string (5 tags, 3 repeats): {gen.make_string(5, repeats=3)}") + print(f"Real data rows: {len(gen.load_real_data())}") + print(f"Tiled to 500: {len(gen.load_real_data(tile_to=500))}") diff --git a/benchmarks/report.py b/benchmarks/report.py new file mode 100644 index 000000000..4d4f1dee6 --- /dev/null +++ b/benchmarks/report.py @@ -0,0 +1,747 @@ +"""Generate analysis report from benchmark results. 
+ +Reads the latest JSON results file and produces: + - Console summary tables + - Matplotlib figures saved to benchmarks/figures/{stem}/ + - A Markdown report in benchmarks/results/ + +Usage:: + + python report.py # latest results + python report.py results/benchmark_20260407_120000.json # specific file +""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path + +import matplotlib + +matplotlib.use("Agg") # must be set before importing pyplot +import matplotlib.pyplot as plt # noqa: E402 +import pandas as pd + +RESULTS_DIR = Path(__file__).parent / "results" +_FIGURES_BASE = Path(__file__).parent / "figures" +_FIGURES_BASE.mkdir(exist_ok=True) + +# Consistent colours per engine +ENGINE_COLORS = { + "basic_search": "#1f77b4", + "QueryHandler": "#ff7f0e", + "QueryHandler_loop": "#ff7f0e", + "SQH_no_lookup": "#2ca02c", + "SQH_with_lookup": "#d62728", + "search_series_no_lookup": "#2ca02c", + "search_series_with_lookup": "#d62728", + "StringQueryHandler": "#9467bd", + "search_series": "#8c564b", + "StringQueryHandler_no_lookup": "#2ca02c", + "StringQueryHandler_with_lookup": "#d62728", +} + + +def load_results(path=None): + """Load benchmark results from JSON.""" + if path is None: + files = sorted(RESULTS_DIR.glob("benchmark_*.json")) + if not files: + print("No results files found in", RESULTS_DIR) + sys.exit(1) + path = files[-1] + else: + path = Path(path) + print(f"Loading results from {path}") + return json.loads(path.read_text(encoding="utf-8")), path.stem + + +# ====================================================================== +# Console summary +# ====================================================================== + + +def print_single_string_summary(data): + """Print a pivoted summary table of single-string results.""" + records = data.get("single_string", []) + if not records: + return + df = pd.DataFrame(records) + print("\n" + "=" * 80) + print("SINGLE-STRING BENCHMARK SUMMARY (median 
seconds)") + print("=" * 80) + pivot = df.pivot_table( + index=["config_label", "query_label"], + columns="engine", + values="total_time", + aggfunc="first", + ) + # Convert to milliseconds for readability + pivot_ms = pivot * 1000 + pd.set_option("display.float_format", "{:.4f}".format) + pd.set_option("display.max_columns", 20) + pd.set_option("display.width", 200) + print(pivot_ms.to_string()) + print() + + +def print_series_summary(data): + """Print series-level benchmark summary.""" + records = data.get("series", []) + if not records: + return + df = pd.DataFrame(records) + print("\n" + "=" * 80) + print("SERIES BENCHMARK SUMMARY (median seconds)") + print("=" * 80) + pivot = df.pivot_table( + index=["config_label", "query_label"], + columns="engine", + values="total_time", + aggfunc="first", + ) + pivot_ms = pivot * 1000 + print(pivot_ms.to_string()) + print() + + +def print_sweep_summary(data): + """Print factor sweep summary.""" + records = data.get("factor_sweeps", []) + if not records: + return + df = pd.DataFrame(records) + print("\n" + "=" * 80) + print("FACTOR SWEEP SUMMARY") + print("=" * 80) + for factor in df["factor"].unique(): + sub = df[df["factor"] == factor] + pivot = sub.pivot_table(index="level", columns="engine", values="time", aggfunc="first") + pivot_ms = pivot * 1000 + print(f"\n--- {factor} (ms) ---") + print(pivot_ms.to_string()) + + +def print_real_data_summary(data): + """Print real-data benchmark summary.""" + records = data.get("real_data", []) + if not records: + return + df = pd.DataFrame(records) + print("\n" + "=" * 80) + print(f"REAL DATA BENCHMARK ({records[0].get('n_rows', '?')} rows)") + print("=" * 80) + pivot = df.pivot_table(index="query_label", columns="engine", values="total_time", aggfunc="first") + pivot_ms = pivot * 1000 + print(pivot_ms.to_string()) + print() + + +# ====================================================================== +# Plots +# 
====================================================================== + + +def _color(engine): + return ENGINE_COLORS.get(engine, "#333333") + + +def plot_factor_sweep(data, stem): + """One figure per factor sweep with engines as separate lines.""" + records = data.get("factor_sweeps", []) + if not records: + return + df = pd.DataFrame(records) + + for factor in df["factor"].unique(): + sub = df[df["factor"] == factor].copy() + + fig, ax = plt.subplots(figsize=(8, 5)) + for engine in sub["engine"].unique(): + edf = sub[sub["engine"] == engine].sort_values("level") + ax.plot(range(len(edf)), edf["time"].values * 1000, marker="o", label=engine, color=_color(engine)) + ax.set_xticks(range(len(edf))) + ax.set_xticklabels(edf["level"].astype(str), rotation=45, ha="right") + + ax.set_xlabel(factor) + ax.set_ylabel("Time (ms)") + ax.set_title(f"Factor sweep: {factor}") + ax.legend(fontsize=8) + ax.grid(True, alpha=0.3) + fig.tight_layout() + fig.savefig(_figures_dir(stem) / f"benchmark_sweep_{factor}.png", dpi=150) + plt.close(fig) + print(f" Saved figures/{stem}/benchmark_sweep_{factor}.png") + + +def plot_series_scaling(data, stem): + """Plot total time vs series size for each engine.""" + records = data.get("factor_sweeps", []) + if not records: + return + df = pd.DataFrame(records) + sub = df[df["factor"] == "series_size"] + if sub.empty: + return + + fig, axes = plt.subplots(1, 2, figsize=(14, 5)) + + # Total time + ax = axes[0] + for engine in sub["engine"].unique(): + edf = sub[sub["engine"] == engine].sort_values("level") + ax.plot(edf["level"], edf["time"] * 1000, marker="o", label=engine, color=_color(engine)) + ax.set_xlabel("Series size (rows)") + ax.set_ylabel("Total time (ms)") + ax.set_title("Series search: total time") + ax.legend(fontsize=8) + ax.grid(True, alpha=0.3) + + # Per-row time + ax = axes[1] + for engine in sub["engine"].unique(): + edf = sub[sub["engine"] == engine].sort_values("level") + if "per_row" in edf.columns: + ax.plot(edf["level"], 
edf["per_row"] * 1000, marker="o", label=engine, color=_color(engine)) + ax.set_xlabel("Series size (rows)") + ax.set_ylabel("Per-row time (ms)") + ax.set_title("Series search: per-row amortized cost") + ax.legend(fontsize=8) + ax.grid(True, alpha=0.3) + + fig.tight_layout() + fig.savefig(_figures_dir(stem) / "benchmark_series_scaling.png", dpi=150) + plt.close(fig) + print(f" Saved figures/{stem}/benchmark_series_scaling.png") + + +def plot_compile_vs_search(data, stem): + """Bar chart comparing compilation time to per-search time.""" + records = data.get("factor_sweeps", []) + if not records: + return + df = pd.DataFrame(records) + sub = df[df["factor"] == "compile_vs_search"] + if sub.empty: + return + + fig, ax = plt.subplots(figsize=(8, 5)) + engines = sub["engine"].unique() + levels = sub["level"].unique() # compile, search + x = range(len(engines)) + width = 0.35 + + for i, level in enumerate(levels): + vals = [] + for eng in engines: + row = sub[(sub["engine"] == eng) & (sub["level"] == level)] + vals.append(row["time"].values[0] * 1000 if len(row) else 0) + offset = (i - 0.5) * width + ax.bar([xi + offset for xi in x], vals, width, label=level) + + ax.set_xticks(x) + ax.set_xticklabels(engines, rotation=15) + ax.set_ylabel("Time (ms)") + ax.set_title("Compilation vs per-search cost") + ax.legend() + ax.grid(True, alpha=0.3, axis="y") + fig.tight_layout() + fig.savefig(_figures_dir(stem) / "benchmark_compile_vs_search.png", dpi=150) + plt.close(fig) + print(f" Saved figures/{stem}/benchmark_compile_vs_search.png") + + +def plot_query_complexity_heatmap(data, stem): + """Heatmap of query complexity vs engine (single-string results).""" + records = data.get("single_string", []) + if not records: + return + df = pd.DataFrame(records) + # Pick one config for clarity + config = df["config_label"].unique()[len(df["config_label"].unique()) // 2] + sub = df[df["config_label"] == config] + + pivot = sub.pivot_table(index="query_label", columns="engine", 
values="total_time", aggfunc="first") + pivot_ms = pivot * 1000 + + fig, ax = plt.subplots(figsize=(12, 6)) + im = ax.imshow(pivot_ms.values, aspect="auto", cmap="YlOrRd") + ax.set_xticks(range(len(pivot_ms.columns))) + ax.set_xticklabels(pivot_ms.columns, rotation=45, ha="right", fontsize=8) + ax.set_yticks(range(len(pivot_ms.index))) + ax.set_yticklabels(pivot_ms.index, fontsize=8) + ax.set_title(f"Query × Engine time (ms) — config: {config}") + fig.colorbar(im, ax=ax, label="Time (ms)") + + # Annotate cells + for i in range(len(pivot_ms.index)): + for j in range(len(pivot_ms.columns)): + val = pivot_ms.values[i, j] + if pd.notna(val): + ax.text( + j, + i, + f"{val:.2f}", + ha="center", + va="center", + fontsize=7, + color="white" if val > pivot_ms.values[pd.notna(pivot_ms.values)].mean() else "black", + ) + + fig.tight_layout() + fig.savefig(_figures_dir(stem) / "benchmark_query_heatmap.png", dpi=150) + plt.close(fig) + print(f" Saved figures/{stem}/benchmark_query_heatmap.png") + + +def plot_real_data(data, stem): + """Bar chart of real-data results.""" + records = data.get("real_data", []) + if not records: + return + df = pd.DataFrame(records) + + pivot = df.pivot_table(index="query_label", columns="engine", values="total_time", aggfunc="first") + pivot_ms = pivot * 1000 + + fig, ax = plt.subplots(figsize=(10, 5)) + pivot_ms.plot(kind="bar", ax=ax, color=[_color(c) for c in pivot_ms.columns]) + ax.set_ylabel("Total time (ms)") + ax.set_title(f"Real BIDS data ({records[0].get('n_rows', '?')} rows)") + ax.legend(fontsize=8) + ax.grid(True, alpha=0.3, axis="y") + plt.xticks(rotation=45, ha="right") + fig.tight_layout() + fig.savefig(_figures_dir(stem) / "benchmark_real_data.png", dpi=150) + plt.close(fig) + print(f" Saved figures/{stem}/benchmark_real_data.png") + + +# ====================================================================== +# Markdown report +# ====================================================================== + + +def _pivot_to_md(pivot_ms, 
float_fmt=".3f"): + """Convert a pandas pivot table (in ms) to a Markdown table string.""" + lines = [] + headers = [""] + [str(c) for c in pivot_ms.columns] + lines.append("| " + " | ".join(headers) + " |") + lines.append("| " + " | ".join(["---"] * len(headers)) + " |") + for idx, row in pivot_ms.iterrows(): + label = str(idx) if not isinstance(idx, tuple) else " / ".join(str(x) for x in idx) + cells = [label] + for v in row: + cells.append(f"{v:{float_fmt}}" if pd.notna(v) else "—") + lines.append("| " + " | ".join(cells) + " |") + return "\n".join(lines) + + +def _engine_summary_table(data): + """Build a comparison table of the three search engines.""" + return ( + "| Feature | basic_search | QueryHandler | StringQueryHandler |\n" + "| --- | --- | --- | --- |\n" + "| Input type | `pd.Series[str]` | `HedString` objects | Raw strings (`str`) |\n" + "| Schema required | No | Yes | Optional (via `schema_lookup`) |\n" + "| Series-native | Yes (`find_matching`) | No (manual loop) | Yes (`search_series`) |\n" + "| Boolean AND | `word1, word2` | `term1 && term2` | same as QH |\n" + "| Boolean OR | — | `term1 || term2` | same as QH |\n" + "| Negation | `~word` | `~term` | same as QH |\n" + "| Exact group `{}` | — | `{term1, term2}` | same as QH |\n" + "| Optional exact `{:}` | — | `{term1, term2:}` | same as QH |\n" + "| Logical group `[]` | — | `[term1, term2]` | same as QH |\n" + "| Wildcard `?/?? 
/???` | — | Yes | same as QH |\n" + "| Descendant wildcard | `*` suffix | `*` suffix | same as QH |\n" + '| Quoted exact match | — | `"Exact-tag"` | same as QH |\n' + "| Implementation | Regex on text | Recursive tree on parsed nodes | Recursive tree on StringNode |\n" + ) + + +def _figures_dir(stem: str) -> Path: + """Return (and create) the per-run figures subdirectory.""" + d = _FIGURES_BASE / stem + d.mkdir(parents=True, exist_ok=True) + return d + + +def generate_markdown_report(data, stem): + """Write a comprehensive Markdown report with tables, plots, and analysis.""" + mode = "quick" if data.get("quick") else "full" + lines = [] + + def h1(t): + lines.extend([f"# {t}", ""]) + + def h2(t): + lines.extend([f"## {t}", ""]) + + def h3(t): + lines.extend([f"### {t}", ""]) + + def p(t): + lines.extend([t, ""]) + + def img(alt, path): + lines.extend([f"![{alt}]({path})", ""]) + + def table(md): + lines.extend([md, ""]) + + # ------------------------------------------------------------------ + # Title and overview + # ------------------------------------------------------------------ + h1("HED search benchmark report") + p(f"**Run:** {data.get('timestamp', 'unknown')} ") + p(f"**Mode:** {mode}") + + h2("Overview") + p("This report compares the performance of the three HED string search engines provided by the `hedtools` package:") + p( + "1. **basic_search** (`hed.models.basic_search.find_matching`) — regex-based pattern matching " + "that operates directly on a `pd.Series` of raw HED strings. No schema required. " + "Supports simple boolean AND (`@`), negation (`~`), wildcards (`*`), and parenthesised groups.\n" + "2. **QueryHandler** (`hed.models.query_handler.QueryHandler`) — full expression-tree search " + "that operates on parsed `HedString` objects. Requires a loaded HED schema. 
" + "Supports AND, OR, negation, exact groups `{}`, optional exact `{:}`, logical groups `[]`, " + "wildcard child `?`/`??`/`???`, descendant wildcards, and quoted exact matches.\n" + "3. **StringQueryHandler** (`hed.models.string_search.StringQueryHandler`) — lightweight " + "tree-based search that operates on raw strings via `StringNode` duck-typing. Schema is " + "optional (via `schema_lookup` dict for ancestor queries). Provides `search_series()` " + "convenience function for `pd.Series` input. Same query syntax as QueryHandler." + ) + + h3("Engine capability matrix") + table(_engine_summary_table(data)) + + # ------------------------------------------------------------------ + # Key findings (populated from data) + # ------------------------------------------------------------------ + h2("Key findings") + findings = [] + + # Series speed — use series_size sweep so query and config are consistent; + # report ratio at the largest row count tested. + series_recs = data.get("series", []) + _sweep_recs = data.get("factor_sweeps", []) + if _sweep_recs: + swdf_series = pd.DataFrame(_sweep_recs) + ss = swdf_series[swdf_series["factor"] == "series_size"] + if not ss.empty: + max_level = ss["level"].max() + at_max = ss[ss["level"] == max_level] + bs_row = at_max[at_max["engine"] == "basic_search"]["time"] + qh_row = at_max[at_max["engine"] == "QueryHandler_loop"]["time"] + if not bs_row.empty and not qh_row.empty and bs_row.values[0] > 0: + ratio = qh_row.values[0] / bs_row.values[0] + findings.append( + f"**Series throughput:** `basic_search` is ~{ratio:.0f}× faster than " + f"`QueryHandler` in a row-by-row loop at {max_level:,} rows, " + f"because it leverages vectorised pandas `str.contains` regex matching." + ) + elif series_recs: + sdf = pd.DataFrame(series_recs) + # Group by engine + n_rows, then take the median across queries at each row count; + # report the ratio at the largest row count to avoid mixing incomparable workloads. 
+ per_nrows = sdf.groupby(["engine", "n_rows"])["total_time"].median().reset_index() + max_nrows = per_nrows["n_rows"].max() + at_max = per_nrows[per_nrows["n_rows"] == max_nrows] + bs_row = at_max[at_max["engine"] == "basic_search"]["total_time"] + qh_row = at_max[at_max["engine"] == "QueryHandler_loop"]["total_time"] + if not bs_row.empty and not qh_row.empty and bs_row.values[0] > 0: + ratio = qh_row.values[0] / bs_row.values[0] + findings.append( + f"**Series throughput:** `basic_search` is ~{ratio:.0f}× faster than " + f"`QueryHandler` in a row-by-row loop at {max_nrows:,} rows, " + f"because it leverages vectorised pandas `str.contains` regex matching." + ) + + # SQH vs QH per string + single_recs = data.get("single_string", []) + if single_recs: + ssdf = pd.DataFrame(single_recs) + qh_avg = ssdf[ssdf["engine"] == "QueryHandler"]["total_time"].mean() + sqh_avg = ssdf[ssdf["engine"] == "StringQueryHandler_no_lookup"]["total_time"].mean() + if qh_avg > 0 and sqh_avg > 0: + pct = (1 - sqh_avg / qh_avg) * 100 + findings.append( + f"**Single-string speed:** `StringQueryHandler` (no lookup) is ~{pct:.0f}% " + f"faster than `QueryHandler` per string because it avoids schema-based " + f"`HedString` construction and uses lightweight string parsing." + ) + + # Schema lookup cost + sweeps = data.get("factor_sweeps", []) + if sweeps: + swdf = pd.DataFrame(sweeps) + lu = swdf[swdf["factor"] == "schema_lookup"] + if not lu.empty: + with_lu = lu[lu["level"] == "with_lookup"]["time"].mean() + no_lu = lu[lu["level"] == "no_lookup"]["time"].mean() + if no_lu > 0: + lu_pct = ((with_lu / no_lu) - 1) * 100 + if abs(lu_pct) < 5: + findings.append( + "**Schema-lookup overhead:** Enabling `schema_lookup` in " + "`StringQueryHandler` has negligible overhead for simple queries " + "(cost comes from queries that actually use ancestor matching)." 
+ ) + else: + findings.append( + f"**Schema-lookup overhead:** Enabling `schema_lookup` in " + f"`StringQueryHandler` adds ~{lu_pct:.0f}% overhead for " + f"ancestor-based queries." + ) + + # Deep nesting + if sweeps: + nest_df = swdf[swdf["factor"] == "nesting_depth"] + if not nest_df.empty: + for eng in ["QueryHandler", "SQH_with_lookup"]: + edf = nest_df[nest_df["engine"] == eng].sort_values("level") + if len(edf) >= 2: + t0 = edf.iloc[0]["time"] + t_last = edf.iloc[-1]["time"] + if t0 > 0: + ratio = t_last / t0 + findings.append( + f"**Nesting depth ({eng}):** At depth {edf.iloc[-1]['level']}, " + f"search time is ~{ratio:.1f}× the flat-string time." + ) + + # basic_search operation limitations + if sweeps: + po = swdf[swdf["factor"] == "per_operation"] + if not po.empty: + total = po["level"].nunique() + bs_supported = po[po["engine"] == "basic_search"]["level"].nunique() + unsupported = total - bs_supported + if unsupported > 0: + findings.append( + f"**Operation coverage:** `basic_search` supports " + f"{bs_supported} of {total} tested operations. " + f"The remaining {unsupported} operations (OR, exact groups, logical groups, " + f"wildcards `?`/`??`/`???`, quoted terms) require `QueryHandler` or " + f"`StringQueryHandler`." + ) + + for f in findings: + p(f"- {f}") + + # ------------------------------------------------------------------ + # Single-string results + # ------------------------------------------------------------------ + if single_recs: + h2("Single-string performance") + p( + "Each query was applied to a single HED string of varying complexity. " + "Times are medians of repeated runs, in milliseconds." 
+ ) + ssdf = pd.DataFrame(single_recs) + pivot = ( + ssdf.pivot_table( + index=["config_label", "query_label"], columns="engine", values="total_time", aggfunc="first" + ) + * 1000 + ) + table(_pivot_to_md(pivot)) + + img("Query × Engine heatmap", f"../figures/{stem}/benchmark_query_heatmap.png") + + # ------------------------------------------------------------------ + # Series results + # ------------------------------------------------------------------ + if series_recs: + h2("Series performance") + p( + "Whole-series search: each engine processes all rows of a `pd.Series` for a " + "given query. `basic_search` uses vectorised regex; `search_series` uses " + "`StringQueryHandler.search()` per row; `QueryHandler_loop` parses each row " + "into a `HedString` then searches. Times in milliseconds." + ) + sdf = pd.DataFrame(series_recs) + pivot = ( + sdf.pivot_table( + index=["config_label", "query_label"], columns="engine", values="total_time", aggfunc="first" + ) + * 1000 + ) + table(_pivot_to_md(pivot)) + + img("Series scaling", f"../figures/{stem}/benchmark_series_scaling.png") + + # ------------------------------------------------------------------ + # Factor sweeps + # ------------------------------------------------------------------ + h2("Factor sweeps") + p("Each sweep varies a single factor while holding others constant, measuring how performance degrades.") + + factor_descriptions = { + "tag_count": ( + "Number of tags in the HED string (1 to 100). basic_search time is dominated by " + "regex compilation overhead and stays roughly constant; tree-based engines scale " + "linearly with the number of nodes to traverse." + ), + "nesting_depth": ( + "Parenthesisation depth from 0 (flat) to 20. Deeper nesting increases the tree " + "walk for QueryHandler/StringQueryHandler. basic_search sees variable cost because " + "deeper nesting means more delimiter positions for its cartesian-product verification." 
+ ), + "repeated_tags": ( + "Repetitions of a target tag (0 to 40). basic_search's `verify_search_delimiters` " + "uses `itertools.product` over delimiter positions; repeated tags multiply the " + "search space. Tree-based engines are unaffected." + ), + "group_count": ( + "Number of parenthesised groups (1 to 20). More groups mean more children at the " + "top level for tree traversal." + ), + "series_size": ( + "Number of rows in the Series (10 to 5000). basic_search scales sub-linearly " + "thanks to vectorised pandas regex. All other engines scale linearly (per-row cost " + "is fixed)." + ), + "query_complexity": ( + "Query expression complexity from a bare term to a multi-clause composite. " + "More clauses = more expression-tree nodes to evaluate per candidate." + ), + "schema_lookup": ( + "StringQueryHandler with vs without the `schema_lookup` dictionary. The lookup " + "enables ancestor-based matching (e.g. `Event` matches `Sensory-event`) at a cost." + ), + "string_form": ( + "Short-form vs long-form HED strings. Long-form strings have fully expanded " + "paths (e.g. `Event/Sensory-event`) and are longer, increasing regex and parse cost." + ), + "compile_vs_search": ( + "Decomposition of one-time query compilation cost vs per-string search cost. " + "Compilation is cheap for both engines; the per-search cost dominates." + ), + "per_operation": ( + "Individual operation types tested in isolation. Shows which operations are " + "expensive for each engine. basic_search shows NaN/— for unsupported operations." + ), + } + + # Deep nesting sub-sweeps + for rec in sweeps: + factor = rec["factor"] + if factor.startswith("deep_nest_") and factor not in factor_descriptions: + query_type = factor.replace("deep_nest_", "").replace("_", " ") + factor_descriptions[factor] = ( + f"Deep nesting sweep for *{query_type}* queries at depths 1–20. " + f"Shows how nesting interacts with specific query patterns." 
+ ) + + factors = sorted({rec["factor"] for rec in sweeps}) + for factor in factors: + h3(factor.replace("_", " ").title()) + desc = factor_descriptions.get(factor, "") + if desc: + p(desc) + + # Inline table for this factor + sub = pd.DataFrame([r for r in sweeps if r["factor"] == factor]) + pivot = sub.pivot_table(index="level", columns="engine", values="time", aggfunc="first") * 1000 + table(_pivot_to_md(pivot)) + + img(factor, f"../figures/{stem}/benchmark_sweep_{factor}.png") + + # ------------------------------------------------------------------ + # Real data + # ------------------------------------------------------------------ + real_recs = data.get("real_data", []) + if real_recs: + h2("Real BIDS data") + n_rows = real_recs[0].get("n_rows", "?") + p( + f"Search over {n_rows} rows of real BIDS event data " + f"(`eeg_ds003645s_hed` test dataset, HED_column values). " + f"Times in milliseconds." + ) + rdf = pd.DataFrame(real_recs) + pivot = rdf.pivot_table(index="query_label", columns="engine", values="total_time", aggfunc="first") * 1000 + table(_pivot_to_md(pivot)) + img("Real BIDS data", f"../figures/{stem}/benchmark_real_data.png") + + # ------------------------------------------------------------------ + # Recommendations + # ------------------------------------------------------------------ + h2("Recommendations") + p( + "**Choose `basic_search` when:** You need the fastest possible series-level search, " + "your queries use only simple terms, AND, negation, or descendant wildcards (`*`), " + "and you don't need schema-aware matching. Ideal for filtering event files where " + "speed matters and queries are simple." + ) + p( + "**Choose `StringQueryHandler` when:** You need the full query language (OR, exact " + "groups, logical groups, wildcards) but want to avoid the overhead of parsing every " + "HED string through the schema. `search_series()` is the best general-purpose " + "option when operating on raw strings from tabular files." 
+ ) + p( + "**Choose `QueryHandler` when:** You already have parsed `HedString` objects (e.g. " + "from validation pipelines), or you need exact schema-validated matching. The " + "additional overhead comes from `HedString` construction, not the search itself." + ) + + # ------------------------------------------------------------------ + # Methodology + # ------------------------------------------------------------------ + h2("Methodology") + p( + f"- **Timing:** `timeit` with {20 if not data.get('quick') else 10} iterations " + f"(single-string), {5 if not data.get('quick') else 3} iterations (series), " + f"{10 if not data.get('quick') else 5} iterations (sweeps). Median of all iterations reported.\n" + f"- **Schema:** HED 8.4.0 loaded once and reused across all benchmarks.\n" + f"- **Data generation:** Synthetic strings built from real schema tags with controlled " + f"tag count, nesting depth, group count, and tag repetition.\n" + f"- **schema_lookup:** Generated via `generate_schema_lookup(schema)` — a dict mapping " + f"each short tag to its ancestor tuple.\n" + f"- **Environment:** Results depend on hardware; relative ratios between engines are " + f"the meaningful comparison." 
+ ) + + # Write + report_path = RESULTS_DIR / f"{stem}_report.md" + report_path.write_text("\n".join(lines), encoding="utf-8") + print(f" Saved {report_path}") + + +# ====================================================================== +# Main +# ====================================================================== + + +def main(path=None): + data, stem = load_results(path) + + # Console summaries + print_single_string_summary(data) + print_series_summary(data) + print_sweep_summary(data) + print_real_data_summary(data) + + # Plots + print("\nGenerating plots…") + plot_factor_sweep(data, stem) + plot_series_scaling(data, stem) + plot_compile_vs_search(data, stem) + plot_query_complexity_heatmap(data, stem) + plot_real_data(data, stem) + + # Markdown + print("\nGenerating Markdown report…") + generate_markdown_report(data, stem) + + print("\nDone.") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Generate benchmark report") + parser.add_argument("results_file", nargs="?", default=None, help="Path to results JSON") + args = parser.parse_args() + main(args.results_file) diff --git a/benchmarks/search_benchmark.py b/benchmarks/search_benchmark.py new file mode 100644 index 000000000..3cefdfd0c --- /dev/null +++ b/benchmarks/search_benchmark.py @@ -0,0 +1,747 @@ +"""HED search performance benchmark harness. + +Measures compilation time, single-string search time, and series search time +for all three HED search engines across a matrix of query types + data configs. 
+ +Usage:: + + python search_benchmark.py # full benchmark + python search_benchmark.py --quick # fast smoke-test +""" + +from __future__ import annotations + +import argparse +import json +import os +import sys +import timeit +import tracemalloc +from datetime import datetime +from pathlib import Path + +import pandas as pd + +# Ensure the repo root is importable when running the script directly +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from hed import HedString, QueryHandler # noqa: E402 +from hed.models.basic_search import find_matching # noqa: E402 +from hed.models.string_search import StringQueryHandler, search_series # noqa: E402 + +from data_generator import DataGenerator # noqa: E402 + +RESULTS_DIR = Path(__file__).parent / "results" +RESULTS_DIR.mkdir(exist_ok=True) + + +# ====================================================================== +# Timing helpers +# ====================================================================== + + +def time_it(func, n_runs=5): + """Return (median_seconds, all_times) for calling *func* n_runs times.""" + times = [] + for _ in range(n_runs): + t = timeit.timeit(func, number=1) + times.append(t) + times.sort() + median = times[len(times) // 2] + return median, times + + +def measure_memory(func): + """Return peak memory (bytes) used by *func*.""" + tracemalloc.start() + func() + _, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() + return peak + + +# ====================================================================== +# Query definitions — (label, basic_search_query, qh_query) +# basic_search_query = None means "not supported by basic_search" +# ====================================================================== + +QUERIES = [ + # --- Simple terms --- + ("single_bare_term", "@Event", "Event"), + ("single_exact_term", None, '"Event"'), + ("single_wildcard", "Def/*", "Def/*"), + # --- Boolean --- + ("two_term_and", "@Event, @Action", "Event && Action"), + ("two_term_or", None, 
"Event || Action"), + ("negation", "~Event", "~Event"), + # --- Groups --- + ("group_nesting", "(Event, Action)", "[Event && Action]"), + ("exact_group", None, "{Event && Action}"), + ("exact_group_optional", None, "{Event && Action: Agent}"), + ("wildcard_child", None, "{Event, ?}"), + # --- Complex --- + ("three_term_and", "@Event, @Action, @Agent", "Event && Action && Agent"), + ("complex_composite", None, "{(Onset || Offset), (Def || {Def-expand}): ???}"), +] + + +# ====================================================================== +# Single-string benchmarks +# ====================================================================== + + +class SingleStringBenchmark: + """Benchmark each engine on a single HED string.""" + + def __init__(self, gen: DataGenerator, n_runs=20): + self.gen = gen + self.schema = gen.schema + self.lookup = gen.lookup + self.n_runs = n_runs + + def run_all(self, string_configs): + """Run all queries against all string configurations. + + Parameters: + string_configs: list of dicts with keys matching DataGenerator.make_string params + plus a 'label' key for identification. + + Returns: + list[dict]: One record per (query, config, engine) combination. 
+ """ + records = [] + for cfg in string_configs: + label = cfg.pop("label") + raw = self.gen.make_string(**cfg) + cfg["label"] = label # restore + + for q_label, bs_query, qh_query in QUERIES: + # --- basic_search --- + if bs_query is not None: + rec = self._bench_basic(raw, bs_query, label, q_label) + records.append(rec) + + # --- QueryHandler --- + rec = self._bench_query_handler(raw, qh_query, label, q_label) + records.append(rec) + + # --- StringQueryHandler (no lookup) --- + rec = self._bench_string_qh(raw, qh_query, label, q_label, schema_lookup=None, suffix="no_lookup") + records.append(rec) + + # --- StringQueryHandler (with lookup) --- + rec = self._bench_string_qh( + raw, qh_query, label, q_label, schema_lookup=self.lookup, suffix="with_lookup" + ) + records.append(rec) + + return records + + def _bench_basic(self, raw, query, cfg_label, q_label): + series = pd.Series([raw]) + # Compilation (regex build is inside find_matching, not separable easily) + med, _ = time_it(lambda: find_matching(series, query), self.n_runs) + matches = int(find_matching(series, query).sum()) + return { + "engine": "basic_search", + "query_label": q_label, + "config_label": cfg_label, + "query": query, + "compile_time": None, # not separable + "search_time": med, + "total_time": med, + "matches": matches, + } + + def _bench_query_handler(self, raw, query, cfg_label, q_label): + # Compilation + comp_med, _ = time_it(lambda: QueryHandler(query), self.n_runs) + qh = QueryHandler(query) + + # Need to parse HedString each time (part of the cost) + def do_search(): + hs = HedString(raw, self.schema) + return qh.search(hs) + + search_med, _ = time_it(do_search, self.n_runs) + result = do_search() + return { + "engine": "QueryHandler", + "query_label": q_label, + "config_label": cfg_label, + "query": query, + "compile_time": comp_med, + "search_time": search_med, + "total_time": comp_med + search_med, + "matches": len(result), + } + + def _bench_string_qh(self, raw, query, cfg_label, 
q_label, schema_lookup, suffix): + comp_med, _ = time_it(lambda: StringQueryHandler(query), self.n_runs) + sqh = StringQueryHandler(query) + search_med, _ = time_it(lambda: sqh.search(raw, schema_lookup=schema_lookup), self.n_runs) + result = sqh.search(raw, schema_lookup=schema_lookup) + return { + "engine": f"StringQueryHandler_{suffix}", + "query_label": q_label, + "config_label": cfg_label, + "query": query, + "compile_time": comp_med, + "search_time": search_med, + "total_time": comp_med + search_med, + "matches": len(result), + } + + +# ====================================================================== +# Series benchmarks +# ====================================================================== + + +class SeriesBenchmark: + """Benchmark each engine on a pd.Series of HED strings.""" + + def __init__(self, gen: DataGenerator, n_runs=5): + self.gen = gen + self.schema = gen.schema + self.lookup = gen.lookup + self.n_runs = n_runs + + def run_all(self, series_configs): + """Run selected queries against series of varying size. + + Parameters: + series_configs: list of dicts with keys 'label', 'n_rows', plus + DataGenerator.make_series params. + + Returns: + list[dict]: One record per (query, config, engine) combination. 
+ """ + records = [] + for cfg in series_configs: + label = cfg.pop("label") + n_rows = cfg["n_rows"] + series = self.gen.make_series(**cfg) + cfg["label"] = label # restore + + # Use a subset of queries for series (too slow to run all × all) + # For small series test all; for large ones test representative subset + queries_to_test = QUERIES if n_rows <= 500 else QUERIES[:6] + for q_label, bs_query, qh_query in queries_to_test: + print(f" Series {label} | {q_label} ({n_rows} rows)…") + + # --- basic_search --- + if bs_query is not None: + rec = self._bench_basic_series(series, bs_query, label, q_label, n_rows) + records.append(rec) + + # --- search_series (StringQueryHandler) no lookup --- + rec = self._bench_search_series(series, qh_query, label, q_label, n_rows, None, "no_lookup") + records.append(rec) + + # --- search_series (StringQueryHandler) with lookup --- + rec = self._bench_search_series(series, qh_query, label, q_label, n_rows, self.lookup, "with_lookup") + records.append(rec) + + # --- QueryHandler loop --- + rec = self._bench_qh_loop(series, qh_query, label, q_label, n_rows) + records.append(rec) + + return records + + def _bench_basic_series(self, series, query, cfg_label, q_label, n_rows): + med, _ = time_it(lambda: find_matching(series, query), self.n_runs) + matches = int(find_matching(series, query).sum()) + return { + "engine": "basic_search", + "query_label": q_label, + "config_label": cfg_label, + "n_rows": n_rows, + "total_time": med, + "per_row": med / n_rows, + "matches": matches, + } + + def _bench_search_series(self, series, query, cfg_label, q_label, n_rows, lookup, suffix): + med, _ = time_it(lambda: search_series(series, query, schema_lookup=lookup), self.n_runs) + matches = int(search_series(series, query, schema_lookup=lookup).sum()) + return { + "engine": f"search_series_{suffix}", + "query_label": q_label, + "config_label": cfg_label, + "n_rows": n_rows, + "total_time": med, + "per_row": med / n_rows, + "matches": matches, + } + + 
def _bench_qh_loop(self, series, query, cfg_label, q_label, n_rows): + qh = QueryHandler(query) + schema = self.schema + + def do_all(): + for s in series: + if pd.notna(s) and s: + hs = HedString(s, schema) + qh.search(hs) + + med, _ = time_it(do_all, self.n_runs) + # count matches + count = 0 + for s in series: + if pd.notna(s) and s: + hs = HedString(s, schema) + if qh.search(hs): + count += 1 + return { + "engine": "QueryHandler_loop", + "query_label": q_label, + "config_label": cfg_label, + "n_rows": n_rows, + "total_time": med, + "per_row": med / n_rows, + "matches": count, + } + + +# ====================================================================== +# Factor sweeps +# ====================================================================== + + +class FactorSweep: + """Isolate the effect of one variable on performance.""" + + def __init__(self, gen: DataGenerator, n_runs=10): + self.gen = gen + self.schema = gen.schema + self.lookup = gen.lookup + self.n_runs = n_runs + + def sweep_tag_count(self, tag_counts=(1, 5, 10, 25, 50, 100)): + """Vary number of tags per string, fixed simple query.""" + query = "Event" + bs_query = "@Event" + records = [] + for nt in tag_counts: + raw = self.gen.make_string(n_tags=nt) + for engine, med in self._bench_all_engines(raw, query, bs_query): + records.append({"factor": "tag_count", "level": nt, "engine": engine, "time": med}) + return records + + def sweep_nesting_depth(self, depths=(0, 1, 2, 3, 5, 10, 15, 20)): + """Vary nesting depth using deeply nested strings.""" + query = "Event" + bs_query = "@Event" + records = [] + for d in depths: + if d == 0: + raw = self.gen.make_string(n_tags=10) + else: + raw = self.gen.make_deeply_nested_string(depth=d, tags_per_level=2) + for engine, med in self._bench_all_engines(raw, query, bs_query): + records.append({"factor": "nesting_depth", "level": d, "engine": engine, "time": med}) + return records + + def sweep_repeated_tags(self, repeat_counts=(0, 3, 5, 10, 20, 40)): + """Vary 
duplicate tag count — stresses basic_search cartesian product. + + Uses strings that actually contain 'Event' and 'Action' as the repeated + tags so the group query ``(Event, Action)`` triggers combinatorial matching. + """ + query = "(Event, Action)" + bs_query = "(Event, Action)" + records = [] + for r in repeat_counts: + raw = self.gen.make_string_with_specific_tags( + ["Event", "Action"], n_extra=3, n_groups=1, depth=1, repeats=r + ) + for engine, med in self._bench_all_engines(raw, query, bs_query): + records.append({"factor": "repeated_tags", "level": r, "engine": engine, "time": med}) + return records + + def sweep_group_count(self, group_counts=(0, 1, 5, 10, 20)): + """Vary number of groups per string.""" + query = "Event" + bs_query = "@Event" + records = [] + for ng in group_counts: + raw = self.gen.make_string(n_tags=max(10, ng * 2 + 3), n_groups=ng, depth=1) + for engine, med in self._bench_all_engines(raw, query, bs_query): + records.append({"factor": "group_count", "level": ng, "engine": engine, "time": med}) + return records + + def sweep_series_size(self, sizes=(10, 100, 500, 1000, 5000)): + """Vary series length.""" + query = "Event" + bs_query = "@Event" + records = [] + for n in sizes: + series = self.gen.make_series(n_rows=n, n_tags=10, n_groups=2, depth=1) + for engine, med in self._bench_series_engines(series, query, bs_query, n): + records.append({"factor": "series_size", "level": n, "engine": engine, "time": med, "per_row": med / n}) + return records + + def sweep_query_complexity(self): + """Compare queries of increasing complexity.""" + raw = self.gen.make_string(n_tags=20, n_groups=5, depth=2) + complexity_queries = [ + ("1_single_term", "@Event", "Event"), + ("2_two_and", "@Event, @Action", "Event && Action"), + ("3_three_and", "@Event, @Action, @Agent", "Event && Action && Agent"), + ("4_or", None, "Event || Action"), + ("5_negation", "~Event", "~Event"), + ("6_group", "(Event, Action)", "[Event && Action]"), + ("7_exact", None, "{Event 
&& Action}"), + ("8_complex", None, "{(Onset || Offset), (Def || {Def-expand}): ???}"), + ] + records = [] + for clabel, bs_q, qh_q in complexity_queries: + for engine, med in self._bench_all_engines(raw, qh_q, bs_q): + records.append({"factor": "query_complexity", "level": clabel, "engine": engine, "time": med}) + return records + + def sweep_schema_lookup(self): + """Compare StringQueryHandler with vs without schema_lookup.""" + raw = self.gen.make_string(n_tags=15, n_groups=3, depth=1) + query = "Event" + sqh = StringQueryHandler(query) + records = [] + for with_lookup in [False, True]: + lk = self.lookup if with_lookup else None + label = "with_lookup" if with_lookup else "no_lookup" + med, _ = time_it(lambda lk=lk: sqh.search(raw, schema_lookup=lk), self.n_runs) + records.append({"factor": "schema_lookup", "level": label, "engine": "StringQueryHandler", "time": med}) + return records + + def sweep_string_form(self): + """Compare short vs long form strings.""" + query = "Event" + bs_query = "@Event" + records = [] + for form in ["short", "long"]: + raw = self.gen.make_string(n_tags=15, n_groups=3, depth=1, form=form) + for engine, med in self._bench_all_engines(raw, query, bs_query): + records.append({"factor": "string_form", "level": form, "engine": engine, "time": med}) + return records + + def sweep_compilation_vs_search(self): + """Separate compilation cost from per-search cost.""" + raw = self.gen.make_string(n_tags=15, n_groups=3, depth=1) + query = "Event" + records = [] + + # QueryHandler + comp, _ = time_it(lambda: QueryHandler(query), self.n_runs) + qh = QueryHandler(query) + + def qh_search(): + hs = HedString(raw, self.schema) + qh.search(hs) + + search_med, _ = time_it(qh_search, self.n_runs) + records.append({"factor": "compile_vs_search", "level": "compile", "engine": "QueryHandler", "time": comp}) + records.append({"factor": "compile_vs_search", "level": "search", "engine": "QueryHandler", "time": search_med}) + + # StringQueryHandler + comp2, _ 
= time_it(lambda: StringQueryHandler(query), self.n_runs) + sqh = StringQueryHandler(query) + search_med2, _ = time_it(lambda: sqh.search(raw, schema_lookup=self.lookup), self.n_runs) + records.append( + {"factor": "compile_vs_search", "level": "compile", "engine": "StringQueryHandler", "time": comp2} + ) + records.append( + {"factor": "compile_vs_search", "level": "search", "engine": "StringQueryHandler", "time": search_med2} + ) + + return records + + def sweep_per_operation(self): + """Test every query operation type on the same string. + + Uses a string with enough structure to exercise all operations: + groups, nested groups, Def tags, Onset, etc. + """ + # Build a string with structure that can match all query types + raw = ( + "Sensory-event, Action, Agent, " + "(Event, (Onset, (Def/MyDef))), " + "(Offset, Item, (Def-expand/MyDef, (Red, Blue))), " + "(Visual-presentation, Square, Green)" + ) + + operation_queries = [ + # (label, basic_search_query, qh_query) + ("bare_term", "@Event", "Event"), + ("exact_quoted", None, '"Sensory-event"'), + ("wildcard_prefix", "Def/*", "Def/*"), + ("and_2", "@Event, @Action", "Event && Action"), + ("and_3", "@Event, @Action, @Agent", "Event && Action && Agent"), + ("or", None, "Event || Action"), + ("negation", "~Event", "~Event"), + ("nested_group_[]", "(Event, Action)", "[Event && Action]"), + ("exact_group_{}", None, "{Event && Action}"), + ("exact_optional_{:}", None, "{Event && Action: Agent}"), + ("wildcard_?", None, "{Event, ?}"), + ("wildcard_??", None, "{Event, ??}"), + ("wildcard_???", None, "{Event, ???}"), + ("descendant_nested", None, "[Def && Onset]"), + ("complex_onset_def", None, "{(Onset || Offset), (Def || {Def-expand}): ???}"), + ("deep_and_chain", "@Event, @Action, @Agent, @Item, @Red", "Event && Action && Agent && Item && Red"), + ("nested_or_and", None, "(Event || Sensory-event) && (Action || Agent)"), + ("double_negation", None, "~(~Event)"), + ] + + records = [] + for op_label, bs_q, qh_q in 
operation_queries: + for engine, med in self._bench_all_engines(raw, qh_q, bs_q): + records.append({"factor": "per_operation", "level": op_label, "engine": engine, "time": med}) + return records + + def sweep_deep_nesting_by_query(self): + """Test how different query types perform on deeply nested strings.""" + depths = [1, 5, 10, 20] + queries = [ + ("bare_term", "@Event", "Event"), + ("two_and", "@Event, @Action", "Event && Action"), + ("group_match", "(Event, Action)", "[Event && Action]"), + ("exact_group", None, "{Event && Action}"), + ("negation", "~Event", "~Event"), + ] + records = [] + for d in depths: + raw = self.gen.make_deeply_nested_string(depth=d, tags_per_level=2) + for q_label, bs_q, qh_q in queries: + for engine, med in self._bench_all_engines(raw, qh_q, bs_q): + records.append( + { + "factor": f"deep_nest_{q_label}", + "level": d, + "engine": engine, + "time": med, + } + ) + return records + + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + + def _bench_all_engines(self, raw, qh_query, bs_query=None): + """Yield (engine_name, median_time) for all engines on a single string.""" + series1 = pd.Series([raw]) + + # basic_search + if bs_query is not None: + med, _ = time_it(lambda: find_matching(series1, bs_query), self.n_runs) + yield "basic_search", med + + # QueryHandler + qh = QueryHandler(qh_query) + + def qh_search(): + hs = HedString(raw, self.schema) + qh.search(hs) + + med, _ = time_it(qh_search, self.n_runs) + yield "QueryHandler", med + + # StringQueryHandler no lookup + sqh = StringQueryHandler(qh_query) + med, _ = time_it(lambda: sqh.search(raw, schema_lookup=None), self.n_runs) + yield "SQH_no_lookup", med + + # StringQueryHandler with lookup + med, _ = time_it(lambda: sqh.search(raw, schema_lookup=self.lookup), self.n_runs) + yield "SQH_with_lookup", med + + def _bench_series_engines(self, series, qh_query, bs_query, 
n_rows): + """Yield (engine_name, median_time) for series-level engines.""" + # basic_search + if bs_query is not None: + med, _ = time_it(lambda: find_matching(series, bs_query), max(3, self.n_runs // 2)) + yield "basic_search", med + + # search_series no lookup + med, _ = time_it(lambda: search_series(series, qh_query, schema_lookup=None), max(3, self.n_runs // 2)) + yield "search_series_no_lookup", med + + # search_series with lookup + med, _ = time_it(lambda: search_series(series, qh_query, schema_lookup=self.lookup), max(3, self.n_runs // 2)) + yield "search_series_with_lookup", med + + # QueryHandler loop + qh = QueryHandler(qh_query) + schema = self.schema + + def qh_loop(): + for s in series: + if pd.notna(s) and s: + hs = HedString(s, schema) + qh.search(hs) + + med, _ = time_it(qh_loop, max(3, self.n_runs // 2)) + yield "QueryHandler_loop", med + + +# ====================================================================== +# Main orchestrator +# ====================================================================== + + +def run_full_benchmark(quick=False): + """Run the complete benchmark suite and save results.""" + print("Initialising DataGenerator (loading schema)…") + gen = DataGenerator() + + n_single = 10 if quick else 20 + n_series = 3 if quick else 5 + n_sweep = 5 if quick else 10 + + # ------------------------------------------------------------------ + # 1. 
Single-string benchmark + # ------------------------------------------------------------------ + print("\n=== Single-string benchmarks ===") + ssb = SingleStringBenchmark(gen, n_runs=n_single) + + string_configs = [ + {"label": "tiny_1tag", "n_tags": 1}, + {"label": "small_5tag", "n_tags": 5}, + {"label": "medium_10tag", "n_tags": 10, "n_groups": 2, "depth": 1}, + {"label": "large_25tag", "n_tags": 25, "n_groups": 5, "depth": 2}, + {"label": "xlarge_50tag", "n_tags": 50, "n_groups": 10, "depth": 2}, + ] + if not quick: + string_configs.append({"label": "xxlarge_100tag", "n_tags": 100, "n_groups": 15, "depth": 3}) + single_results = ssb.run_all(string_configs) + print(f" Collected {len(single_results)} single-string records.") + + # ------------------------------------------------------------------ + # 2. Series benchmark + # ------------------------------------------------------------------ + print("\n=== Series benchmarks ===") + sb = SeriesBenchmark(gen, n_runs=n_series) + + if quick: + series_sizes = [10, 100, 500] + else: + series_sizes = [10, 100, 500, 1000, 5000] + + series_configs = [] + for n in series_sizes: + series_configs.append({"label": f"homo_{n}", "n_rows": n, "n_tags": 10, "n_groups": 2, "depth": 1}) + for n in [100, 1000] if not quick else [100]: + series_configs.append({"label": f"hetero_{n}", "n_rows": n, "n_tags": 10, "heterogeneous": True}) + + series_results = sb.run_all(series_configs) + print(f" Collected {len(series_results)} series records.") + + # ------------------------------------------------------------------ + # 3. 
Factor sweeps + # ------------------------------------------------------------------ + print("\n=== Factor sweeps ===") + fs = FactorSweep(gen, n_runs=n_sweep) + + sweep_results = [] + for name, method in [ + ("tag_count", fs.sweep_tag_count), + ("nesting_depth", fs.sweep_nesting_depth), + ("repeated_tags", fs.sweep_repeated_tags), + ("group_count", fs.sweep_group_count), + ("series_size", fs.sweep_series_size), + ("query_complexity", fs.sweep_query_complexity), + ("schema_lookup", fs.sweep_schema_lookup), + ("string_form", fs.sweep_string_form), + ("compile_vs_search", fs.sweep_compilation_vs_search), + ("per_operation", fs.sweep_per_operation), + ("deep_nesting_by_query", fs.sweep_deep_nesting_by_query), + ]: + print(f" Sweep: {name}") + sweep_results.extend(method()) + + print(f" Collected {len(sweep_results)} sweep records.") + + # ------------------------------------------------------------------ + # 4. Real data benchmark + # ------------------------------------------------------------------ + print("\n=== Real data benchmark ===") + real_series = gen.load_real_data() + real_n = len(real_series) + print(f" Real data: {real_n} rows") + + real_results = [] + for q_label, bs_query, qh_query in QUERIES: + if bs_query is not None: + med, _ = time_it(lambda bs_query=bs_query: find_matching(real_series, bs_query), n_series) + real_results.append( + { + "engine": "basic_search", + "query_label": q_label, + "total_time": med, + "per_row": med / real_n, + "n_rows": real_n, + } + ) + + med, _ = time_it( + lambda qh_query=qh_query: search_series(real_series, qh_query, schema_lookup=gen.lookup), n_series + ) + real_results.append( + { + "engine": "search_series", + "query_label": q_label, + "total_time": med, + "per_row": med / real_n, + "n_rows": real_n, + } + ) + + qh = QueryHandler(qh_query) + schema = gen.schema + + def qh_loop(qh=qh, schema=schema): + for s in real_series: + if pd.notna(s) and s: + hs = HedString(s, schema) + qh.search(hs) + + med, _ = 
time_it(qh_loop, n_series) + real_results.append( + { + "engine": "QueryHandler_loop", + "query_label": q_label, + "total_time": med, + "per_row": med / real_n, + "n_rows": real_n, + } + ) + + print(f" Collected {len(real_results)} real-data records.") + + # ------------------------------------------------------------------ + # Save + # ------------------------------------------------------------------ + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + output = { + "timestamp": timestamp, + "quick": quick, + "single_string": single_results, + "series": series_results, + "factor_sweeps": sweep_results, + "real_data": real_results, + } + out_path = RESULTS_DIR / f"benchmark_{timestamp}.json" + out_path.write_text(json.dumps(output, indent=2, default=str), encoding="utf-8") + print(f"\nResults saved to {out_path}") + return output + + +# ====================================================================== +# Entry point +# ====================================================================== + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="HED search benchmark") + parser.add_argument("--quick", action="store_true", help="Reduced run for smoke testing") + args = parser.parse_args() + run_full_benchmark(quick=args.quick) diff --git a/docs/_static/images/benchmark_compile_vs_search.png b/docs/_static/images/benchmark_compile_vs_search.png new file mode 100644 index 000000000..a8a317963 Binary files /dev/null and b/docs/_static/images/benchmark_compile_vs_search.png differ diff --git a/docs/_static/images/benchmark_query_heatmap.png b/docs/_static/images/benchmark_query_heatmap.png new file mode 100644 index 000000000..993bb0040 Binary files /dev/null and b/docs/_static/images/benchmark_query_heatmap.png differ diff --git a/docs/_static/images/benchmark_real_data.png b/docs/_static/images/benchmark_real_data.png new file mode 100644 index 000000000..5fa4e3cf2 Binary files /dev/null and b/docs/_static/images/benchmark_real_data.png 
differ diff --git a/docs/_static/images/benchmark_series_scaling.png b/docs/_static/images/benchmark_series_scaling.png new file mode 100644 index 000000000..2372b0c0a Binary files /dev/null and b/docs/_static/images/benchmark_series_scaling.png differ diff --git a/docs/_static/images/benchmark_sweep_compile_vs_search.png b/docs/_static/images/benchmark_sweep_compile_vs_search.png new file mode 100644 index 000000000..afb71a4b8 Binary files /dev/null and b/docs/_static/images/benchmark_sweep_compile_vs_search.png differ diff --git a/docs/_static/images/benchmark_sweep_deep_nest_bare_term.png b/docs/_static/images/benchmark_sweep_deep_nest_bare_term.png new file mode 100644 index 000000000..bd4d580e9 Binary files /dev/null and b/docs/_static/images/benchmark_sweep_deep_nest_bare_term.png differ diff --git a/docs/_static/images/benchmark_sweep_deep_nest_exact_group.png b/docs/_static/images/benchmark_sweep_deep_nest_exact_group.png new file mode 100644 index 000000000..b8098dd19 Binary files /dev/null and b/docs/_static/images/benchmark_sweep_deep_nest_exact_group.png differ diff --git a/docs/_static/images/benchmark_sweep_deep_nest_group_match.png b/docs/_static/images/benchmark_sweep_deep_nest_group_match.png new file mode 100644 index 000000000..91595e58f Binary files /dev/null and b/docs/_static/images/benchmark_sweep_deep_nest_group_match.png differ diff --git a/docs/_static/images/benchmark_sweep_deep_nest_negation.png b/docs/_static/images/benchmark_sweep_deep_nest_negation.png new file mode 100644 index 000000000..a510be20b Binary files /dev/null and b/docs/_static/images/benchmark_sweep_deep_nest_negation.png differ diff --git a/docs/_static/images/benchmark_sweep_deep_nest_two_and.png b/docs/_static/images/benchmark_sweep_deep_nest_two_and.png new file mode 100644 index 000000000..c12bd8f7a Binary files /dev/null and b/docs/_static/images/benchmark_sweep_deep_nest_two_and.png differ diff --git a/docs/_static/images/benchmark_sweep_group_count.png 
b/docs/_static/images/benchmark_sweep_group_count.png new file mode 100644 index 000000000..19bca25fa Binary files /dev/null and b/docs/_static/images/benchmark_sweep_group_count.png differ diff --git a/docs/_static/images/benchmark_sweep_nesting_depth.png b/docs/_static/images/benchmark_sweep_nesting_depth.png new file mode 100644 index 000000000..bae381e6e Binary files /dev/null and b/docs/_static/images/benchmark_sweep_nesting_depth.png differ diff --git a/docs/_static/images/benchmark_sweep_per_operation.png b/docs/_static/images/benchmark_sweep_per_operation.png new file mode 100644 index 000000000..715786af9 Binary files /dev/null and b/docs/_static/images/benchmark_sweep_per_operation.png differ diff --git a/docs/_static/images/benchmark_sweep_query_complexity.png b/docs/_static/images/benchmark_sweep_query_complexity.png new file mode 100644 index 000000000..efb4a06e9 Binary files /dev/null and b/docs/_static/images/benchmark_sweep_query_complexity.png differ diff --git a/docs/_static/images/benchmark_sweep_repeated_tags.png b/docs/_static/images/benchmark_sweep_repeated_tags.png new file mode 100644 index 000000000..83375fe70 Binary files /dev/null and b/docs/_static/images/benchmark_sweep_repeated_tags.png differ diff --git a/docs/_static/images/benchmark_sweep_schema_lookup.png b/docs/_static/images/benchmark_sweep_schema_lookup.png new file mode 100644 index 000000000..9818123e1 Binary files /dev/null and b/docs/_static/images/benchmark_sweep_schema_lookup.png differ diff --git a/docs/_static/images/benchmark_sweep_series_size.png b/docs/_static/images/benchmark_sweep_series_size.png new file mode 100644 index 000000000..e92b8c68e Binary files /dev/null and b/docs/_static/images/benchmark_sweep_series_size.png differ diff --git a/docs/_static/images/benchmark_sweep_string_form.png b/docs/_static/images/benchmark_sweep_string_form.png new file mode 100644 index 000000000..8c777d3d9 Binary files /dev/null and 
b/docs/_static/images/benchmark_sweep_string_form.png differ diff --git a/docs/_static/images/benchmark_sweep_tag_count.png b/docs/_static/images/benchmark_sweep_tag_count.png new file mode 100644 index 000000000..4efce2e38 Binary files /dev/null and b/docs/_static/images/benchmark_sweep_tag_count.png differ diff --git a/docs/api/models.rst b/docs/api/models.rst index 91dc10d26..01d662de2 100644 --- a/docs/api/models.rst +++ b/docs/api/models.rst @@ -181,7 +181,7 @@ String-based search ------------------- Search functions that operate on raw HED strings without requiring pre-parsed ``HedString`` objects -or a loaded schema. See also :doc:`/search_implementation` for a full comparison of all three +or a loaded schema. See also :doc:`/search_details` for a full comparison of all three search implementations. StringQueryHandler diff --git a/docs/index.rst b/docs/index.rst index 530937629..d65a14b27 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -27,7 +27,7 @@ in various formats: :maxdepth: 2 User guide - Search implementations + Search implementations API * :ref:`genindex` diff --git a/docs/search_details.md b/docs/search_details.md new file mode 100644 index 000000000..35ebcdecf --- /dev/null +++ b/docs/search_details.md @@ -0,0 +1,443 @@ +--- +html_meta: + description: HED search details — implementation comparison and performance benchmarks for basic_search, QueryHandler, and StringQueryHandler in hedtools + keywords: HED search, string search, query handler, basic search, performance, benchmarks, hedtools, pattern matching +--- + +```{index} search, string search, query, QueryHandler, StringQueryHandler, basic_search +``` + +# HED search details + +HEDtools provides three distinct mechanisms for searching HED-annotated data. This page covers their design and query languages ({ref}`implementations `) and measured performance characteristics ({ref}`performance `). 
+ +(hed-search-implementations)= + +## HED search implementations + +The three implementations share a common goal — "does this HED string match this query?" — but differ substantially in their inputs, capabilities, schema requirements, and performance characteristics. Choosing the right implementation depends on whether you need schema-aware ancestor matching, full group-structural queries, or raw throughput on unannotated strings. + +### Overview of the three implementations + +#### `basic_search` — regex-based flat matching + +Located in {mod}`hed.models.basic_search`, the `find_matching()` function operates directly on a `pd.Series` of raw HED strings using compiled regular expressions. It requires no schema and no parsing step, making it the fastest option for bulk row filtering. + +Key characteristics: + +- Input is a `pd.Series` of raw strings; output is a `pd.Series[bool]` mask. +- The query is compiled once into a regex and applied with `Series.str.contains`. +- Matches are purely literal — `Event` does not match `Sensory-event`. +- `@A` in a basic-search query means A **must be present** anywhere in the string (note: this is the **opposite** of what `@A` means in `QueryHandler`/`StringQueryHandler`). +- `~A` means A must not appear anywhere (global negation). +- `(A, B)` syntax checks that A and B appear at the same nesting level. +- Wildcard `A*` expands to the regex `A.*?`, which can span `/` and match mid-token substrings. + +Use `basic_search` when you are working with a large series of raw strings, don't need ancestor matching, and want maximum throughput. See {func}`hed.models.basic_search.find_matching`. + +#### `QueryHandler` — schema-backed object search + +Located in {mod}`hed.models.query_handler`, `QueryHandler` is the full-featured search engine. It compiles a query string into an expression tree once, then evaluates that tree against `HedString` objects that have already been parsed against a loaded `HedSchema`. 
+ +Key characteristics: + +- Input is a `HedString` object; a full `HedSchema` is required. +- Output is a `list[SearchResult]` containing `HedTag` / `HedGroup` object references, useful for tag-level introspection (not just row filtering). +- Supports the complete query language: `&&`, `||`, `~`, `@`, `{}`, `[]`, `{:}`, `?`, `??`, `???`. +- `@A` means A must **not** appear anywhere in the string. +- Ancestor matching is exact — the schema normalises both query and string tags to short form, so `Event` matches `Sensory-event` because the schema knows `Sensory-event` descends from `Event`. +- Per-string cost includes a full HedString parse and schema tag resolution. + +Use `QueryHandler` when you need schema-aware ancestor matching, or when you want object references (e.g., to retrieve the matched group for further processing). See {class}`hed.models.query_handler.QueryHandler`. + +#### `StringQueryHandler` — tree-based schema-optional search + +Located in {mod}`hed.models.string_search`, `StringQueryHandler` is a new middle-ground implementation that inherits from `QueryHandler` and reuses the full expression-tree compiler, but operates on raw strings rather than pre-parsed `HedString` objects. + +It parses each raw HED string into a lightweight {class}`~hed.models.string_search.StringNode` tree that duck-types the `HedGroup`/`HedTag` interfaces expected by the existing expression evaluators — so all `QueryHandler` query syntax works unchanged. + +Key characteristics: + +- Input is a raw string (or a `pd.Series` via {func}`~hed.models.string_search.search_series`). +- Schema is **optional**: pass a `schema_lookup` dict (see {mod}`hed.models.schema_lookup`) to enable ancestor matching for short-form strings (e.g. `Event` matching `Sensory-event`); omit it for purely literal matching. +- Output is a list (truthy/falsy) — row-filtering only, no object references. +- Supports the same full query syntax as `QueryHandler` (`&&`, `||`, `~`, `@`, `{}`, etc.). 
+- `@A` carries the same semantics as `QueryHandler` — A must **not** be present. +- Long-form strings (`Event/Sensory-event`) support ancestor matching via slash-splitting even without a lookup. Short-form strings (`Sensory-event`) require a `schema_lookup` for ancestor matching; without one, matching is purely literal. +- Parse cost is a lightweight recursive split — much cheaper than a full HedString + schema parse. + +Use `StringQueryHandler` when you have raw strings (not `HedString` objects), need the full `QueryHandler` query syntax, and either don't have a schema available or want faster processing at the cost of losing full schema-aware ancestor matching. See {class}`hed.models.string_search.StringQueryHandler`. + +#### Generating a schema lookup + +If you want `StringQueryHandler` to resolve ancestors for short-form strings (e.g. query `Event` matching `Sensory-event`) without a full schema parse per row, you can pre-generate a lookup dictionary from a `HedSchema`: + +```python +from hed import load_schema_version +from hed import generate_schema_lookup, save_schema_lookup, load_schema_lookup + +schema = load_schema_version("8.4.0") +lookup = generate_schema_lookup(schema) # {short_name_casefold: tag_terms_tuple} + +# Persist for reuse +save_schema_lookup(lookup, "hed840_lookup.json") +lookup = load_schema_lookup("hed840_lookup.json") +``` + +See {func}`hed.models.schema_lookup.generate_schema_lookup`. 
+ +______________________________________________________________________ + +### Comparison tables + +#### Core characteristics + +| Property | `basic_search` | `QueryHandler` | `StringQueryHandler` | +| --------------------- | -------------------------- | -------------------------------------------------- | ----------------------------------------------- | +| **Input** | `pd.Series` of raw strings | `HedString` object | Raw string or `pd.Series` (via `search_series`) | +| **Schema required** | No | Yes — full `HedSchema` for tag parsing | No; optional `schema_lookup` dict | +| **Output** | `pd.Series[bool]` mask | `list[SearchResult]` with `HedTag`/`HedGroup` refs | `list` (truthy/falsy); `StringNode` refs | +| **Result usable for** | Row filtering | Row filtering + tag/group introspection | Row filtering only | +| **Batch API** | Native (`series`) | Manual loop | `search_series(series, query)` | +| **Parse cost** | Regex compilation once | Full `HedString` + schema parse per string | Lightweight tree parse per string | +| **Unrecognised tags** | Matched literally | Silent match failure (`tag_terms = ()`) | Matched literally | + +#### Query syntax + +| Feature | `basic_search` query syntax | `QueryHandler` / `StringQueryHandler` query syntax | +| ---------------------------- | --------------------------------------------------- | -------------------------------------------------- | +| **AND** | Space or comma between terms (context-dependent) | `A && B` or `A, B` | +| **OR** | Not supported | `A \|\| B` | +| **Absent from string (`@`)** | ⚠️ `@A` means A **must be present** anywhere | `@A` means A must **not** appear anywhere | +| **Must-not-appear (`~`)** | `~A` — A must not appear anywhere (global) | `~A` — negation within group context (local) | +| **Prefix wildcard** | `A*` → regex `A.*?` (spans `/`, matches substrings) | `A*` → prefix on short form only | +| **Full regex per term** | Yes (`regex=True` mode) | No | +| **Quoted exact match** | No | `"A"` — 
exact match, no ancestor search | +| **Implicit default** | If no `(` or `@`: all terms become "anywhere" | No implicit conversion — must be explicit | + +#### Group / structural operators + +| Feature | `basic_search` | `QueryHandler` | `StringQueryHandler` | +| --------------------------------- | ----------------------------------------- | -------------------------------------------- | ---------------------- | +| **Same nesting level** | `(A, B)` — A and B at same relative level | N/A — use `{A, B}` | N/A — use `{A, B}` | +| **Same parenthesised group `{}`** | No | `{A, B}` — must share a direct parent group | Same as `QueryHandler` | +| **Exact group `{:}`** | No | `{A, B:}` — same group, no other children | Same | +| **Optional exact group** | No | `{A, B: C}` — A and B required, C optional | Same | +| **Descendant group `[]`** | No | `[A, B]` — both in same subtree at any depth | Same | +| **Any child `?`** | No | `?` — any tag or group child | Same | +| **Any tag child `??`** | No | `??` — any leaf (non-group) child | Same | +| **Any group child `???`** | No | `???` — any parenthesised group child | Same | +| **Nested query operators** | No | Yes — full recursive composition | Same | + +#### Ancestor / cross-form search + +| Scenario | `basic_search` | `QueryHandler` | `StringQueryHandler` | +| ------------------------------------------------------- | ------------------------------------------------- | --------------------------------------- | ---------------------------------------------------------------- | +| Query `Event`, string `Sensory-event` (short form) | ❌ literal only | ✅ `tag_terms` from schema | ✅ with `schema_lookup`; ❌ without | +| Query `Event`, string `Event/Sensory-event` (long form) | ❌ `Event` ≠ `Event/Sensory-event` | ✅ schema normalises | ✅ slash-split produces `tag_terms = ("event", "sensory-event")` | +| Query `Event/Sensory-event`, string `Sensory-event` | ❌ | ✅ schema normalises both to short form | ❌ no schema to normalise | +| 
Schema-free ancestor search | `convert_query()` + long-form series (workaround) | N/A — schema always required | ✅ works natively for long-form strings | +| Tag `Def/Name` matched by query `Def` | ❌ literal prefix mismatch | ✅ `short_base_tag = "Def"` | ✅ `tag_terms` contains `"def"` | + +#### Critical semantic traps + +These differences are silent — no error, just wrong answers if you mix up query strings across implementations: + +| Operator | `basic_search` | `QueryHandler` / `StringQueryHandler` | +| ----------------- | -------------------------------------------------------- | ----------------------------------------------------------------------------------- | +| `@A` | A **must** appear anywhere in the string | A must **not** appear anywhere in the string | +| `~A` | A must not appear **anywhere** (global) | A must not appear in any group that also matches the rest of the expression (local) | +| `*` wildcard | Regex `.*?` — spans `/` and matches mid-token substrings | Strict prefix on the tag's short form — anchored to start | +| No-operator `A B` | Both present anywhere (implicit `@@`) | Parse error — `&&` required | + +______________________________________________________________________ + +(hed-search-performance)= + +## HED search performance + +Benchmarks were run using HED 8.4.0 with `timeit` on both synthetic strings and real BIDS event data. All times are medians in milliseconds. Relative ratios between engines are more meaningful than absolute values, which depend on hardware. + +### Key findings + +- **Series throughput:** `basic_search` is ~16× faster than a `QueryHandler` row-by-row loop at 5 000 rows because it leverages vectorised pandas `str.contains` regex matching. +- **Single-string speed:** `StringQueryHandler` (no lookup) is ~39% faster than `QueryHandler` per string because it avoids schema-based `HedString` construction. 
+- **Schema-lookup overhead:** Enabling `schema_lookup` in `StringQueryHandler` has negligible overhead for most queries; cost appears only when ancestor matching is actually invoked.
+- **Nesting depth:** At depth 20, `QueryHandler` is ~16× slower than on a flat string; `StringQueryHandler` scales more gently (~8×).
+- **Operation coverage:** `basic_search` supports 7 of 18 tested operation types. The remaining 11 (OR, exact groups, logical groups, `?`/`??`/`???` wildcards, quoted terms) require `QueryHandler` or `StringQueryHandler`.
+
+### Series throughput
+
+Whole-series search over a `pd.Series` of HED strings. `basic_search` uses vectorised regex; `search_series` uses `StringQueryHandler.search()` per row; `QueryHandler_loop` constructs a `HedString` per row then searches. Query: `single_bare_term`.
+
+| Rows | QueryHandler_loop (ms) | basic_search (ms) | search_series (ms) |
+| ----: | ---------------------: | ----------------: | -----------------: |
+| 10 | 0.34 | 0.20 | 0.30 |
+| 100 | 3.43 | 0.40 | 2.41 |
+| 500 | 16.7 | 2.25 | 13.1 |
+| 1 000 | 29.8 | 1.91 | 19.5 |
+| 5 000 | 164 | 11.7 | 114 |
+
+All three engines scale linearly with row count. `basic_search` is roughly 7–16× faster than `QueryHandler_loop` at 100+ rows; `search_series` is roughly 1.4× faster than `QueryHandler_loop`.
+
+![Series search time vs row count](_static/images/benchmark_sweep_series_size.png)
+
+### Single-string timing
+
+Per-string median search time (ms) across string sizes. Tag counts: tiny = 1, small = 5, medium = 10, large = 25, xlarge = 50, xxlarge = 100. Query: `single_bare_term`.
+ +| String size | QueryHandler (ms) | SQH_no_lookup (ms) | basic_search (ms) | +| ------------------ | ----------------: | -----------------: | ----------------: | +| tiny (1 tag) | 0.012 | 0.007 | 0.131 | +| small (5 tags) | 0.020 | 0.014 | 0.197 | +| medium (10 tags) | 0.041 | 0.021 | 0.123 | +| large (25 tags) | 0.132 | 0.102 | 0.157 | +| xlarge (50 tags) | 0.176 | 0.113 | 0.131 | +| xxlarge (100 tags) | 0.329 | 0.248 | 0.154 | + +`basic_search` regex overhead dominates on small strings; `QueryHandler` and `StringQueryHandler` dominate on large strings. The crossover occurs around 25–50 tags. + +![Median search time per query × engine (ms)](_static/images/benchmark_query_heatmap.png) + +### Operation coverage and cost + +Per-operation timing on a 10-tag string. `basic_search` returns no results (not an error) for unsupported constructs, so queries using those operations will silently produce incorrect results. + +| Operation | QueryHandler (ms) | SQH (ms) | basic_search | +| ------------------------------ | ----------------: | -------: | ------------- | +| `bare_term` | 0.061 | 0.037 | 0.278 ms | +| `and_2` | 0.063 | 0.041 | 0.321 ms | +| `and_3` | 0.067 | 0.045 | 0.355 ms | +| `negation` | 0.083 | 0.043 | 0.160 ms | +| `wildcard_prefix` (`*` suffix) | 0.046 | 0.037 | 0.204 ms | +| `nested_group_[]` | 0.057 | 0.039 | 0.634 ms | +| `deep_and_chain` | 0.117 | 0.059 | 0.515 ms | +| `or` | 0.058 | 0.037 | — unsupported | +| `exact_group_{}` | 0.052 | 0.030 | — unsupported | +| `exact_optional_{:}` | 0.071 | 0.043 | — unsupported | +| `exact_quoted` | 0.062 | 0.030 | — unsupported | +| `wildcard_?` | 0.086 | 0.047 | — unsupported | +| `wildcard_??` | 0.068 | 0.041 | — unsupported | +| `wildcard_???` | 0.074 | 0.041 | — unsupported | +| `descendant_nested` | 0.138 | 0.086 | — unsupported | +| `double_negation` | 0.057 | 0.035 | — unsupported | +| `complex_onset_def` | 0.113 | 0.068 | — unsupported | +| `nested_or_and` | 0.080 | 0.057 | — unsupported | + 
+`StringQueryHandler` supports all 18 operation types.
+
+![Per-operation timing across all three engines](_static/images/benchmark_sweep_per_operation.png)
+
+### Nesting depth
+
+Parenthesisation depth from 0 (flat) to 20. Deeper nesting increases the tree walk for `QueryHandler` and `StringQueryHandler`. `basic_search` shows no consistent depth trend because its cost depends on delimiter count, not recursion depth.
+
+| Depth | QueryHandler (ms) | SQH_no_lookup (ms) | basic_search (ms) |
+| ----: | ----------------: | -----------------: | ----------------: |
+| 0 | 0.026 | 0.017 | 0.125 |
+| 1 | 0.022 | 0.013 | 0.256 |
+| 2 | 0.028 | 0.019 | 0.218 |
+| 3 | 0.034 | 0.023 | 0.215 |
+| 5 | 0.110 | 0.076 | 0.531 |
+| 10 | 0.094 | 0.060 | 0.385 |
+| 15 | 0.116 | 0.082 | 0.226 |
+| 20 | 0.409 | 0.140 | 0.200 |
+
+At depth 20, `QueryHandler` is ~16× slower than at depth 0; `SQH` is ~8× slower.
+
+![Nesting depth sweep](_static/images/benchmark_sweep_nesting_depth.png)
+
+#### Deep nesting by query type
+
+The nesting cost depends on query type. For group-structural queries (`group_match`, `two_term_and`) the engines must evaluate all candidate groups at each level, and `QueryHandler` shows a pronounced cost spike at depth 10 while `StringQueryHandler` stays flatter. All values in ms at depths 1–20.
+ +**Bare term:** + +| Depth | QueryHandler | SQH_no_lookup | basic_search | +| ----: | -----------: | ------------: | -----------: | +| 1 | 0.030 | 0.019 | 0.204 | +| 5 | 0.045 | 0.031 | 0.198 | +| 10 | 0.087 | 0.059 | 0.209 | +| 20 | 0.141 | 0.154 | 0.212 | + +![Deep nesting — bare term](_static/images/benchmark_sweep_deep_nest_bare_term.png) + +**Exact group `{}`:** + +| Depth | QueryHandler | SQH_no_lookup | +| ----: | -----------: | ------------: | +| 1 | 0.025 | 0.018 | +| 5 | 0.053 | 0.036 | +| 10 | 0.105 | 0.072 | +| 20 | 0.209 | 0.146 | + +![Deep nesting — exact group](_static/images/benchmark_sweep_deep_nest_exact_group.png) + +**Group match `[]`:** + +| Depth | QueryHandler | SQH_no_lookup | basic_search | +| ----: | -----------: | ------------: | -----------: | +| 1 | 0.032 | 0.020 | 0.520 | +| 5 | 0.054 | 0.038 | 0.551 | +| 10 | 0.181 | 0.063 | 0.536 | +| 20 | 0.324 | 0.118 | 0.658 | + +![Deep nesting — group match](_static/images/benchmark_sweep_deep_nest_group_match.png) + +`QueryHandler` at depth 10 is 5.7× its depth-1 cost; `StringQueryHandler` is only 3.2×. + +**Negation:** + +| Depth | QueryHandler | SQH_no_lookup | basic_search | +| ----: | -----------: | ------------: | -----------: | +| 1 | 0.021 | 0.014 | 0.128 | +| 5 | 0.055 | 0.037 | 0.163 | +| 10 | 0.101 | 0.072 | 0.121 | +| 20 | 0.177 | 0.129 | 0.112 | + +![Deep nesting — negation](_static/images/benchmark_sweep_deep_nest_negation.png) + +**Two-term AND:** + +| Depth | QueryHandler | SQH_no_lookup | basic_search | +| ----: | -----------: | ------------: | -----------: | +| 1 | 0.043 | 0.024 | 0.422 | +| 5 | 0.065 | 0.052 | 0.395 | +| 10 | 0.205 | 0.070 | 0.274 | +| 20 | 0.320 | 0.109 | 0.355 | + +![Deep nesting — two-term AND](_static/images/benchmark_sweep_deep_nest_two_and.png) + +### Repeated tags + +Repeating a target tag N times in the string. 
`basic_search`'s `verify_search_delimiters` uses `itertools.product` over delimiter positions; repeated instances multiply the internal search space. Tree-based engines are linear in the number of candidates and are not affected. + +| Occurrences | QueryHandler (ms) | SQH_no_lookup (ms) | basic_search (ms) | +| ----------: | ----------------: | -----------------: | ----------------: | +| 0 | 0.034 | 0.022 | 0.544 | +| 5 | 0.151 | 0.084 | 0.791 | +| 10 | 0.093 | 0.073 | 0.940 | +| 20 | 0.182 | 0.138 | 0.668 | +| 40 | 0.200 | 0.195 | 0.654 | + +![Repeated target tag sweep](_static/images/benchmark_sweep_repeated_tags.png) + +### Compile vs. search + +Query compilation is a one-time cost; subsequent searches against different strings reuse the compiled expression. Reusing a compiled handler across many strings amortises compilation cost to near zero. + +| Phase | QueryHandler (ms) | StringQueryHandler (ms) | +| ------- | ----------------: | ----------------------: | +| Compile | 0.004 | 0.005 | +| Search | 0.053 | 0.036 | + +![Compile vs. search cost breakdown](_static/images/benchmark_compile_vs_search.png) + +### Real BIDS data + +Search over 200 rows of the `eeg_ds003645s_hed` BIDS test dataset. 
+ +| Query | QueryHandler_loop (ms) | basic_search (ms) | search_series (ms) | +| ---------------------- | ---------------------: | ----------------: | -----------------: | +| `single_bare_term` | 9.0 | 2.5 | 6.5 | +| `single_wildcard` | 8.2 | 0.6 | 4.9 | +| `negation` | 8.0 | 0.9 | 6.8 | +| `two_term_and` | 8.8 | 1.2 | 4.9 | +| `three_term_and` | 8.5 | 1.9 | 5.1 | +| `group_nesting` | 7.9 | 0.3 | 7.8 | +| `two_term_or` | 7.9 | — | 6.8 | +| `exact_group` | 9.3 | — | 6.6 | +| `exact_group_optional` | 11.7 | — | 5.8 | +| `single_exact_term` | 8.1 | — | 5.5 | +| `wildcard_child` | 12.6 | — | 8.9 | +| `complex_composite` | 14.2 | — | 9.5 | + +![Real BIDS data — 200-row search times](_static/images/benchmark_real_data.png) + +### Tag count + +Number of tags in the HED string (1 to 100). `basic_search` time is dominated by regex compilation overhead and stays roughly constant; tree-based engines scale linearly with the number of nodes to traverse. + +| Tags | QueryHandler (ms) | SQH_no_lookup (ms) | basic_search (ms) | +| ---: | ----------------: | -----------------: | ----------------: | +| 1 | 0.014 | 0.004 | 0.294 | +| 5 | 0.019 | 0.013 | 0.163 | +| 10 | 0.031 | 0.018 | 0.150 | +| 25 | 0.061 | 0.080 | 0.124 | +| 50 | 0.149 | 0.160 | 0.184 | +| 100 | 0.287 | 0.167 | 0.271 | + +![Tag count sweep](_static/images/benchmark_sweep_tag_count.png) + +The tree-based crossover with `basic_search` occurs around 25–50 tags, where traversal cost meets the regex setup cost. + +### String form + +Short-form vs long-form HED strings. Long-form strings use fully expanded paths (e.g. `Event/Sensory-event`), increasing string length and parse cost. `basic_search` is largely unaffected because it matches on short tag names via word-boundary patterns. 
+ +| Form | QueryHandler (ms) | SQH_no_lookup (ms) | basic_search (ms) | +| ----- | ----------------: | -----------------: | ----------------: | +| short | 0.044 | 0.029 | 0.124 | +| long | 0.074 | 0.063 | 0.121 | + +![String form sweep](_static/images/benchmark_sweep_string_form.png) + +`QueryHandler` is 1.7× slower on long-form strings; `StringQueryHandler` is 2.2× slower. + +### Schema lookup overhead + +`StringQueryHandler` can be used with or without a `schema_lookup` dictionary. The dictionary enables ancestor-based matching (e.g. `Event` matches `Sensory-event`) at negligible per-call overhead. + +| Mode | StringQueryHandler (ms) | +| ----------- | ----------------------: | +| no_lookup | 0.030 | +| with_lookup | 0.029 | + +![Schema lookup overhead](_static/images/benchmark_sweep_schema_lookup.png) + +### Group count and query complexity + +More top-level parenthesised groups increase the number of children the tree must inspect. Query complexity (more AND/OR clauses) adds expression-tree nodes to evaluate per candidate. 
+ +**Group count** (0–20 single-level groups): + +| Groups | QueryHandler (ms) | SQH_no_lookup (ms) | basic_search (ms) | +| -----: | ----------------: | -----------------: | ----------------: | +| 0 | 0.032 | 0.022 | 0.139 | +| 1 | 0.028 | 0.019 | 0.129 | +| 5 | 0.045 | 0.030 | 0.114 | +| 10 | 0.080 | 0.053 | 0.135 | +| 20 | 0.140 | 0.085 | 0.136 | + +![Group count sweep](_static/images/benchmark_sweep_group_count.png) + +**Query complexity** (1-clause bare term → 8-clause composite): + +| Complexity | QueryHandler (ms) | SQH_no_lookup (ms) | basic_search | +| --------------------- | ----------------: | -----------------: | ------------ | +| 1 — single term | 0.134 | 0.100 | 0.247 ms | +| 2 — two AND | 0.152 | 0.093 | 0.405 ms | +| 3 — three AND | 0.158 | 0.103 | 0.460 ms | +| 4 — OR | 0.134 | 0.071 | — | +| 5 — negation | 0.088 | 0.056 | 0.286 ms | +| 6 — group `[]` | 0.138 | 0.094 | 0.361 ms | +| 7 — exact group `{}` | 0.120 | 0.078 | — | +| 8 — complex composite | 0.106 | 0.078 | — | + +![Query complexity sweep](_static/images/benchmark_sweep_query_complexity.png) + +### Choosing an implementation + +**Use `basic_search`** when you need the fastest possible series-level filter, your queries can be expressed with simple terms, AND, negation, or descendant wildcards (`*`), and schema-aware ancestor matching is not required. Ideal for quick event file filtering when query simplicity is acceptable. + +**Use `StringQueryHandler`** (via `search_series()`) when you need the full query language (OR, exact groups, logical groups, `?`/`??`/`???` wildcards) and are working with raw strings from tabular files or sidecars. This is the best general-purpose choice — it is ~40 % faster than a `QueryHandler` loop per string and close to `basic_search` on large strings. 
+ +**Use `QueryHandler`** when you already have parsed `HedString` objects (for example from a validation pipeline), or when you need results as structured `HedString`/`HedTag` objects rather than boolean matches. The additional overhead relative to `StringQueryHandler` comes from `HedString` construction, not from search expression evaluation, so reusing pre-parsed objects avoids the cost entirely. + +### Benchmark methodology + +- **Timing:** `timeit` — 20 iterations (single-string), 5 iterations (series), 10 iterations (sweeps). Median reported. +- **Schema:** HED 8.4.0, loaded once and reused. +- **Synthetic data:** Strings built from real schema tags with controlled tag count, nesting depth, group count, and tag repetition. +- **`schema_lookup`:** Generated via `generate_schema_lookup(schema)` — a dict mapping each short tag to its ancestor tuple, enabling ancestor-based matching in `StringQueryHandler` without a full schema load per string. +- **Hardware note:** Absolute timings depend on hardware; relative ratios between engines are the meaningful comparison. diff --git a/docs/search_implementation.md b/docs/search_implementation.md deleted file mode 100644 index f00f1fa69..000000000 --- a/docs/search_implementation.md +++ /dev/null @@ -1,157 +0,0 @@ ---- -html_meta: - description: Comparison of the three HED string search implementations in hedtools - basic_search, QueryHandler, and StringQueryHandler - keywords: HED search, string search, query handler, basic search, performance, hedtools, pattern matching ---- - -```{index} search, string search, query, QueryHandler, StringQueryHandler, basic_search -``` - -# HED search implementations - -HEDtools provides three distinct mechanisms for searching HED-annotated data. They share a common goal — "does this HED string match this query?" — but differ substantially in their inputs, capabilities, schema requirements, and performance characteristics. 
Choosing the right implementation depends on whether you need schema-aware ancestor matching, full group-structural queries, or raw throughput on unannotated strings. - -## Overview of the three implementations - -### `basic_search` — regex-based flat matching - -Located in {mod}`hed.models.basic_search`, the `find_matching()` function operates directly on a `pd.Series` of raw HED strings using compiled regular expressions. It requires no schema and no parsing step, making it the fastest option for bulk row filtering. - -Key characteristics: - -- Input is a `pd.Series` of raw strings; output is a `pd.Series[bool]` mask. -- The query is compiled once into a regex and applied with `Series.str.contains`. -- Matches are purely literal — `Event` does not match `Sensory-event`. -- `@A` in a basic-search query means A **must be present** anywhere in the string (note: this is the **opposite** of what `@A` means in `QueryHandler`/`StringQueryHandler`). -- `~A` means A must not appear anywhere (global negation). -- `(A, B)` syntax checks that A and B appear at the same nesting level. -- Wildcard `A*` expands to the regex `A.*?`, which can span `/` and match mid-token substrings. - -Use `basic_search` when you are working with a large series of raw strings, don't need ancestor matching, and want maximum throughput. See {func}`hed.models.basic_search.find_matching`. - -### `QueryHandler` — schema-backed object search - -Located in {mod}`hed.models.query_handler`, `QueryHandler` is the full-featured search engine. It compiles a query string into an expression tree once, then evaluates that tree against `HedString` objects that have already been parsed against a loaded `HedSchema`. - -Key characteristics: - -- Input is a `HedString` object; a full `HedSchema` is required. -- Output is a `list[SearchResult]` containing `HedTag` / `HedGroup` object references, useful for tag-level introspection (not just row filtering). 
-- Supports the complete query language: `&&`, `||`, `~`, `@`, `{}`, `[]`, `{:}`, `?`, `??`, `???`. -- `@A` means A must **not** appear anywhere in the string. -- Ancestor matching is exact — the schema normalises both query and string tags to short form, so `Event` matches `Sensory-event` because the schema knows `Sensory-event` descends from `Event`. -- Per-string cost includes a full HedString parse and schema tag resolution. - -Use `QueryHandler` when you need schema-aware ancestor matching, or when you want object references (e.g., to retrieve the matched group for further processing). See {class}`hed.models.query_handler.QueryHandler`. - -### `StringQueryHandler` — tree-based schema-optional search - -Located in {mod}`hed.models.string_search`, `StringQueryHandler` is a new middle-ground implementation that inherits from `QueryHandler` and reuses the full expression-tree compiler, but operates on raw strings rather than pre-parsed `HedString` objects. - -It parses each raw HED string into a lightweight {class}`~hed.models.string_search.StringNode` tree that duck-types the `HedGroup`/`HedTag` interfaces expected by the existing expression evaluators — so all `QueryHandler` query syntax works unchanged. - -Key characteristics: - -- Input is a raw string (or a `pd.Series` via {func}`~hed.models.string_search.search_series`). -- Schema is **optional**: pass a `schema_lookup` dict (see {mod}`hed.models.schema_lookup`) to enable ancestor matching for short-form strings (e.g. `Event` matching `Sensory-event`); omit it for purely literal matching. -- Output is a list (truthy/falsy) — row-filtering only, no object references. -- Supports the same full query syntax as `QueryHandler` (`&&`, `||`, `~`, `@`, `{}`, etc.). -- `@A` carries the same semantics as `QueryHandler` — A must **not** be present. -- Long-form strings (`Event/Sensory-event`) support ancestor matching via slash-splitting even without a lookup. 
Short-form strings (`Sensory-event`) require a `schema_lookup` for ancestor matching; without one, matching is purely literal. -- Parse cost is a lightweight recursive split — much cheaper than a full HedString + schema parse. - -Use `StringQueryHandler` when you have raw strings (not `HedString` objects), need the full `QueryHandler` query syntax, and either don't have a schema available or want faster processing at the cost of losing full schema-aware ancestor matching. See {class}`hed.models.string_search.StringQueryHandler`. - -### Generating a schema lookup - -If you want `StringQueryHandler` to resolve ancestors for short-form strings (e.g. query `Event` matching `Sensory-event`) without a full schema parse per row, you can pre-generate a lookup dictionary from a `HedSchema`: - -```python -from hed import load_schema_version -from hed import generate_schema_lookup, save_schema_lookup, load_schema_lookup - -schema = load_schema_version("8.4.0") -lookup = generate_schema_lookup(schema) # {short_name_casefold: tag_terms_tuple} - -# Persist for reuse -save_schema_lookup(lookup, "hed840_lookup.json") -lookup = load_schema_lookup("hed840_lookup.json") -``` - -See {func}`hed.models.schema_lookup.generate_schema_lookup`. 
- -______________________________________________________________________ - -## Comparison tables - -### Core characteristics - -| Property | `basic_search` | `QueryHandler` | `StringQueryHandler` | -| --------------------- | -------------------------- | -------------------------------------------------- | ----------------------------------------------- | -| **Input** | `pd.Series` of raw strings | `HedString` object | Raw string or `pd.Series` (via `search_series`) | -| **Schema required** | No | Yes — full `HedSchema` for tag parsing | No; optional `schema_lookup` dict | -| **Output** | `pd.Series[bool]` mask | `list[SearchResult]` with `HedTag`/`HedGroup` refs | `list` (truthy/falsy); `StringNode` refs | -| **Result usable for** | Row filtering | Row filtering + tag/group introspection | Row filtering only | -| **Batch API** | Native (`series`) | Manual loop | `search_series(series, query)` | -| **Parse cost** | Regex compilation once | Full `HedString` + schema parse per string | Lightweight tree parse per string | -| **Unrecognised tags** | Matched literally | Silent match failure (`tag_terms = ()`) | Matched literally | - -### Query syntax - -| Feature | `basic_search` query syntax | `QueryHandler` / `StringQueryHandler` query syntax | -| ---------------------------- | --------------------------------------------------- | -------------------------------------------------- | -| **AND** | Space or comma between terms (context-dependent) | `A && B` or `A, B` | -| **OR** | Not supported | `A \|\| B` | -| **Absent from string (`@`)** | ⚠️ `@A` means A **must be present** anywhere | `@A` means A must **not** appear anywhere | -| **Must-not-appear (`~`)** | `~A` — A must not appear anywhere (global) | `~A` — negation within group context (local) | -| **Prefix wildcard** | `A*` → regex `A.*?` (spans `/`, matches substrings) | `A*` → prefix on short form only | -| **Full regex per term** | Yes (`regex=True` mode) | No | -| **Quoted exact match** | No | `"A"` — exact 
match, no ancestor search | -| **Implicit default** | If no `(` or `@`: all terms become "anywhere" | No implicit conversion — must be explicit | - -### Group / structural operators - -| Feature | `basic_search` | `QueryHandler` | `StringQueryHandler` | -| --------------------------------- | ----------------------------------------- | -------------------------------------------- | ---------------------- | -| **Same nesting level** | `(A, B)` — A and B at same relative level | N/A — use `{A, B}` | N/A — use `{A, B}` | -| **Same parenthesised group `{}`** | No | `{A, B}` — must share a direct parent group | Same as `QueryHandler` | -| **Exact group `{:}`** | No | `{A, B:}` — same group, no other children | Same | -| **Optional exact group** | No | `{A, B: C}` — A and B required, C optional | Same | -| **Descendant group `[]`** | No | `[A, B]` — both in same subtree at any depth | Same | -| **Any child `?`** | No | `?` — any tag or group child | Same | -| **Any tag child `??`** | No | `??` — any leaf (non-group) child | Same | -| **Any group child `???`** | No | `???` — any parenthesised group child | Same | -| **Nested query operators** | No | Yes — full recursive composition | Same | - -### Ancestor / cross-form search - -| Scenario | `basic_search` | `QueryHandler` | `StringQueryHandler` | -| ------------------------------------------------------- | ------------------------------------------------- | --------------------------------------- | ---------------------------------------------------------------- | -| Query `Event`, string `Sensory-event` (short form) | ❌ literal only | ✅ `tag_terms` from schema | ✅ with `schema_lookup`; ❌ without | -| Query `Event`, string `Event/Sensory-event` (long form) | ❌ `Event` ≠ `Event/Sensory-event` | ✅ schema normalises | ✅ slash-split produces `tag_terms = ("event", "sensory-event")` | -| Query `Event/Sensory-event`, string `Sensory-event` | ❌ | ✅ schema normalises both to short form | ❌ no schema to normalise | -| Schema-free 
ancestor search | `convert_query()` + long-form series (workaround) | N/A — schema always required | ✅ works natively for long-form strings | -| Tag `Def/Name` matched by query `Def` | ❌ literal prefix mismatch | ✅ `short_base_tag = "Def"` | ✅ `tag_terms` contains `"def"` | - -### Critical semantic traps - -These differences are silent — no error, just wrong answers if you mix up query strings across implementations: - -| Operator | `basic_search` | `QueryHandler` / `StringQueryHandler` | -| ----------------- | -------------------------------------------------------- | ----------------------------------------------------------------------------------- | -| `@A` | A **must** appear anywhere in the string | A must **not** appear anywhere in the string | -| `~A` | A must not appear **anywhere** (global) | A must not appear in any group that also matches the rest of the expression (local) | -| `*` wildcard | Regex `.*?` — spans `/` and matches mid-token substrings | Strict prefix on the tag's short form — anchored to start | -| No-operator `A B` | Both present anywhere (implicit `@@`) | Parse error — `&&` required | - -______________________________________________________________________ - -## Performance - -*Performance benchmarks will be added here.* - -Preliminary guidance: - -- For large-scale row filtering on raw strings where schema awareness is not needed, `basic_search` is likely fastest due to vectorised regex on the full series with no per-row parsing. -- `StringQueryHandler` trades some throughput for full query-language support and optional ancestor matching; parse cost per row is a lightweight recursive split. -- `QueryHandler` has the highest per-string cost because it requires a pre-parsed `HedString` (including schema tag resolution), but provides the richest result objects.