From cea63c9bcbfde95aca9a2cf77bf74629a8917db7 Mon Sep 17 00:00:00 2001 From: Eliot Hedeman Date: Wed, 22 Apr 2026 16:38:07 -0400 Subject: [PATCH] chore(toolpath-desktop): add synthetic preview benchmarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a fixture generator (new `gen_synthetic_path` binary in toolpath-cli) and a pure-TS bench script covering the Preview's hot paths: `normalize`, `buildTree`, `flattenChatHead`, `classify`, `matchesFilter`. `bun run bench` reports median/p95/max over 10 iterations on fixtures at 1k / 5k / 10k steps. BENCHMARKS.md captures the 2026-04-22 baseline on Apple M4 Pro. Notable: `buildTree` at 10k steps is ~579 ms median (p95 1.8 s) — well over the 200 ms keystroke budget, and the primary thing #39 should improve. Manual Tauri webview procedure (render time, memory) is documented with an empty template for a human to fill after a DevTools session. Bumps toolpath-cli to 0.4.0 (additive public change: new binary). Fixture files are gitignored — regenerate locally. Addresses #41 --- .gitignore | 2 + CHANGELOG.md | 1 + Cargo.lock | 2 +- crates/toolpath-cli/Cargo.toml | 7 +- .../src/bin/gen_synthetic_path.rs | 327 ++++++++++++++++++ .../toolpath-desktop/frontend/BENCHMARKS.md | 151 ++++++++ crates/toolpath-desktop/frontend/package.json | 3 +- .../src/lib/__bench__/preview.bench.ts | 181 ++++++++++ .../toolpath-desktop/frontend/tsconfig.json | 3 +- site/_data/crates.json | 2 +- 10 files changed, 674 insertions(+), 5 deletions(-) create mode 100644 crates/toolpath-cli/src/bin/gen_synthetic_path.rs create mode 100644 crates/toolpath-desktop/frontend/BENCHMARKS.md create mode 100644 crates/toolpath-desktop/frontend/src/lib/__bench__/preview.bench.ts diff --git a/.gitignore b/.gitignore index 6786a40..dfbf9c4 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ /local site/wasm/ .claude/worktrees/ +# Synthetic benchmark fixtures — generate locally via gen_synthetic_path. 
+/bench/fixtures/ diff --git a/CHANGELOG.md b/CHANGELOG.md index f388481..ef88355 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ All notable changes to the Toolpath workspace are documented here. - `toolpath-gemini` 0.1.0: new crate — reads Gemini CLI conversation logs from `~/.gemini/tmp//chats/`, implements `ConversationProvider`, and derives Toolpath `Path` documents. `PathResolver` supports both friendly-name (`projects.json`) and SHA-256 hash-slot layouts. Sub-agent chat files (`kind: "subagent"`) are folded into `DelegatedWork` on the parent `task` tool invocation, with `turns` populated from the sub-agent's messages. Polling-based `ConversationWatcher` (feature `watcher`, default on) emits `Turn` / `TurnUpdated` / `Progress { kind: "subagent_started" | "subagent_complete" }` events. Guarantees round-trip fidelity at the `ChatFile` layer via `Option>` for absent-vs-empty preservation, `GeminiRole::Other(String)` catch-all, `Option` on polymorphic `resultDisplay`, and `#[serde(flatten)] extra` at chat and message levels. 163 unit + 12 integration + 4 doc tests. - `toolpath-codex` 0.1.0: new crate — reads Codex CLI rollout JSONL from `~/.codex/sessions/YYYY/MM/DD/rollout-*.jsonl`, implements `ConversationProvider`, and derives Toolpath `Path` documents. Maps the streaming `response_item` / `event_msg` model onto message-shaped `Turn`s: pairs `function_call` / `custom_tool_call` to outputs by `call_id`, buffers `reasoning` onto the next assistant turn, enriches tool invocations with `exec_command_end` stdout/exit code, and surfaces `patch_apply_end.changes` as sibling file artifacts carrying the real unified diff as the `raw` perspective. Non-turn rollout items (`session_meta`, `turn_context`, `task_started`, `task_complete`, `token_count`, etc.) preserved as `ConversationEvent`s for round-trip fidelity. Sessions are global (date-bucketed), not project-keyed; session id is either the UUIDv7 or the filename stem. 
69 unit + 33 integration + 1 doc test. - `toolpath-opencode` 0.1.0: new crate — reads opencode's `~/.local/share/opencode/opencode.db` SQLite database (opened read-only via `rusqlite` with `SQLITE_OPEN_READ_ONLY` so it never interferes with a live opencode process), implements `ConversationProvider`, and derives Toolpath `Path` documents. Strongly types all 12 `part.data` variants (text, reasoning, tool, step-start/-finish, snapshot, patch, file, agent, subtask, retry, compaction) with `#[serde(other)]` catch-alls so new upstream variants round-trip. Each message becomes a step with tool invocations attached; reasoning folds onto `Turn.thinking`. Real unified diffs come from opencode's sibling bare git snapshot repositories via `git2` tree↔tree comparisons, honoring both the current `snapshot///` layout and the older `snapshot//` flat layout. Files under `.gitignore`d paths (which opencode never captures in its snapshot store) fall back to tool-input-derived structural changes with `source: "tool_input_gitignored"` labeling. Project id is the SHA of the repo's first root commit (stable across clones and renames). 43 unit + 1 doc test. +- `toolpath-cli` 0.4.0: adds `gen_synthetic_path` binary for generating synthetic `Path` fixtures at configurable step counts (bench support for toolpath-desktop Preview, see issue #41). - `toolpath-cli` 0.3.1: `path project claude` and `path incept` commands for projecting toolpath documents into Claude sessions; `derive gemini`/`list gemini`, `derive codex`/`list codex`, and `derive opencode [--session ID] [--all] [--project ID] [--no-snapshot-diffs]` / `list opencode [--project ID] [--json]` subcommands. - `toolpath-desktop` 0.1.0: new crate — Tauri 2 desktop app for non-technical users. Source discovery for Claude Code + Pi + local git + GitHub PRs; interactive DAG preview (d3 + dagre-d3, Svelte 5 + TypeScript frontend); local `.path.json` export; stubbed Pathbase upload. 
GitHub PAT stored in the OS keychain under `dev.pathbase.toolpath-desktop`. Hot-reloading dev loop via `cargo tauri dev` (spawns Vite on port 1420). diff --git a/Cargo.lock b/Cargo.lock index c6e91b6..5b2e009 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5075,7 +5075,7 @@ dependencies = [ [[package]] name = "toolpath-cli" -version = "0.3.1" +version = "0.4.0" dependencies = [ "anyhow", "assert_cmd", diff --git a/crates/toolpath-cli/Cargo.toml b/crates/toolpath-cli/Cargo.toml index e2d871b..01a5e05 100644 --- a/crates/toolpath-cli/Cargo.toml +++ b/crates/toolpath-cli/Cargo.toml @@ -1,17 +1,22 @@ [package] name = "toolpath-cli" -version = "0.3.1" +version = "0.4.0" edition.workspace = true license.workspace = true repository = "https://github.com/empathic/toolpath" description = "CLI for deriving, querying, and visualizing Toolpath provenance" keywords = ["provenance", "toolpath", "cli", "audit", "traceability"] categories = ["command-line-utilities", "development-tools"] +default-run = "path" [[bin]] name = "path" path = "src/main.rs" +[[bin]] +name = "gen_synthetic_path" +path = "src/bin/gen_synthetic_path.rs" + [dependencies] toolpath = { workspace = true } toolpath-git = { workspace = true } diff --git a/crates/toolpath-cli/src/bin/gen_synthetic_path.rs b/crates/toolpath-cli/src/bin/gen_synthetic_path.rs new file mode 100644 index 0000000..678fa0d --- /dev/null +++ b/crates/toolpath-cli/src/bin/gen_synthetic_path.rs @@ -0,0 +1,327 @@ +//! Synthetic `Path` generator for desktop Preview benchmarks (issue #41). +//! +//! Emits a `.path.json` with N steps that approximate a Claude session: +//! +//! - ~70% linear `conversation.append` turns alternating user / assistant +//! - ~20% Edit / Write tool invocations (sibling children of the assistant) +//! - ~10% MultiEdit tool invocations +//! +//! Steps are deterministic given a seed so benches are comparable across runs. +//! +//! Usage: +//! +//! ```text +//! cargo run -p toolpath-cli --bin gen_synthetic_path -- \ +//! 
--steps 10000 --out bench/fixtures/synthetic-10k.path.json +//! ``` +//! +//! The output is not intended to be semantically coherent — it's just big, +//! well-shaped JSON matching what the Preview's normalize/flattenTree code +//! actually walks. + +use std::collections::HashMap; +use std::fs; +use std::path::PathBuf; + +use anyhow::{Context, Result}; +use clap::Parser; +use rand::{Rng, SeedableRng, rngs::StdRng}; +use serde_json::{Value, json}; +use toolpath::v1::{ + ActorDefinition, ArtifactChange, Base, Document, Path, PathIdentity, PathMeta, Step, StepMeta, + StructuralChange, +}; + +#[derive(Parser, Debug)] +#[command(name = "gen_synthetic_path")] +#[command(about = "Generate a synthetic Toolpath Path document for benchmarking")] +struct Args { + /// Number of steps to generate. + #[arg(long, default_value_t = 1_000)] + steps: usize, + + /// Output file path (parent dirs are created). + #[arg(long)] + out: PathBuf, + + /// Deterministic seed. + #[arg(long, default_value_t = 42)] + seed: u64, +} + +const LOREM: &[&str] = &[ + "lorem ipsum dolor sit amet consectetur adipiscing elit", + "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua", + "ut enim ad minim veniam quis nostrud exercitation ullamco laboris", + "duis aute irure dolor in reprehenderit in voluptate velit esse", + "excepteur sint occaecat cupidatat non proident sunt in culpa", + "at vero eos et accusamus et iusto odio dignissimos ducimus", + "nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit", + "sed ut perspiciatis unde omnis iste natus error sit voluptatem", +]; + +const TOOLS: &[(&str, f64)] = &[ + ("Edit", 0.50), + ("Write", 0.30), + ("MultiEdit", 0.20), +]; + +const FILES: &[&str] = &[ + "src/main.rs", + "src/lib.rs", + "src/server.rs", + "src/store.rs", + "src/routes/index.ts", + "src/routes/api.ts", + "src/lib/tree.ts", + "src/lib/viz.ts", + "Cargo.toml", + "package.json", + "README.md", +]; + +fn lorem_block(rng: &mut StdRng, sentences: usize) -> String { + 
(0..sentences) + .map(|_| LOREM[rng.random_range(0..LOREM.len())]) + .collect::>() + .join(". ") +} + +fn pick_tool(rng: &mut StdRng) -> &'static str { + let r: f64 = rng.random(); + let mut acc = 0.0; + for (name, w) in TOOLS { + acc += w; + if r < acc { + return name; + } + } + TOOLS[0].0 +} + +fn synth_diff(rng: &mut StdRng, path: &str) -> String { + let lines = rng.random_range(3..12); + let mut s = format!("--- a/{}\n+++ b/{}\n@@ -1,{} +1,{} @@\n", path, path, lines, lines); + for i in 0..lines { + if rng.random_bool(0.5) { + s.push_str(&format!("-old_line_{} = value;\n", i)); + s.push_str(&format!("+new_line_{} = value;\n", i)); + } else { + s.push_str(&format!(" context_line_{};\n", i)); + } + } + s +} + +fn assistant_step(i: usize, parent: Option<&str>, rng: &mut StdRng) -> Step { + let sentences = rng.random_range(1..5); + let text = lorem_block(rng, sentences); + let mut extra = HashMap::new(); + extra.insert("role".into(), Value::String("assistant".into())); + extra.insert("text".into(), Value::String(text)); + extra.insert("model".into(), Value::String("claude-opus-4-6".into())); + + let mut s = Step::new( + format!("step-{:06}", i), + "agent:claude-code", + format!("2026-04-22T10:{:02}:{:02}Z", (i / 60) % 60, i % 60), + ); + if let Some(p) = parent { + s = s.with_parent(p); + } + s.change.insert( + "conversation".into(), + ArtifactChange { + raw: None, + structural: Some(StructuralChange { + change_type: "conversation.append".into(), + extra, + }), + }, + ); + s.meta = Some(StepMeta { + intent: Some(format!("assistant turn {}", i)), + ..Default::default() + }); + s +} + +fn user_step(i: usize, parent: Option<&str>, rng: &mut StdRng) -> Step { + let sentences = rng.random_range(1..3); + let text = lorem_block(rng, sentences); + let mut extra = HashMap::new(); + extra.insert("role".into(), Value::String("user".into())); + extra.insert("text".into(), Value::String(text)); + + let mut s = Step::new( + format!("step-{:06}", i), + "human:bench", + 
format!("2026-04-22T10:{:02}:{:02}Z", (i / 60) % 60, i % 60), + ); + if let Some(p) = parent { + s = s.with_parent(p); + } + s.change.insert( + "conversation".into(), + ArtifactChange { + raw: None, + structural: Some(StructuralChange { + change_type: "conversation.append".into(), + extra, + }), + }, + ); + s +} + +fn tool_step(i: usize, parent: &str, rng: &mut StdRng) -> Step { + let name = pick_tool(rng); + let file = FILES[rng.random_range(0..FILES.len())]; + + let mut extra = HashMap::new(); + extra.insert("name".into(), Value::String(name.into())); + extra.insert("tool_id".into(), Value::String(format!("tool-{}", i))); + + let mut s = Step::new( + format!("step-{:06}", i), + "agent:claude-code", + format!("2026-04-22T10:{:02}:{:02}Z", (i / 60) % 60, i % 60), + ) + .with_parent(parent); + + // Conversation-side marker (the tool.invoke payload). + s.change.insert( + "conversation".into(), + ArtifactChange { + raw: None, + structural: Some(StructuralChange { + change_type: "tool.invoke".into(), + extra, + }), + }, + ); + + // File artifact with a raw diff (mirrors what `toolpath-convo` emits for + // file-write tools) — this is the payload `diff.raw.split("\n")` chews on. + let raw = synth_diff(rng, file); + let mut file_extra = HashMap::new(); + file_extra.insert("tool".into(), Value::String(name.into())); + file_extra.insert("tool_id".into(), Value::String(format!("tool-{}", i))); + s.change.insert( + file.into(), + ArtifactChange { + raw: Some(raw), + structural: Some(StructuralChange { + change_type: "file.write".into(), + extra: file_extra, + }), + }, + ); + + s +} + +fn main() -> Result<()> { + let args = Args::parse(); + let mut rng = StdRng::seed_from_u64(args.seed); + let n = args.steps; + + // Weighted mix: ~70% conversation turns (alternating user/assistant), + // ~20% Edit/Write, ~10% MultiEdit. 
Tool turns attach as sibling children + // of the previous assistant step — advancing HEAD stays on the + // conversation spine, matching the derived Claude shape the Preview was + // designed against. + let mut steps: Vec = Vec::with_capacity(n); + let mut head_id: Option = None; + let mut last_assistant_id: Option = None; + + // Seed with a user turn. + if n > 0 { + let s = user_step(0, None, &mut rng); + head_id = Some(s.step.id.clone()); + steps.push(s); + } + + for i in 1..n { + let r: f64 = rng.random(); + let parent = head_id.clone(); + if r < 0.30 { + // Tool invocation — sibling child of last assistant, does not + // advance HEAD. + if let Some(la) = &last_assistant_id { + let s = tool_step(i, la, &mut rng); + steps.push(s); + continue; + } + // No assistant yet — fall through to a regular conv turn. + } + // Alternate user/assistant on the spine. + if i % 2 == 1 { + let s = assistant_step(i, parent.as_deref(), &mut rng); + head_id = Some(s.step.id.clone()); + last_assistant_id = Some(s.step.id.clone()); + steps.push(s); + } else { + let s = user_step(i, parent.as_deref(), &mut rng); + head_id = Some(s.step.id.clone()); + steps.push(s); + } + } + + let head = head_id.unwrap_or_else(|| "step-000000".into()); + + let mut actors: HashMap = HashMap::new(); + actors.insert( + "agent:claude-code".into(), + ActorDefinition { + name: Some("Claude Code".into()), + provider: Some("anthropic".into()), + model: Some("claude-opus-4-6".into()), + ..Default::default() + }, + ); + actors.insert( + "human:bench".into(), + ActorDefinition { + name: Some("Bench User".into()), + ..Default::default() + }, + ); + + let path = Path { + path: PathIdentity { + id: format!("synthetic-{}-steps", n), + base: Some(Base { + uri: "file:///synthetic".into(), + ref_str: None, + }), + head, + }, + steps, + meta: Some(PathMeta { + title: Some(format!("Synthetic {}-step path", n)), + source: Some(format!("synthetic://seed={}", args.seed)), + actors: Some(actors), + extra: { + let mut m = 
HashMap::new(); + m.insert("bench".into(), json!({"seed": args.seed, "steps": n})); + m + }, + ..Default::default() + }), + }; + + let doc = Document::Path(path); + let json = doc.to_json()?; + if let Some(parent) = args.out.parent() { + fs::create_dir_all(parent).with_context(|| format!("creating {}", parent.display()))?; + } + fs::write(&args.out, &json).with_context(|| format!("writing {}", args.out.display()))?; + eprintln!( + "wrote {} ({} bytes, {} steps)", + args.out.display(), + json.len(), + n + ); + Ok(()) +} diff --git a/crates/toolpath-desktop/frontend/BENCHMARKS.md b/crates/toolpath-desktop/frontend/BENCHMARKS.md new file mode 100644 index 0000000..5cb00c2 --- /dev/null +++ b/crates/toolpath-desktop/frontend/BENCHMARKS.md @@ -0,0 +1,151 @@ +# toolpath-desktop Preview benchmarks + +Tracks the Preview's performance on synthetic `Path` fixtures at 1k / 5k / 10k +steps. Filed against [issue #41](https://github.com/empathic/toolpath/issues/41). +Rerun after [#38](https://github.com/empathic/toolpath/issues/38) (markdown +memoization) and [#39](https://github.com/empathic/toolpath/issues/39) +(buildTree dep narrowing) land to quantify the win. + +## What's measured + +Two layers, split by whether an agent can run them unattended: + +| Layer | Hot path | Where it runs | Automatable? | +|-------|----------|----------------|--------------| +| Pure TS | `normalize` / `buildTree` / `flattenChatHead` / `classify` / `matchesFilter` | Node-compatible, runs in `bun` | Yes — `bun run bench` | +| Tauri webview | `renderMarkdown` per turn, `diff.raw.split("\n")` per tool turn, whole-graph re-layout, DOM update | `cargo tauri dev` + real webview | No — manual Chrome DevTools | + +Pure-TS numbers are reproducible on any host. Render / memory numbers need a +human at a running Tauri binary because the cost is dominated by the +webview's layout/paint, which `bun` doesn't simulate. 
+ +## Generating fixtures + +Fixtures are not committed (~5 MB at 10k steps, trivially regenerated). From +the repo root: + +```bash +cargo run -p toolpath-cli --bin gen_synthetic_path -- --steps 1000 --out bench/fixtures/synthetic-1k.path.json +cargo run -p toolpath-cli --bin gen_synthetic_path -- --steps 5000 --out bench/fixtures/synthetic-5k.path.json +cargo run -p toolpath-cli --bin gen_synthetic_path -- --steps 10000 --out bench/fixtures/synthetic-10k.path.json +``` + +Mix is ~70% conversation turns (user/assistant chosen by step-index parity, +so not strictly alternating), ~24% Edit/Write tool invocations, ~6% +MultiEdit (30% tool steps split 50/30/20 across Edit/Write/MultiEdit) — +chosen to approximate a derived Claude session. Seed is deterministic +(default 42). + +## Running the pure-TS bench + +From `crates/toolpath-desktop/frontend`: + +```bash +bun install +bun run bench +# or a single fixture: +bun run src/lib/__bench__/preview.bench.ts --fixture ../../../bench/fixtures/synthetic-10k.path.json +``` + +Prints `median / p95 / max / mean` over 10 iterations per (size × op) cell. + +## Running the manual Tauri bench + +1. Generate the 10k fixture (steps above). +2. `cd crates/toolpath-desktop && cargo tauri dev`. +3. When the app opens, use **New upload → Local git → Pick** (or whatever + route gets you into Preview) on any real session; then in the dev + console, paste the fixture as a `PreviewSlice`: + ```js + // In DevTools console, with the app on the Preview route: + const raw = await fetch("file:///ABSOLUTE/PATH/TO/bench/fixtures/synthetic-10k.path.json").then(r => r.text()); + // Then dispatch a DeriveSucceeded msg via the store. Exact wiring TBD + // — the cleanest path is a hidden "load from file" debug Msg; for now + // just derive from a real large session and use that. + ``` + In practice the easier route is to derive from a real long Claude session + (e.g. `~/.claude/projects/` with 1000+ turns) and measure that + directly. 
Synthetic fixtures catch regressions in normalize / buildTree; + they don't exercise `renderMarkdown` the way real prose does. +4. Open the webview inspector (right-click → Inspect) and switch to its + performance view. Tauri uses the OS webview, so this is Chromium DevTools + (**Performance** tab) only on Windows; on macOS and Linux it is WebKit's + Web Inspector (**Timelines** tab). +5. Click **Record**, interact (open Preview, type in the tree search, toggle + view mode between `chat` and `graph`), stop. +6. Read **Scripting**, **Rendering**, **Painting**, and **Total** columns + from the summary. +7. For memory: DevTools → **Memory → Take heap snapshot** before and after + opening the Preview. Subtract for the Document + parsed-DOM footprint. + +## Metrics table + +Fill in the columns as you run them. Leave `—` for "not measured yet"; +`N/A` for "not measurable in this environment". + +### Legend +- **TFP** — time to first paint of the Preview after `DeriveSucceeded` +- **Keystroke** — median render time after a keypress in the tree search box +- **Mem** — resident heap delta when Preview is open vs Home route + +### 2026-04-22 baseline (pre-#38, pre-#39) + +Host: Apple M4 Pro / Darwin 25.4 / bun 1.3.5 (Node v24 compat) / branch `eliot/issue-41-preview-benchmark` HEAD. 
+ +Pure-TS ops, 10 iterations each: + +| Size | JSON.parse | normalize | buildTree (median) | buildTree (p95) | keystroke filter | flattenChatHead | classify × all | +|------|------------|-----------|---------------------|------------------|-------------------|------------------|-----------------| +| 1k | 1.16 ms | 0.23 ms | **3.98 ms** | 7.5 ms | 0.08 ms | 0.23 ms | 0.14 ms | +| 5k | 3.17 ms | 0.79 ms | **82.2 ms** | 113 ms | 0.43 ms | 1.20 ms | 0.47 ms | +| 10k | 6.32 ms | 2.15 ms | **579 ms** | 1830 ms | 1.12 ms | 5.34 ms | 1.49 ms | + +Tauri webview ops (measure manually, DevTools → Performance): + +| Size | TFP | Keystroke (DOM-updated) | Mem (Document + DOM) | +|------|-----|--------------------------|------------------------| +| 1k | — | — | — | +| 5k | — | — | — | +| 10k | — | — | — | + +### Notes on the 2026-04-22 baseline + +- `buildTree` at 10k is **way above** the 200 ms target from the issue + (median 579 ms, p95 1.8 s). `normalize` alone is only 2.15 ms at 10k, so + nearly all of that cost sits in the `flattenTree` half of `buildTree`. + Growth is also superlinear (1k → 10k is 10× the steps but ~145× the + median), so the DFS is not behaving as O(N) on these fixtures; the + median-to-p95 spread additionally suggests GC pressure. Note that with + 10 samples the bench's p95 index is the last element, i.e. p95 is the + slowest run. This is the primary thing #39 should improve. +- `filter(matchesFilter)` itself is fine in pure TS (1 ms at 10k) — the + keystroke cost in the UI is dominated by `renderMarkdown` inside + `StepTree.svelte`'s re-render, not the filter. #38 should eliminate that. +- `flattenChatHead` is cheap (5.3 ms at 10k) because it only walks the HEAD + spine. +- `classify` is near-free; no need to memoize it unless #38 reveals it + inside a hot loop. +- Numbers are from `bun` running in Node-compatible mode, not a real + webview. 
Webview layout/paint and V8 JIT vs JavaScriptCore differ — treat + these as **lower bounds** for what the Preview actually experiences. + +### Post-#38 / post-#39 rerun + +Rerun `bun run bench` on this same branch layout after the fix PRs land and +fill the table below. Commit the numbers alongside a reference to the merged +PR number. 
+ +| Size | buildTree (median) | keystroke (DOM) | TFP (Tauri) | +|------|---------------------|------------------|--------------| +| 1k | — | — | — | +| 5k | — | — | — | +| 10k | — | — | — | + +## When to act + +The issue calls out **200 ms on a keystroke** as the threshold. A few rules +of thumb for reading the table: + +- Pure-TS `buildTree` > 100 ms at any size → probably visible as a freeze; + deserves a targeted fix (memoization, incremental update, or dep + narrowing — #39). +- Keystroke (DOM-updated) > 200 ms → real user-visible lag; deserves a + targeted fix (virtualized list, markdown memo — #38). +- Memory delta > ~200 MB for a 10k-step Document → investigate the parsed + DOM; consider lazy-rendering off-screen cards. + +Don't file sub-issues for known wins from #38 / #39 — they're already +tracked. diff --git a/crates/toolpath-desktop/frontend/package.json b/crates/toolpath-desktop/frontend/package.json index f8f71a2..d64cc35 100644 --- a/crates/toolpath-desktop/frontend/package.json +++ b/crates/toolpath-desktop/frontend/package.json @@ -8,7 +8,8 @@ "build": "vite build", "check": "svelte-check --tsconfig ./tsconfig.json", "test": "vitest run", - "test:watch": "vitest" + "test:watch": "vitest", + "bench": "bun run src/lib/__bench__/preview.bench.ts" }, "dependencies": { "@tauri-apps/api": "^2", diff --git a/crates/toolpath-desktop/frontend/src/lib/__bench__/preview.bench.ts b/crates/toolpath-desktop/frontend/src/lib/__bench__/preview.bench.ts new file mode 100644 index 0000000..ade6c6c --- /dev/null +++ b/crates/toolpath-desktop/frontend/src/lib/__bench__/preview.bench.ts @@ -0,0 +1,181 @@ +// Bench script for the Preview's pure-TS hot paths (issue #41). +// +// Runs against the synthetic fixtures generated by +// `cargo run -p toolpath-cli --bin gen_synthetic_path`. Prints median / +// p95 / max over N iterations per (size × operation) cell. Intended for +// tracking wins from #38 (markdown memo) and #39 (buildTree deps narrow). 
+// +// Usage (from `crates/toolpath-desktop/frontend`): +// +// bun run bench # default: all sizes it can find +// bun run src/lib/__bench__/preview.bench.ts --fixture ../../../bench/fixtures/synthetic-10k.path.json +// +// No external deps — just `performance.now()` and the modules under test. +// Render-time + memory measurements are the Tauri side and live in +// `BENCHMARKS.md` as a manual procedure. + +import { readFileSync } from "node:fs"; +import { resolve, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { classify } from "../classify"; +import { buildTree, flattenChatHead, matchesFilter } from "../tree"; +import { normalize } from "../viz"; +import type { Document } from "../types"; + +const ITERATIONS_PER_OP = 10; +const KEYSTROKE_QUERIES = ["step", "step-0", "agent", "claude", "bench", "xyz"]; + +interface Stats { + n: number; + median: number; + p95: number; + max: number; + mean: number; +} + +function stats(samples: number[]): Stats { + const sorted = [...samples].sort((a, b) => a - b); + const n = sorted.length; + const median = sorted[Math.floor(n / 2)]; + const p95 = sorted[Math.min(n - 1, Math.floor(n * 0.95))]; + const max = sorted[n - 1]; + const mean = sorted.reduce((a, b) => a + b, 0) / n; + return { n, median, p95, max, mean }; +} + +function fmt(ms: number): string { + if (ms < 1) return `${(ms * 1000).toFixed(0)}us`; + if (ms < 100) return `${ms.toFixed(2)}ms`; + return `${ms.toFixed(1)}ms`; +} + +function printRow(label: string, s: Stats): void { + console.log( + ` ${label.padEnd(36)}` + + `median=${fmt(s.median).padStart(8)} ` + + `p95=${fmt(s.p95).padStart(8)} ` + + `max=${fmt(s.max).padStart(8)} ` + + `mean=${fmt(s.mean).padStart(8)} (n=${s.n})`, + ); +} + +function time(fn: () => T): [number, T] { + const t0 = performance.now(); + const out = fn(); + const t1 = performance.now(); + return [t1 - t0, out]; +} + +function benchFixture(fixturePath: string): void { + const raw = readFileSync(fixturePath, 
"utf8"); + const parseStart = performance.now(); + const doc = JSON.parse(raw) as Document; + const parseMs = performance.now() - parseStart; + + const stepCount = + "Path" in doc ? doc.Path.steps.length + : "Graph" in doc ? doc.Graph.paths.flatMap((p) => ("$ref" in p ? [] : p.steps)).length + : "Step" in doc ? 1 + : 0; + + console.log(""); + console.log(`── ${fixturePath} (${stepCount} steps, ${raw.length.toLocaleString()} bytes)`); + console.log(` JSON.parse: ${fmt(parseMs)}`); + + // normalize + const normSamples: number[] = []; + for (let i = 0; i < ITERATIONS_PER_OP; i++) { + const [ms] = time(() => normalize(doc)); + normSamples.push(ms); + } + printRow("normalize(doc)", stats(normSamples)); + + // buildTree (normalize + flattenTree) + const buildSamples: number[] = []; + for (let i = 0; i < ITERATIONS_PER_OP; i++) { + const [ms] = time(() => buildTree(doc)); + buildSamples.push(ms); + } + printRow("buildTree(doc)", stats(buildSamples)); + + // Warm a tree once for keystroke + classify benches. + const { nodes, norm } = buildTree(doc); + + // matchesFilter — per-keystroke simulation over all nodes, "all" filter. + const keystrokeSamples: number[] = []; + for (let i = 0; i < ITERATIONS_PER_OP; i++) { + const q = KEYSTROKE_QUERIES[i % KEYSTROKE_QUERIES.length]; + const [ms] = time(() => nodes.filter((n) => matchesFilter(n, q, "all"))); + keystrokeSamples.push(ms); + } + printRow("filter(matchesFilter) per keystroke", stats(keystrokeSamples)); + + // flattenChatHead — HEAD-chain walk used by the transcript view. + const chatSamples: number[] = []; + for (let i = 0; i < ITERATIONS_PER_OP; i++) { + const [ms] = time(() => flattenChatHead(norm)); + chatSamples.push(ms); + } + printRow("flattenChatHead(norm)", stats(chatSamples)); + + // classify — per-step metadata extraction, run across every step. 
+ const classifySamples: number[] = []; + for (let i = 0; i < ITERATIONS_PER_OP; i++) { + const [ms] = time(() => { + for (const s of norm.steps) classify(s); + }); + classifySamples.push(ms); + } + printRow("classify × all steps", stats(classifySamples)); +} + +// ── Entry point ────────────────────────────────────────────────────────── + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); +const repoRoot = resolve(__dirname, "../../../../../.."); + +function defaultFixtures(): string[] { + return [ + "bench/fixtures/synthetic-1k.path.json", + "bench/fixtures/synthetic-5k.path.json", + "bench/fixtures/synthetic-10k.path.json", + ] + .map((p) => resolve(repoRoot, p)) + .filter((p) => { + try { + readFileSync(p); + return true; + } catch { + return false; + } + }); +} + +function main(): void { + const argv = process.argv.slice(2); + const fixtureArgIdx = argv.indexOf("--fixture"); + const fixtures = + fixtureArgIdx >= 0 && argv[fixtureArgIdx + 1] + ? [resolve(process.cwd(), argv[fixtureArgIdx + 1])] + : defaultFixtures(); + + if (fixtures.length === 0) { + console.error( + "No fixtures found. Generate them first:\n" + + " cargo run -p toolpath-cli --bin gen_synthetic_path -- --steps 1000 --out bench/fixtures/synthetic-1k.path.json\n" + + " cargo run -p toolpath-cli --bin gen_synthetic_path -- --steps 5000 --out bench/fixtures/synthetic-5k.path.json\n" + + " cargo run -p toolpath-cli --bin gen_synthetic_path -- --steps 10000 --out bench/fixtures/synthetic-10k.path.json\n", + ); + process.exit(1); + } + + console.log(`Preview bench — ${ITERATIONS_PER_OP} iterations per op`); + console.log(`Node/Bun: ${process.version ?? 
"unknown"}`); + + for (const f of fixtures) benchFixture(f); + console.log(""); +} + +main(); diff --git a/crates/toolpath-desktop/frontend/tsconfig.json b/crates/toolpath-desktop/frontend/tsconfig.json index 3a73fdf..d409648 100644 --- a/crates/toolpath-desktop/frontend/tsconfig.json +++ b/crates/toolpath-desktop/frontend/tsconfig.json @@ -15,5 +15,6 @@ "lib": ["ES2022", "DOM", "DOM.Iterable"], "types": ["svelte", "vite/client"] }, - "include": ["src/**/*.ts", "src/**/*.svelte"] + "include": ["src/**/*.ts", "src/**/*.svelte"], + "exclude": ["src/lib/__bench__/**"] } diff --git a/site/_data/crates.json b/site/_data/crates.json index b4f9c55..f17693b 100644 --- a/site/_data/crates.json +++ b/site/_data/crates.json @@ -89,7 +89,7 @@ }, { "name": "toolpath-cli", - "version": "0.3.1", + "version": "0.4.0", "description": "Unified CLI (binary: path)", "docs": "https://docs.rs/toolpath-cli", "crate": "https://crates.io/crates/toolpath-cli",