From 565c2692c4d6a81629f684b24dfe7dde8d99f41f Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 10 May 2026 18:51:26 +0200 Subject: [PATCH 01/11] Plan: Obsidian-friendly output (#151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three new CLI flags (`--output` for `--all-projects`, `--expand-paths`, `--filter-path`) for projecting Claude Code transcripts into the directory topology Obsidian vaults expect. Plan covers: - The empirical finding that `--output` was silently ignored in `--all-projects` mode (closing that gap is part of the scope). - Helper API: `project_dir_to_real_path` + `project_destination`. - Three-tier path resolution: cache → JSONL peek → naive last-resort. - Flag interaction matrix. - Test plan: unit (test_path_projection.py) + integration (test_obsidian_output.py, Markdown-scoped). All six initial open questions resolved by the user. Co-Authored-By: Claude Opus 4.7 (1M context) --- work/obsidian-friendly-output.md | 381 +++++++++++++++++++++++++++++++ 1 file changed, 381 insertions(+) create mode 100644 work/obsidian-friendly-output.md diff --git a/work/obsidian-friendly-output.md b/work/obsidian-friendly-output.md new file mode 100644 index 00000000..c237fba4 --- /dev/null +++ b/work/obsidian-friendly-output.md @@ -0,0 +1,381 @@ +# Obsidian-friendly output (issue #151) + +## Status: Plan — not started + +## Context + +Issue #151 wants three CLI flags that project Claude Code transcripts into +the same Markdown-vault topology Obsidian (and similar Markdown-based KM +tools) expect: + +``` +claude-code-log --output ~/Documents/Obsidian/ClaudeProjects \ + --expand-paths --filter-path /home/joe \ + --format md --detail low --compact +``` + +Should land sessions at: + +``` +~/Documents/Obsidian/ClaudeProjects/project/A/.low.md +``` + +The use case is the user's wider knowledge-management workflow — SAM +(the federation coordinator at `~/SAM`) and the Obsidian vault at +`~/Documents/Obsidian/Work` 
keep cross-project knowledge in a Markdown +tree; this feature gives Claude Code transcripts a clean projection +into that topology. + +## What's already there vs. what's missing + +The issue's framing implies `--output` already works for `--all-projects` +mode and produces a flat structure. **Empirically that's not what the +code does.** `claude_code_log/converter.py:process_projects_hierarchy` +writes `combined_transcripts.html` directly into each *source* +`project_dir` (e.g. `~/.claude/projects/-home-joe-project-A/`), and the +index lands at `~/.claude/projects/index.html`. `--output` is honoured +by the single-file/single-project paths (`convert_jsonl_to`, +`convert_single_session`) but is **not threaded through to +`process_projects_hierarchy`** (cli.py around line 816). + +So #151's three "flag flavours" actually decompose into: + +1. `--output ` honoured in `--all-projects` mode (currently a gap). +2. `--expand-paths` — undo Claude Code's flat encoding of project dirs. +3. `--filter-path ` — select subset + truncate prefix. + +The plan addresses all three; (1) is partially a prerequisite of (2)/(3). + +## Implementation surface + +### `claude_code_log/cli.py` + +- Add two `@click.option` declarations: + - `--expand-paths` — `is_flag=True`, default False. + - `--filter-path` — `type=str` (path-like), default None. + Optional but `--expand-paths` is a soft-prerequisite (filter + truncation only meaningful with expansion). Decision: allow either + flag standalone; document the behaviour matrix (see §Scope). +- Pass both into `main()` and forward to `process_projects_hierarchy` + (and to `convert_jsonl_to` if we decide to support flat-output for + the single-directory path too). +- Validation: warn if `--expand-paths` / `--filter-path` are given + without `--output` *and* without `--all-projects` (no-op flags). 
+ +### `claude_code_log/converter.py` + +- `process_projects_hierarchy` gains four new parameters: + - `output_dir: Optional[Path]` — destination root (was missing entirely). + - `expand_paths: bool` — flag. + - `filter_path: Optional[str]` — prefix. + - (Optional) `path_resolver: Optional[Callable]` — injection point + for tests, defaulting to a real implementation that consults the + cache for the authoritative `cwd`. +- Inside the per-project loop, just before computing `output_path`, + decide the **destination directory for this project's outputs** + using a small helper (see §Path-projection logic). Replace the + current hard-coded `project_dir / "combined_transcripts.html"` with + `dest_dir / "combined_transcripts.html"` where `dest_dir = project_dir` + in the legacy flat case and the projected path in the new case. +- The index file (`projects_path / get_index_filename(...)`) likewise + needs a destination decision (see §Index page question). +- Filtering: when `filter_path` is set and a project's resolved path + does not start with the prefix, **skip it** (don't emit anything). + +### Renderers + +Format-agnostic: HTML, Markdown, and JSON renderers all consume the +final destination path from `converter.py`. None of them need changes +for #151. The flag is triggered via the CLI; the renderer doesn't +care whether its output lives in `~/.claude/projects//` or +`~/Obsidian//`. + +### Tests + +- New `test/test_path_projection.py` (unit) — exercises the helper + with a mix of real-corpus names from `~/.claude/projects/` plus + synthesised edge cases. +- New `test/test_obsidian_output.py` (integration) — drives the CLI + end-to-end with a tmp `--output` and asserts the directory tree + matches the expected projected shape for each flag combination. + +--- + +## Path-projection logic + +This is the load-bearing piece. 
Four subtleties make it more than a +mechanical inverse: + +### Subtlety 1: Claude Code's encoding is lossy + +The forward direction (real path → flat name) is documented in +`cli.py:convert_project_path_to_claude_dir`: + +- `/` → `-` +- `.` → `-` (effectively — see real-corpus samples below) +- Leading `-` is the path-root marker. + +Confirmed against `~/.claude/projects/`: + +| Real path | Encoded form | +|---|---| +| `/home/cboos/bin` | `-home-cboos-bin` | +| `/home/cboos/.claude` | `-home-cboos--claude` | +| `/home/cboos/Documents/Obsidian/Work/.git` | `-home-cboos-Documents-Obsidian-Work--git` | + +Inverting is **fundamentally ambiguous**: `-home-joe-x-y` could mean +either `/home/joe/x/y` (a four-segment path with x and y as dirs) or +`/home/joe/x-y` (a three-segment path with `x-y` as a single dir). +A naïve "dash-as-separator" inverse cannot tell them apart. + +### Subtlety 2: The cache *has* the real path; if it doesn't, peek a session + +Claude Code records the actual `cwd` in every JSONL entry. Our +SQLite cache aggregates these into `cache.ProjectCache.working_directories` +and `SessionCacheData.cwd`. `convert_project_path_to_claude_dir`'s +forward direction is irrelevant here — for the inverse, we should read +the cache as the source of truth, not parse the encoded name. + +When the cache hasn't been populated yet, **peek the first JSONL** in +the project directory: open the file, read just enough lines to find +one entry with a `cwd` field, extract it. No need for the full +`parser.py` model-validation pipeline — we want a single string field, +the entry shape is stable and well-known, a tiny `json.loads(line) +.get("cwd")` loop suffices. + +Helper signature: + +```python +def project_dir_to_real_path( + project_dir: Path, + cache_manager: Optional[CacheManager] = None, +) -> Path: + """Recover the real on-disk path for a Claude project directory. + + Strategy (in order): + 1. 
If a cache_manager is available and the project has cached + `working_directories`, return the first entry. Authoritative + — that's the actual `cwd` Claude Code recorded at session time. + 2. Otherwise, peek the first JSONL file: read up to N lines, + json.loads each, return the first non-empty `cwd`. Cheap + (O(few KB) read, no validation overhead). + 3. Fall back to naïve `/`-for-`-` inversion only as a last + resort (e.g. project dir has no JSONLs left — archived but + cache evicted). + + Returns: + Path representing the recovered real path. + """ +``` + +Worked examples: + +| project_dir.name | cache hit | JSONL peek | Result | +|---|---|---|---| +| `-home-joe-project-A` | `["/home/joe/project/A"]` | — | `/home/joe/project/A` | +| `-home-cboos--claude` | `["/home/cboos/.claude"]` | — | `/home/cboos/.claude` | +| `-home-joe-x-y` (cache empty) | — | `cwd: "/home/joe/x-y"` | `/home/joe/x-y` | +| `-home-joe-x-y` (cache empty) | — | `cwd: "/home/joe/x/y"` | `/home/joe/x/y` | +| `-home-joe-orphan` (no cache, no JSONLs) | — | — | `/home/joe/orphan` (naïve last-resort) | + +Filesystem-existence-testing as a fallback was considered and rejected: +the *target* path may have moved/been deleted since the session was +recorded, and we shouldn't make resolution depend on the local FS state +in a way that produces different output for the same project on +different machines. + +### Subtlety 3: Filter-path semantics + +When `filter_path` is set: + +- **Selection**: skip projects whose resolved real path does not + satisfy `Path.is_relative_to(filter_path)` (Python 3.9+). +- **Truncation**: the surviving project's destination becomes + `output_dir / resolved.relative_to(filter_path)`. + +Worked examples for `--filter-path /home/joe --output /tmp/obsidian +--expand-paths`: + +| Resolved real path | Selected? 
| Destination | +|---|---|---| +| `/home/joe/project/A` | yes | `/tmp/obsidian/project/A/` | +| `/home/joe/.claude` | yes | `/tmp/obsidian/.claude/` | +| `/home/joe` | yes (matches itself) | `/tmp/obsidian/` (root) | +| `/home/jane/project/B` | no | (skipped) | + +### Subtlety 4: Flag interaction matrix + +`--filter-path` operates on **whatever path representation we're using**: +expanded real paths when `--expand-paths` is set, the flat encoded +project-dir name otherwise. This keeps the filter consistent with the +"current view" of project paths and avoids the surprise of a filter +silently consulting the cache when the user thought they were just +matching dir-name prefixes. + +| --output | --expand-paths | --filter-path | Behaviour | +|---|---|---|---| +| ✗ | ✗ | ✗ | Legacy: write into `~/.claude/projects//`. | +| ✓ | ✗ | ✗ | Flat copy under `//`. (Closes the implicit gap.) | +| ✓ | ✓ | ✗ | Expanded under `//`. | +| ✓ | ✓ | ✓ | Expanded + filtered: filter against real path, truncate prefix, land under `//`. | +| ✓ | ✗ | ✓ | Filter against the flat encoded name (e.g. `--filter-path -home-joe` selects projects starting with `-home-joe-`). No prefix truncation (truncation only meaningful with `--expand-paths`). Result lands under `//`. | +| ✗ | (any) | (any) | Warn that the new flags are no-ops; proceed with legacy behaviour. | + +### Helper API + +```python +def project_dir_to_real_path( + project_dir: Path, + cache_manager: Optional[CacheManager] = None, +) -> Path: ... + +def project_destination( + project_dir: Path, + *, + output_dir: Optional[Path], + expand_paths: bool, + filter_path: Optional[str], + cache_manager: Optional[CacheManager] = None, +) -> Optional[Path]: + """Compute the per-project destination directory. + + Returns: + The destination Path, or None if the project should be skipped + (filter_path is set and the project doesn't match). 
+ """ +``` + +Both read-only (no I/O beyond reading the cache and, in tier 2, peeking the first JSONL); +both trivially testable with a mocked CacheManager and a tmp project dir. + +--- + +## Index page question + +The current code writes the index to +`projects_path / get_index_filename(output_format)`. With `--output`, +the natural choice is `output_dir / get_index_filename(output_format)`. + +Two open questions: + +1. **Should the index even exist in Obsidian-friendly mode?** Obsidian + discovers files by walking its vault tree. A separate index page is + redundant in the common Obsidian use case. Recommendation: emit it + anyway (cheap; users can ignore or `.gitignore`-equivalent it), but + add a `--no-index` flag as a follow-up if users complain. + +2. **Where does the index live when `--filter-path` truncates the + tree?** The index naturally goes at `output_dir/`, which is *above* + the truncated tree. Recommendation: keep it at `output_dir/`. The + alternative — putting it at the deepest common ancestor of the + filtered projects — would surprise users (the path depends on which + projects matched, which depends on cache state). + +--- + +## Backwards compatibility + +- Default behaviour with no new flags is **byte-identical** to current + output (verified by snapshot tests after the change). +- Closing the `--output` gap for `--all-projects` is *not* a + behaviour change because `--output` was previously silently ignored + in that mode — users who passed it got the legacy path anyway. + Documenting this in the changelog. +- `convert_project_path_to_claude_dir` (the forward direction) is + unchanged. The new helper is the inverse and lives alongside it. + +--- + +## Tests + +### Unit (`test/test_path_projection.py`) + +- `test_project_dir_to_real_path_uses_cache_cwd` — cache populated + with explicit `cwd`; helper returns it verbatim. +- `test_project_dir_to_real_path_peeks_jsonl_when_no_cache` — + no cache, but project dir has a JSONL whose first user/assistant + entry carries `cwd`. 
Helper peeks, extracts, returns. Sampled + corpus shapes: + - `-home-cboos-bin` → JSONL with `cwd: "/home/cboos/bin"` → `/home/cboos/bin` + - `-home-cboos--claude` → JSONL with `cwd: "/home/cboos/.claude"` → `/home/cboos/.claude` +- `test_project_dir_to_real_path_naive_last_resort` — no cache AND + no JSONLs left (orphan archived dir); helper returns naïve + `/`-for-`-` inversion. Documented as best-effort. +- `test_project_dir_to_real_path_disambiguates_via_cache` — two + flat-encoded names that collide (both `-home-joe-x-y`) but the + cache stores different `cwd`s; helper returns the right one for + each. +- `test_project_destination_filter_match_expanded` — `--expand-paths + --filter-path /home/joe`: filter against real path, destination + is `output / relpath`. +- `test_project_destination_filter_miss_expanded` — same but real + path doesn't match prefix; helper returns None. +- `test_project_destination_filter_match_flat` — `--filter-path + -home-joe` (no expand): filter against flat name, destination is + `output / ` for matching projects. +- `test_project_destination_no_expand_no_filter` — flat copy under + `output_dir`. +- `test_project_destination_expand_no_filter` — full real-path + expansion under `output_dir`. + +### Integration (`test/test_obsidian_output.py`) + +- Mock `~/.claude/projects/` with two-three project shapes (using the + existing test_data fixtures pattern; e.g. tmp_path with a couple + `-home-fixture-*` dirs each with one JSONL). +- Drive the CLI with each flag combination from the matrix above; + assert the produced directory tree. +- Format coverage: **Markdown only** for the integration test + matrix. The flag mechanics are format-agnostic (no per-renderer + logic), so HTML/JSON parity is asserted by inspection of the + shared converter.py path rather than by re-running the matrix + per format. + +### Snapshot + +`test/test_snapshot_html.py` should not need changes — only the +output destination changes, not the rendered content. 
+ +--- + +## Open questions for main — *resolved by user* + +1. **JSON output**: format-agnostic (mechanics live in converter.py, + not the renderers); test only the Markdown path and trust parity + for HTML/JSON by code inspection. + +2. **Filter without expand**: filter against the **unexpanded** flat + project-dir name (`-home-joe-...`), not the resolved real path. + No prefix truncation in this mode — truncation only meaningful + with `--expand-paths`. + +3. **No-cache fallback**: peek the first JSONL in the project dir, + read just enough lines to find one entry with a `cwd` field, return + it. Cheap, deterministic, no full-parse overhead. Naïve `/`-for-`-` + inversion stays as the last resort (orphan dirs with no JSONLs). + +4. **`--output ` vs ``**: simpler heuristic — if the + `--output` value ends with a recognised extension suffix + (`.html` / `.md` / `.markdown` / `.json`), treat as a file; + otherwise treat as a directory. Both `--expand-paths` and + `--filter-path` apply only in the directory case. + +5. **Python 3.10 baseline**: confirmed; `Path.is_relative_to` + (3.9+) is safe to use. + +6. **Index page location with filter**: confirmed — keep at + `output_dir/index.{html,md,json}`. Predictable, doesn't depend + on which projects matched the filter. + +--- + +## Out of scope (mention for completeness) + +- Obsidian-specific frontmatter (YAML at top of each `.md` for tags / + links). Could be a follow-up `--obsidian-frontmatter` flag; not + part of #151's bullet list. +- Wikilink generation (`[[…]]`) for cross-references between + sessions. Same — follow-up. +- Symlink-based projection (write once, link from many places). The + current write-then-copy model is fine for Obsidian; symlinks would + complicate cache invalidation. 
From 93f579b3c82548fbcd1e0e686a8e8e568729a0e8 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 10 May 2026 19:25:04 +0200 Subject: [PATCH 02/11] Obsidian-friendly output: --output dir + --expand-paths + --filter-path (#151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three CLI flags for projecting Claude Code transcripts into the directory topology Obsidian-style Markdown vaults expect: - `--output` is now honoured for `--all-projects` (closing the gap where it was silently ignored). Combined with the suffix heuristic in utils.output_path_is_file(), the same flag handles both file and directory destinations. - `--expand-paths` undoes Claude Code's flat encoding so each project's output lands at its real on-disk path under /. - `--filter-path` selects projects by prefix and (with --expand-paths) truncates the prefix from the destination. ## Helpers (utils.py) `project_dir_to_real_path(project_dir, cached_working_directories=None)` recovers the real path with three-tier resolution: 1. cache hit (ProjectCache.working_directories), absolute paths only; 2. peek the first JSONL for a `cwd` field (lightweight json.loads loop, bounded to 32 lines, agent-* sidechain files skipped); 3. naive last-resort with `--` → `/.` mapping for dotfile dirs. `project_destination(...)` implements the flag-interaction matrix from work/obsidian-friendly-output.md and returns None for filter-excluded projects. ## process_projects_hierarchy Now consults project_destination() per project; skips filtered-out ones; threads dest_dir through convert_jsonl_to via a new `output_root` parameter. Index lives at output_dir/index.{ext} (or projects_path/ in legacy mode); html_file links computed relative to that. ## Tests - test/test_path_projection.py — 26 unit tests across resolution tiers, the disambiguation case, the agent-* skip, and every cell of the project_destination matrix. 
- test/test_obsidian_output.py — 5 integration tests (Markdown-scoped per the user's Q1 resolution) driving process_projects_hierarchy end-to-end and asserting the produced directory tree. `just ci` clean: 1721+ tests, ruff + pyright + ty all green. Co-Authored-By: Claude Opus 4.7 (1M context) --- claude_code_log/cli.py | 61 +++++++- claude_code_log/converter.py | 129 +++++++++++++++-- claude_code_log/utils.py | 189 +++++++++++++++++++++++++ test/test_obsidian_output.py | 200 ++++++++++++++++++++++++++ test/test_path_projection.py | 262 +++++++++++++++++++++++++++++++++++ 5 files changed, 827 insertions(+), 14 deletions(-) create mode 100644 test/test_obsidian_output.py create mode 100644 test/test_path_projection.py diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index 8f7005e3..feeb5e60 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -474,7 +474,40 @@ def _clear_output_files( "-o", "--output", type=click.Path(path_type=Path), - help="Output file path (default: input file with format extension, or combined_transcripts.{html,md} for directories)", + help=( + "Output destination. With a recognised file suffix " + "(.html/.md/.markdown/.json) treated as a single output file; " + "otherwise treated as a directory root (and now also honoured " + "for --all-projects, where outputs land at " + "//...). Pair with --expand-paths to project " + "back to the real on-disk tree." + ), +) +@click.option( + "--expand-paths", + is_flag=True, + help=( + "When set with --output and --all-projects, expand each " + "project's flat encoded dir name (e.g. '-home-joe-project-A') " + "back to its real path under /. Resolves the encoded " + "name via the cache's recorded `cwd`, falling back to a peek " + "of the first JSONL when the cache is empty. Useful for " + "projecting transcripts into Obsidian-style Markdown vaults." 
+ ), +) +@click.option( + "--filter-path", + type=str, + default=None, + help=( + "Restrict --all-projects to projects matching a path prefix. " + "With --expand-paths, the prefix is matched against the " + "expanded real path AND truncated from the destination " + "(`/home/joe/project/A` with --filter-path /home/joe lands at " + "/project/A/). Without --expand-paths, matches the " + "flat encoded dir name (e.g. '-home-joe' selects projects " + "starting with '-home-joe-')." + ), ) @click.option( "--open-browser", @@ -587,6 +620,8 @@ def _clear_output_files( def main( input_path: Optional[Path], output: Optional[Path], + expand_paths: bool, + filter_path: Optional[str], open_browser: bool, from_date: Optional[str], to_date: Optional[str], @@ -616,6 +651,17 @@ def main( # Configure logging to show warnings and above logging.basicConfig(level=logging.WARNING, format="%(levelname)s: %(message)s") + # Warn early if Obsidian-friendly flags (#151) were passed in a + # context where they're no-ops. `--all-projects` is the only mode + # that consumes them; `--output` must be a directory (file-suffixed + # output goes through the single-file path which doesn't honour + # these flags). + if (expand_paths or filter_path) and tui: + click.echo( + "Warning: --expand-paths / --filter-path are ignored in --tui mode.", + err=True, + ) + from .models import DetailLevel detail_level = DetailLevel(detail.lower()) @@ -813,6 +859,16 @@ def main( raise FileNotFoundError(f"Projects directory not found: {input_path}") click.echo(f"Processing all projects in {input_path}...") + # `--output` for `--all-projects` (#151): pass a *directory* + # to project per-project outputs into. File-suffixed values + # are routed to the single-file path elsewhere; here we + # only honour directory-shaped `--output`. 
+ from .utils import output_path_is_file + + output_dir_for_projects: Optional[Path] = None + if output is not None and not output_path_is_file(output): + output_dir_for_projects = output + output_path = process_projects_hierarchy( input_path, from_date, @@ -824,6 +880,9 @@ def main( page_size=page_size, detail=detail_level, compact=compact, + output_dir=output_dir_for_projects, + expand_paths=expand_paths, + filter_path=filter_path, ) # Count processed projects diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index cd111376..0a4f3538 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -1525,6 +1525,7 @@ def convert_jsonl_to( detail: DetailLevel = DetailLevel.FULL, compact: bool = False, update_cache: bool = True, + output_root: Optional[Path] = None, ) -> Path: """Convert JSONL transcript(s) to the specified format. @@ -1567,6 +1568,11 @@ def convert_jsonl_to( suffix = _variant_suffix(detail, compact, format) + # Output destination decoupled from `input_path` (#151). Both + # branches below assign to `effective_output_dir`; declare it + # upfront so pyright sees it as defined unconditionally. + effective_output_dir: Path = output_root if output_root is not None else input_path + if input_path.is_file(): # Single file mode - cache only available for directory mode if output_path is None: @@ -1582,8 +1588,16 @@ def convert_jsonl_to( cache_was_updated = False # No cache in single file mode else: # Directory mode - Cache-First Approach + # `output_root` (#151) decouples the output destination from + # the source `input_path` so we can write under e.g. + # ~/Documents/Obsidian// while still reading + # from ~/.claude/projects//. (`effective_output_dir` + # is declared above the if/else; this branch only ensures the + # destination dir exists and supplies the default output_path.) 
+ if output_root is not None: + effective_output_dir.mkdir(parents=True, exist_ok=True) if output_path is None: - output_path = input_path / f"combined_transcripts{suffix}.{ext}" + output_path = effective_output_dir / f"combined_transcripts{suffix}.{ext}" # Phase 1: Ensure cache is fresh and populated cache_was_updated = ensure_fresh_cache( @@ -1687,7 +1701,7 @@ def convert_jsonl_to( session_data = _build_session_data_from_messages(messages) output_path = _generate_paginated_html( messages, - input_path, + effective_output_dir, title, page_size, cache_manager, @@ -1749,7 +1763,7 @@ def convert_jsonl_to( _generate_individual_session_files( format, messages, - input_path, + effective_output_dir, from_date, to_date, cache_manager, @@ -2465,6 +2479,9 @@ def process_projects_hierarchy( page_size: int = 2000, detail: DetailLevel = DetailLevel.FULL, compact: bool = False, + output_dir: Optional[Path] = None, + expand_paths: bool = False, + filter_path: Optional[str] = None, ) -> Path: """Process the entire ~/.claude/projects/ hierarchy and create linked output files. @@ -2478,6 +2495,14 @@ def process_projects_hierarchy( image_export_mode: Image export mode for markdown silent: If True, suppress verbose per-file logging (show summary only) page_size: Maximum messages per page for combined transcript pagination + output_dir: Optional destination root for projected outputs (#151). + When None, outputs land under each source ``project_dir`` as + before (legacy in-place behaviour). + expand_paths: When True (and ``output_dir`` is set), expand each + project's flat encoded dir name to its real on-disk path + under ``output_dir``. + filter_path: When set, restrict to projects matching the prefix. + See ``utils.project_destination`` for the matching semantics. 
""" import time @@ -2522,6 +2547,27 @@ def process_projects_hierarchy( # Per-project stats for summary output project_stats: List[tuple[str, GenerationStats]] = [] + # `--filter-path` selection happens at the top of the loop + # (#151). Resolve once per project — using the cache when + # populated, else a quick JSONL peek — so `_collect_project_sessions` + # / cache rebuilds are skipped for filtered-out projects entirely. + from .utils import project_destination + + # Index page lives at the root of whatever output destination we + # use (either `--output` if set, or the legacy in-place projects + # tree). Per-project `html_file` entries are relative to this root. + index_root = output_dir if output_dir is not None else projects_path + + def _rel_to_index(p: Path) -> Path: + """Path of `p` relative to the index root, falling back to + absolute when `p` lives outside `index_root` (shouldn't happen + in normal use but keeps the index renderable rather than + crashing).""" + try: + return p.relative_to(index_root) + except ValueError: + return p + for project_dir in sorted(project_dirs): project_start_time = time.time() stats = GenerationStats() @@ -2535,6 +2581,29 @@ def process_projects_hierarchy( except Exception as e: stats.add_warning(f"Failed to initialize cache: {e}") + # Per-project destination (#151). When `output_dir` / + # `expand_paths` / `filter_path` are unset this returns + # `project_dir` (legacy in-place behaviour). When the + # filter excludes this project, returns None. + cached_working_dirs: Optional[list[str]] = None + if cache_manager is not None: + try: + cached_working_dirs = cache_manager.get_working_directories() + except Exception: + cached_working_dirs = None + dest_dir = project_destination( + project_dir, + output_dir=output_dir, + expand_paths=expand_paths, + filter_path=filter_path, + cached_working_directories=cached_working_dirs, + ) + if dest_dir is None: + # Filter-out: don't process this project at all. 
+ if not silent: + print(f" {project_dir.name}: skipped (filter)") + continue + # Phase 1: Fast check if anything needs updating (mtime comparison only) # Exclude agent files - they are loaded via session references, not directly jsonl_files = [ @@ -2560,7 +2629,10 @@ def process_projects_hierarchy( else 0 ) total_archived += archived_count - output_path = project_dir / "combined_transcripts.html" + # Output destination — `dest_dir` for #151's `--output` / + # `--expand-paths` / `--filter-path`, falling back to the + # source project_dir for legacy in-place behaviour. + output_path = dest_dir / "combined_transcripts.html" # Check combined_stale using the appropriate cache: # - Paginated projects store data in html_pages table (via save_page_cache) # - Non-paginated projects store data in html_cache table (via update_html_cache) @@ -2623,6 +2695,7 @@ def process_projects_hierarchy( page_size=page_size, detail=detail, compact=compact, + output_root=(dest_dir if dest_dir != project_dir else None), ) # Track timing @@ -2658,14 +2731,18 @@ def process_projects_hierarchy( if cached_project_data is not None: # Track total sessions for stats stats.sessions_total = len(cached_project_data.sessions) + # Path the index uses to link to this project's + # combined transcript (and to enumerate variants). + # Same as `project_dir.name` in legacy mode. 
+ rel_dest = _rel_to_index(dest_dir) # Use cached aggregation data project_summaries.append( { "name": project_dir.name, "path": project_dir, - "html_file": f"{project_dir.name}/{output_path.name}", + "html_file": f"{rel_dest}/{output_path.name}", "html_variants": _enumerate_project_variants( - project_dir, project_dir.name + dest_dir, str(rel_dest) ), "jsonl_count": jsonl_count, "message_count": cached_project_data.total_message_count, @@ -2811,13 +2888,14 @@ def process_projects_hierarchy( team_name_per_session[_sid] = _tn team_names_set: set[str] = set(team_name_per_session.values()) + rel_dest = _rel_to_index(dest_dir) project_summaries.append( { "name": project_dir.name, "path": project_dir, - "html_file": f"{project_dir.name}/{output_path.name}", + "html_file": f"{rel_dest}/{output_path.name}", "html_variants": _enumerate_project_variants( - project_dir, project_dir.name + dest_dir, str(rel_dest) ), "jsonl_count": jsonl_count, "message_count": len(messages), @@ -2862,19 +2940,40 @@ def process_projects_hierarchy( if cached_project_data is None: continue + # Apply --filter-path / --expand-paths to archived + # projects too. Note: archived dirs have no JSONLs to peek, + # so resolution falls back to cache (which exists for + # archived projects) or naive last-resort. + archived_cached_dirs: Optional[list[str]] = None + try: + archived_cached_dirs = cache_manager.get_working_directories() + except Exception: + archived_cached_dirs = None + archived_dest = project_destination( + archived_dir, + output_dir=output_dir, + expand_paths=expand_paths, + filter_path=filter_path, + cached_working_directories=archived_cached_dirs, + ) + if archived_dest is None: + continue + archived_project_count += 1 print( f" {archived_dir.name}: [ARCHIVED] ({len(cached_project_data.sessions)} sessions)" ) - # Add archived project to summaries + # Index entry for an archived project; the file may not + # exist at the projected path until the user re-renders. 
+ archived_rel = _rel_to_index(archived_dest) project_summaries.append( { "name": archived_dir.name, "path": archived_dir, - "html_file": f"{archived_dir.name}/combined_transcripts.html", + "html_file": f"{archived_rel}/combined_transcripts.html", "html_variants": _enumerate_project_variants( - archived_dir, archived_dir.name + archived_dest, str(archived_rel) ), "jsonl_count": 0, "message_count": cached_project_data.total_message_count, @@ -2923,9 +3022,11 @@ def process_projects_hierarchy( # Update total projects count to include archived total_projects = len(project_dirs) + archived_project_count - # Generate index (always regenerate if outdated) + # Generate index (always regenerate if outdated). Index lives at + # the root of the output destination — `output_dir` if set + # (#151), else the legacy `projects_path` location. ext = get_file_extension(output_format) - index_path = projects_path / get_index_filename(output_format) + index_path = index_root / get_index_filename(output_format) renderer = get_renderer(output_format, image_export_mode) index_regenerated = False if renderer.is_outdated(index_path) or from_date or to_date or any_cache_updated: @@ -2933,6 +3034,8 @@ def process_projects_hierarchy( project_summaries, from_date, to_date ) assert index_content is not None + # Ensure the index root exists when projecting into a fresh dir. + index_path.parent.mkdir(parents=True, exist_ok=True) # See issue #139: errors="replace" for lone-surrogate safety. 
index_path.write_text(index_content, encoding="utf-8", errors="replace") index_regenerated = True diff --git a/claude_code_log/utils.py b/claude_code_log/utils.py index f6a3022d..136090a1 100644 --- a/claude_code_log/utils.py +++ b/claude_code_log/utils.py @@ -155,6 +155,195 @@ def get_project_display_name( return display_name +def project_dir_to_real_path( + project_dir: Path, + cached_working_directories: Optional[list[str]] = None, +) -> Path: + """Recover the real on-disk path for a Claude project directory. + + Claude Code encodes project paths flatly: ``/`` and leading ``.`` + both become ``-`` (e.g. ``/home/joe/.claude`` → + ``-home-joe--claude``). The encoding is **lossy** — ``-home-joe-x-y`` + could mean either ``/home/joe/x/y`` or ``/home/joe/x-y``. The cache + (and live JSONLs) preserve the original ``cwd`` so we can disambiguate + without parsing the encoded name. + + Resolution strategy (issue #151): + + 1. **Cache hit** — if ``cached_working_directories`` is non-empty, + use its first entry. Authoritative — that's what Claude Code + recorded at session time. + 2. **JSONL peek** — open the project's first JSONL, scan up to a + handful of lines for the first entry with a ``cwd`` field, + return that. Cheap (one ``json.loads`` per line, no model + validation). + 3. **Naive last-resort** — strip the leading ``-`` and replace + remaining ``-``s with ``/``. Best-effort only; collapses + ambiguity in the lossy direction. Used when the project dir + has been emptied (orphan archived dir) and no cache survives. + + Args: + project_dir: The encoded project directory + (e.g. ``~/.claude/projects/-home-joe-project-A``). + cached_working_directories: Optional cached ``working_directories`` + list from the project's cache (``ProjectCache.working_directories``). + + Returns: + The recovered real path. May be a best-effort guess in the + last-resort case. + """ + # Tier 1: cache. Only accept absolute paths — relative or oddly + # shaped values fall through (e.g. 
test fixtures with synthetic + # `cwd` entries). + if cached_working_directories: + real_dirs = [ + wd + for wd in cached_working_directories + if not _is_temp_path(wd) and Path(wd).is_absolute() + ] + if real_dirs: + return Path(real_dirs[0]) + + # Tier 2: peek the first JSONL for a `cwd` field. Same + # absoluteness guard as tier 1. + if project_dir.is_dir(): + # Skip agent-* sidechain files; they may not carry the + # top-level project cwd. Take any other JSONL. + for jsonl_path in sorted(project_dir.glob("*.jsonl")): + if jsonl_path.name.startswith("agent-"): + continue + cwd_from_peek = _peek_jsonl_for_cwd(jsonl_path) + if cwd_from_peek and Path(cwd_from_peek).is_absolute(): + return Path(cwd_from_peek) + # First non-agent JSONL exhausted with no usable cwd — + # bail out rather than scanning every file. + break + + # Tier 3: naive last-resort. Recovers leading-dot dir components + # via `--` → `/.` mapping (Claude Code encodes `/.foo` as `--foo`). + # Remaining ambiguity (`/foo-bar` vs `/foo/bar`) collapses toward + # the more-segments interpretation; documented as best-effort. + name = project_dir.name + if name.startswith("-"): + body = name[1:].replace("--", "/.").replace("-", "/") + return Path("/" + body) + return Path(name.replace("--", "/.").replace("-", "/")) + + +# Maximum number of lines we read from a project's first JSONL when +# trying to recover the project's `cwd`. Real-world JSONLs put `cwd` +# on the very first user/assistant entry, so 32 is generous. 
+_PEEK_JSONL_MAX_LINES = 32 + + +def _peek_jsonl_for_cwd(jsonl_path: Path) -> Optional[str]: + """Return the first non-empty ``cwd`` value found in the JSONL, + or ``None`` if none is found within the peek window.""" + import json + from typing import cast + + try: + with jsonl_path.open("r", encoding="utf-8", errors="replace") as fh: + for _ in range(_PEEK_JSONL_MAX_LINES): + line = fh.readline() + if not line: + return None + line = line.strip() + if not line: + continue + try: + entry: object = json.loads(line) + except json.JSONDecodeError: + continue + if not isinstance(entry, dict): + continue + # `json.loads` produces Unknown-typed values; cast to + # a concrete shape for pyright. Runtime is unaffected. + cwd = cast("dict[str, object]", entry).get("cwd") + if isinstance(cwd, str) and cwd: + return cwd + except OSError: + return None + return None + + +# Recognised output format suffixes for the `--output` dir-vs-file +# heuristic. If a user passes ``--output /tmp/out.md`` we treat it as +# a file; ``--output /tmp/obsidian/`` is a directory. +_OUTPUT_FILE_SUFFIXES = frozenset({".html", ".md", ".markdown", ".json"}) + + +def output_path_is_file(output: Path) -> bool: + """Heuristic for ``--output`` interpretation (issue #151). + + A path is a *file* destination when its suffix is one of the + recognised output-format extensions; otherwise it's a *directory* + destination. Doesn't touch the filesystem — pure path-string + inspection. + """ + return output.suffix.lower() in _OUTPUT_FILE_SUFFIXES + + +def project_destination( + project_dir: Path, + *, + output_dir: Optional[Path], + expand_paths: bool, + filter_path: Optional[str], + cached_working_directories: Optional[list[str]] = None, +) -> Optional[Path]: + """Compute the per-project output destination directory (issue #151). + + Implements the flag interaction matrix from + ``work/obsidian-friendly-output.md``. 
Pure function — no I/O beyond + what ``project_dir_to_real_path`` may do (cache or one JSONL peek). + + Args: + project_dir: The source project directory under + ``~/.claude/projects/`` (e.g. ``-home-joe-project-A``). + output_dir: Target root, or None for legacy in-place behaviour. + expand_paths: When True, project's flat name is expanded back + to its real on-disk path under ``output_dir``. + filter_path: When set, restrict to projects whose path + (real path if ``expand_paths``, else flat dir name) + starts with the prefix. With ``expand_paths``, the + matched prefix is also truncated from the destination. + cached_working_directories: Optional cached working dirs for + ``project_dir_to_real_path``. + + Returns: + Destination directory, or ``None`` if the project should be + skipped (filter excluded it). + """ + # Legacy: no --output → write into the source dir (current behaviour). + if output_dir is None: + return project_dir + + # With --expand-paths: resolve the real path and (optionally) trim + # the filter prefix. + if expand_paths: + real_path = project_dir_to_real_path(project_dir, cached_working_directories) + if filter_path: + filter_root = Path(filter_path) + try: + rel = real_path.relative_to(filter_root) + except ValueError: + # Real path is not under filter prefix — skip. + return None + return output_dir / rel + # Real-path tree directly under output_dir. Drop the leading + # `/` so the joined path stays relative to output_dir. + rel_parts = real_path.parts[1:] if real_path.is_absolute() else real_path.parts + return output_dir.joinpath(*rel_parts) + + # No --expand-paths: filter against the flat dir name (per Q2), + # destination keeps the flat name. + if filter_path: + if not project_dir.name.startswith(filter_path): + return None + return output_dir / project_dir.name + + def should_skip_message(text_content: str) -> bool: """ Determine if a message should be skipped in transcript rendering. 
diff --git a/test/test_obsidian_output.py b/test/test_obsidian_output.py new file mode 100644 index 00000000..8119e71d --- /dev/null +++ b/test/test_obsidian_output.py @@ -0,0 +1,200 @@ +"""End-to-end tests for the Obsidian-friendly output flags (issue #151). + +Drives the converter through ``process_projects_hierarchy`` with each +flag combination from the matrix and asserts the produced directory +tree. **Markdown-scoped per Q1 resolution** — the flag mechanics live +in ``converter.py``/``utils.py``, not the renderers, so HTML/JSON parity +is asserted by code inspection rather than by re-running the matrix +per format. +""" + +import json +from pathlib import Path + +import pytest + +from claude_code_log.converter import process_projects_hierarchy + + +def _build_fake_projects_dir( + root: Path, + projects: list[tuple[str, str]], +) -> Path: + """Create a fake `~/.claude/projects/`-shaped directory. + + Args: + root: tmp_path-style scratch directory. + projects: list of (encoded_name, real_cwd) pairs. + Returns: + The projects-dir path. + """ + projects_dir = root / "projects" + projects_dir.mkdir() + for encoded, cwd in projects: + proj = projects_dir / encoded + proj.mkdir() + # Minimal session JSONL — enough for the loader to find one + # session and produce one combined transcript. 
+ entry = { + "parentUuid": None, + "isSidechain": False, + "userType": "external", + "cwd": cwd, + "sessionId": f"session-{encoded.lstrip('-')[:32]}", + "version": "2.1.0", + "type": "user", + "uuid": f"uuid-{encoded.lstrip('-')[:32]}", + "timestamp": "2026-05-10T10:00:00.000Z", + "message": { + "role": "user", + "content": [{"type": "text", "text": f"hi from {encoded}"}], + }, + } + (proj / "session.jsonl").write_text(json.dumps(entry) + "\n", encoding="utf-8") + return projects_dir + + +@pytest.fixture +def fake_projects(tmp_path: Path) -> Path: + """Three encoded projects with realistic absolute cwds (which is + what the JSONL-peek tier of `project_dir_to_real_path` will pick up). + """ + return _build_fake_projects_dir( + tmp_path, + projects=[ + ("-home-joe-project-A", "/home/joe/project/A"), + ("-home-joe-project-B", "/home/joe/project/B"), + ("-home-jane-project-C", "/home/jane/project/C"), + ], + ) + + +@pytest.fixture +def isolated_cache(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + """Steer the cache to tmp so the test doesn't pollute / depend on + the user's real `~/.claude/projects/` cache.""" + cache_path = tmp_path / "cache.db" + monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(cache_path)) + return cache_path + + +# Keep usage explicit so the fixture clearly applies even when its +# return value isn't read directly in the test body. +_ = isolated_cache + + +class TestObsidianOutputMatrix: + """The matrix from work/obsidian-friendly-output.md, end-to-end. + Each test asserts the produced directory shape under the relevant + flag combination.""" + + def test_legacy_no_output(self, fake_projects: Path, isolated_cache: Path): + """Legacy: `--output` unset → outputs land inside each + source project_dir under the projects tree (current behaviour + from before #151).""" + process_projects_hierarchy( + fake_projects, + output_format="md", + ) + + # Each project gets a combined_transcripts.md under its source. 
+ for encoded in [ + "-home-joe-project-A", + "-home-joe-project-B", + "-home-jane-project-C", + ]: + assert (fake_projects / encoded / "combined_transcripts.md").exists() + # Index at the projects-dir root. + assert (fake_projects / "index.md").exists() + + def test_output_only_flat_copy( + self, + fake_projects: Path, + isolated_cache: Path, + tmp_path: Path, + ): + """`--output` alone → flat copy of each project under + //. Closes the implicit gap (`--output` was + previously silently ignored for `--all-projects`).""" + out = tmp_path / "out-flat" + process_projects_hierarchy( + fake_projects, + output_format="md", + output_dir=out, + ) + assert (out / "-home-joe-project-A" / "combined_transcripts.md").exists() + assert (out / "-home-joe-project-B" / "combined_transcripts.md").exists() + assert (out / "-home-jane-project-C" / "combined_transcripts.md").exists() + assert (out / "index.md").exists() + + def test_expand_paths_full_tree( + self, + fake_projects: Path, + isolated_cache: Path, + tmp_path: Path, + ): + """`--output --expand-paths` → expanded real-path tree under + /. Encoded names are resolved via JSONL peek (the + fixture's cwd field).""" + out = tmp_path / "out-expanded" + process_projects_hierarchy( + fake_projects, + output_format="md", + output_dir=out, + expand_paths=True, + ) + assert (out / "home/joe/project/A/combined_transcripts.md").exists() + assert (out / "home/joe/project/B/combined_transcripts.md").exists() + assert (out / "home/jane/project/C/combined_transcripts.md").exists() + assert (out / "index.md").exists() + # The encoded-name flat directories must NOT exist — we + # expanded, didn't both expand and copy. 
+ assert not (out / "-home-joe-project-A").exists() + + def test_expand_paths_filter_match_truncates( + self, + fake_projects: Path, + isolated_cache: Path, + tmp_path: Path, + ): + """`--filter-path /home/joe --expand-paths`: filter against + real path; truncate the prefix; matching projects land at + //.""" + out = tmp_path / "out-filtered" + process_projects_hierarchy( + fake_projects, + output_format="md", + output_dir=out, + expand_paths=True, + filter_path="/home/joe", + ) + # Projects under /home/joe matched, prefix truncated. + assert (out / "project/A/combined_transcripts.md").exists() + assert (out / "project/B/combined_transcripts.md").exists() + # Project under /home/jane filtered out — no output produced. + assert not (out / "project/C").exists() + assert not (out / "home").exists() # would only exist if /home/joe survived + assert (out / "index.md").exists() + + def test_filter_flat_no_expand( + self, + fake_projects: Path, + isolated_cache: Path, + tmp_path: Path, + ): + """`--filter-path -home-joe` without `--expand-paths`: filter + against the encoded dir name; no truncation; matching + projects land at //.""" + out = tmp_path / "out-flat-filtered" + process_projects_hierarchy( + fake_projects, + output_format="md", + output_dir=out, + expand_paths=False, + filter_path="-home-joe", + ) + # Two `-home-joe-...` projects matched; flat name preserved. + assert (out / "-home-joe-project-A" / "combined_transcripts.md").exists() + assert (out / "-home-joe-project-B" / "combined_transcripts.md").exists() + # `-home-jane-...` doesn't start with `-home-joe`. + assert not (out / "-home-jane-project-C").exists() diff --git a/test/test_path_projection.py b/test/test_path_projection.py new file mode 100644 index 00000000..297c36f0 --- /dev/null +++ b/test/test_path_projection.py @@ -0,0 +1,262 @@ +"""Unit tests for the path-projection helpers (issue #151).
+ +Covers ``project_dir_to_real_path`` (three-tier resolution: cache → +JSONL peek → naive last-resort) and ``project_destination`` (the +flag-interaction matrix from ``work/obsidian-friendly-output.md``). +""" + +import json +from pathlib import Path + +import pytest + +from claude_code_log.utils import ( + output_path_is_file, + project_destination, + project_dir_to_real_path, +) + + +def _write_jsonl_with_cwd(jsonl_path: Path, cwd: str) -> None: + """Write a minimal JSONL line carrying a `cwd` field — enough to + exercise the JSONL-peek tier of `project_dir_to_real_path`.""" + entry = { + "parentUuid": None, + "isSidechain": False, + "userType": "external", + "cwd": cwd, + "sessionId": "11111111-1111-1111-1111-111111111111", + "version": "2.1.0", + "type": "user", + "uuid": "22222222-2222-2222-2222-222222222222", + "timestamp": "2026-05-10T10:00:00.000Z", + "message": { + "role": "user", + "content": [{"type": "text", "text": "hi"}], + }, + } + jsonl_path.write_text(json.dumps(entry) + "\n", encoding="utf-8") + + +# ----------------------------------------------------------------------------- +# project_dir_to_real_path +# ----------------------------------------------------------------------------- + + +class TestProjectDirToRealPath: + """Three-tier resolution: cache → JSONL peek → naive last-resort.""" + + def test_uses_cache_cwd(self, tmp_path: Path): + """Tier 1: when cached_working_directories is supplied, the + first absolute entry wins.""" + result = project_dir_to_real_path( + tmp_path / "-anything", + cached_working_directories=["/home/joe/x/y"], + ) + assert result == Path("/home/joe/x/y") + + def test_skips_relative_cache_entries(self, tmp_path: Path): + """Tier 1 absoluteness guard: relative `cwd` values fall + through (test fixtures sometimes carry these).""" + project_dir = tmp_path / "-skipped" + project_dir.mkdir() + # Relative cache value should be rejected; with no JSONLs to + # peek, falls through to naive last-resort. 
+ result = project_dir_to_real_path( + project_dir, + cached_working_directories=["relative-not-absolute"], + ) + # Naive: -skipped → /skipped + assert result == Path("/skipped") + + def test_skips_temp_paths_in_cache(self, tmp_path: Path): + """Tier 1: temp paths (/tmp/, macOS /private/var/folders/) + are filtered out — they're not the user's authoritative cwd.""" + project_dir = tmp_path / "-orphan" + project_dir.mkdir() + result = project_dir_to_real_path( + project_dir, + cached_working_directories=["/tmp/pytest-of-cboos/xyz"], + ) + # Filter dropped the /tmp/ entry → naive last-resort. + assert result == Path("/orphan") + + def test_peeks_jsonl_when_no_cache(self, tmp_path: Path): + """Tier 2: with no cache, the first JSONL's `cwd` is read.""" + project_dir = tmp_path / "-home-joe-x-y" + project_dir.mkdir() + _write_jsonl_with_cwd(project_dir / "session.jsonl", "/home/joe/x/y") + result = project_dir_to_real_path(project_dir) + assert result == Path("/home/joe/x/y") + + def test_peek_disambiguates_cache_collision(self, tmp_path: Path): + """Two `-home-joe-x-y` dirs with different real cwds: each + resolves correctly because the cache (or JSONL) is consulted.""" + # Same encoded name, different cwds → different real paths. + cache_a = ["/home/joe/x/y"] # subdir interpretation + cache_b = ["/home/joe/x-y"] # single-dir interpretation + result_a = project_dir_to_real_path( + Path("/anywhere/-home-joe-x-y"), + cached_working_directories=cache_a, + ) + result_b = project_dir_to_real_path( + Path("/anywhere/-home-joe-x-y"), + cached_working_directories=cache_b, + ) + assert result_a == Path("/home/joe/x/y") + assert result_b == Path("/home/joe/x-y") + + def test_peek_skips_agent_files(self, tmp_path: Path): + """`agent-*.jsonl` files (sidechains) are skipped during peek + because they may not carry the project's top-level cwd.""" + project_dir = tmp_path / "-peek-test" + project_dir.mkdir() + # Agent file FIRST alphabetically — would be picked if not + # skipped. 
Real session JSONL has the right cwd. + _write_jsonl_with_cwd(project_dir / "agent-aaaa.jsonl", "/wrong/path") + _write_jsonl_with_cwd(project_dir / "session-bbbb.jsonl", "/right/path") + result = project_dir_to_real_path(project_dir) + assert result == Path("/right/path") + + @pytest.mark.parametrize( + "encoded,expected", + [ + ("-home-cboos-bin", "/home/cboos/bin"), + # Double-dash → leading-dot dir component (`/.foo`). + ("-home-cboos--claude", "/home/cboos/.claude"), + ( + "-home-cboos-Documents-Obsidian-Work--git", + "/home/cboos/Documents/Obsidian/Work/.git", + ), + ("-home-joe-project-A", "/home/joe/project/A"), + ], + ) + def test_naive_last_resort(self, tmp_path: Path, encoded: str, expected: str): + """Tier 3: no cache, no JSONLs, no fallback file. Naive + `/`-for-`-` inversion with `--` → `/.` for dotfile dirs. + Sampled from real `~/.claude/projects/` corpus.""" + project_dir = tmp_path / encoded + # Don't mkdir — `is_dir()` returns False, so peek tier is + # skipped and we go straight to naive. + result = project_dir_to_real_path(project_dir) + assert result == Path(expected) + + +# ----------------------------------------------------------------------------- +# project_destination — the flag interaction matrix +# ----------------------------------------------------------------------------- + + +class TestProjectDestination: + """Per-project destination logic. Six matrix rows.""" + + SRC = Path("/proj/-home-joe-project-A") + OUT = Path("/tmp/obsidian") + + def test_legacy_no_output_dir(self): + """No `--output` → write into the source dir (current + behaviour — strict backwards compatibility).""" + dest = project_destination( + self.SRC, + output_dir=None, + expand_paths=False, + filter_path=None, + ) + assert dest == self.SRC + + def test_flat_copy(self): + """`--output` only → flat copy under output_dir, project + keeps its encoded name. 
Closes the previously-implicit gap + where `--output` was silently ignored in `--all-projects`.""" + dest = project_destination( + self.SRC, + output_dir=self.OUT, + expand_paths=False, + filter_path=None, + ) + assert dest == self.OUT / "-home-joe-project-A" + + def test_expand_no_filter(self): + """`--output --expand-paths` → full real-path expansion + under output_dir.""" + dest = project_destination( + self.SRC, + output_dir=self.OUT, + expand_paths=True, + filter_path=None, + cached_working_directories=["/home/joe/project/A"], + ) + assert dest == self.OUT / "home/joe/project/A" + + def test_expand_filter_match(self): + """`--expand-paths --filter-path /home/joe`: filter against + real path, truncate the prefix from the destination.""" + dest = project_destination( + self.SRC, + output_dir=self.OUT, + expand_paths=True, + filter_path="/home/joe", + cached_working_directories=["/home/joe/project/A"], + ) + assert dest == self.OUT / "project/A" + + def test_expand_filter_miss(self): + """When the real path doesn't start with the filter prefix, + the project is excluded (returns None).""" + dest = project_destination( + self.SRC, + output_dir=self.OUT, + expand_paths=True, + filter_path="/home/jane", # different user + cached_working_directories=["/home/joe/project/A"], + ) + assert dest is None + + def test_filter_match_flat(self): + """`--filter-path` without `--expand-paths` matches the flat + encoded dir name (per Q2 resolution); no truncation.""" + dest = project_destination( + self.SRC, + output_dir=self.OUT, + expand_paths=False, + filter_path="-home-joe", + ) + assert dest == self.OUT / "-home-joe-project-A" + + def test_filter_miss_flat(self): + """Flat-name filter that doesn't match the encoded prefix + excludes the project.""" + dest = project_destination( + self.SRC, + output_dir=self.OUT, + expand_paths=False, + filter_path="-home-jane", + ) + assert dest is None + + +# ----------------------------------------------------------------------------- +# 
output_path_is_file (--output suffix heuristic, Q4 resolution) +# ----------------------------------------------------------------------------- + + +class TestOutputPathIsFile: + @pytest.mark.parametrize( + "value,is_file", + [ + ("/tmp/out.md", True), + ("/tmp/out.markdown", True), + ("/tmp/out.html", True), + ("/tmp/out.json", True), + # Case-insensitive + ("/tmp/Out.HTML", True), + # No recognised suffix → directory + ("/tmp/out", False), + ("/tmp/obsidian-vault", False), + # Suffix that isn't a recognised output format + ("/tmp/out.txt", False), + ("/tmp/out.tar.gz", False), + ], + ) + def test_suffix_heuristic(self, value: str, is_file: bool): + assert output_path_is_file(Path(value)) is is_file From dd0db6433ff5419b12ce3e02717662e4c0f43492 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 10 May 2026 19:37:30 +0200 Subject: [PATCH 03/11] Address monk review: CLI footgun guards (#151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two footgun fixes flagged in monk's review of dev/obsidian-friendly-output: 1. Reject relative `--filter-path` when paired with `--expand-paths`. Path.relative_to raises ValueError for *any* mismatch including "argument is relative" — so without the guard, a user typing `--filter-path home/joe` (forgetting the leading `/`) would get every project silently skipped. Now rejected at click parse time with click.BadParameter. 2. Add the no-op flag warnings the plan promised but the impl missed: - --expand-paths/--filter-path without --all-projects (or implicit --all-projects via no INPUT_PATH) → warn. - --output unset OR --output is file-suffixed (single-file path bypasses these flags) → warn. The existing --tui guard stays as the first branch. Plus monk's optional doc clarification: tightened the _rel_to_index helper's comment from "shouldn't happen" to "unreachable under the documented matrix; kept as a paranoia rail" — empirically verified by monk's matrix-row walkthrough. 
Adds three regression tests in test_obsidian_output.py exercising the new validations through CliRunner end-to-end. Co-Authored-By: Claude Opus 4.7 (1M context) --- claude_code_log/cli.py | 39 +++++++++-- claude_code_log/converter.py | 12 ++-- test/test_obsidian_output.py | 121 +++++++++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+), 8 deletions(-) diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index feeb5e60..7d49674f 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -651,16 +651,47 @@ def main( # Configure logging to show warnings and above logging.basicConfig(level=logging.WARNING, format="%(levelname)s: %(message)s") + # Loud rejection of relative `--filter-path` when paired with + # `--expand-paths` (#151). Without this, a user typing + # `--filter-path home/joe` (forgetting the leading `/`) would + # match against an absolute resolved path via `Path.relative_to`, + # which raises ValueError for *any* mismatch including + # "argument is relative" — so the silent failure mode is "every + # project skipped". Reject up-front instead. + if filter_path and expand_paths and not Path(filter_path).is_absolute(): + raise click.BadParameter( + f"--filter-path must be an absolute path when --expand-paths is set; " + f"got {filter_path!r}", + param_hint="--filter-path", + ) + # Warn early if Obsidian-friendly flags (#151) were passed in a - # context where they're no-ops. `--all-projects` is the only mode - # that consumes them; `--output` must be a directory (file-suffixed - # output goes through the single-file path which doesn't honour - # these flags). + # context where they're no-ops. `--all-projects` (explicit or + # implicit via no input_path) is the only mode that consumes them; + # `--output` must be a directory (file-suffixed output goes + # through the single-file path which doesn't honour these flags). 
+ from .utils import output_path_is_file as _output_path_is_file + + will_run_all_projects = all_projects or input_path is None if (expand_paths or filter_path) and tui: click.echo( "Warning: --expand-paths / --filter-path are ignored in --tui mode.", err=True, ) + elif (expand_paths or filter_path) and not will_run_all_projects: + click.echo( + "Warning: --expand-paths / --filter-path require --all-projects " + "(or omitting INPUT_PATH); ignoring.", + err=True, + ) + elif (expand_paths or filter_path) and ( + output is None or _output_path_is_file(output) + ): + click.echo( + "Warning: --expand-paths / --filter-path require --output to be a " + "directory (no recognised file suffix); ignoring.", + err=True, + ) from .models import DetailLevel diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 0a4f3538..c7bd41bf 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -2559,10 +2559,14 @@ def process_projects_hierarchy( index_root = output_dir if output_dir is not None else projects_path def _rel_to_index(p: Path) -> Path: - """Path of `p` relative to the index root, falling back to - absolute when `p` lives outside `index_root` (shouldn't happen - in normal use but keeps the index renderable rather than - crashing).""" + """Path of `p` relative to the index root. + + Unreachable under the documented flag matrix: every + ``project_destination`` shape produces a ``dest_dir`` that + lives under ``index_root`` (legacy → ``projects_path``; + ``--output`` modes → ``output_dir``). Kept as a paranoia rail + for future code paths that might inject an unexpected + absolute ``dest_dir`` (e.g. 
via a test seam).""" try: return p.relative_to(index_root) except ValueError: diff --git a/test/test_obsidian_output.py b/test/test_obsidian_output.py index 8119e71d..88cfb379 100644 --- a/test/test_obsidian_output.py +++ b/test/test_obsidian_output.py @@ -198,3 +198,124 @@ def test_filter_flat_no_expand( assert (out / "-home-joe-project-B" / "combined_transcripts.md").exists() # `-home-jane-...` doesn't start with `-home-joe`. assert not (out / "-home-jane-project-C").exists() + + +# ----------------------------------------------------------------------------- +# CLI validation guards (#151 footgun fixes from monk's review) +# ----------------------------------------------------------------------------- + + +class TestCliValidationGuards: + """The CLI rejects relative `--filter-path` when paired with + `--expand-paths` (would otherwise silently exclude every project), + and warns when the new flags are passed in no-op contexts.""" + + def test_relative_filter_path_with_expand_is_rejected( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + """Loud rejection — without this, `--filter-path home/joe` + (forgetting the leading `/`) would match no projects silently + because `Path.relative_to` raises for relative paths.""" + from click.testing import CliRunner + + from claude_code_log.cli import main + + out = tmp_path / "out" + runner = CliRunner() + result = runner.invoke( + main, + [ + str(fake_projects), + "--all-projects", + "--output", + str(out), + "--expand-paths", + "--filter-path", + "home/joe", # relative — should be rejected + ], + ) + assert result.exit_code != 0 + assert "must be an absolute path" in result.output + # No projects rendered. 
+ assert not out.exists() or not any( + (out / p).exists() for p in ["home", "project", "-home-joe-project-A"] + ) + + def test_absolute_filter_path_with_expand_is_accepted( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + """Counterpart: absolute `--filter-path` passes the guard.""" + from click.testing import CliRunner + + from claude_code_log.cli import main + + out = tmp_path / "out" + runner = CliRunner() + result = runner.invoke( + main, + [ + str(fake_projects), + "--all-projects", + "--output", + str(out), + "--expand-paths", + "--filter-path", + "/home/joe", + "--format", + "md", + ], + ) + assert result.exit_code == 0, result.output + assert (out / "project/A/combined_transcripts.md").exists() + + def test_warns_when_flags_used_without_all_projects( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + """`--expand-paths` against a single-file/single-project + target (without `--all-projects`) is a no-op; user gets a + warning rather than silent ignore.""" + from click.testing import CliRunner + + from claude_code_log.cli import main + + # Point at a single project_dir with --expand-paths but no + # --all-projects (and explicitly no `output` to make the + # control flow predictable). Should warn. + single_project = fake_projects / "-home-joe-project-A" + runner = CliRunner() + result = runner.invoke( + main, + [ + str(single_project), + "--expand-paths", + "--format", + "md", + ], + ) + # The exact stderr-output ordering is implementation-dependent, + # but the warning text must surface somewhere. 
+ assert "require --all-projects" in result.output + + def test_warns_when_expand_paths_with_file_output( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + """`--output some-file.md --expand-paths` is a no-op (file + output goes through the single-file path); warn.""" + from click.testing import CliRunner + + from claude_code_log.cli import main + + runner = CliRunner() + result = runner.invoke( + main, + [ + str(fake_projects), + "--all-projects", + "--output", + str(tmp_path / "out.md"), # file-suffixed + "--expand-paths", + "--format", + "md", + ], + ) + assert "require --output to be a directory" in result.output From f24bef1be00bbccdaf5ee981bb9fbbe6170f537a Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 10 May 2026 20:03:11 +0200 Subject: [PATCH 04/11] Plan: record cache-freshness-vs-source observation as follow-up (#151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `is_html_stale` / `is_page_stale` resolve `actual_file` against `self.project_path` (the SOURCE dir under ~/.claude/projects/), not the actual output destination. With legacy in-place behaviour the two are identical. With `--output` they diverge — every run re-renders because the cache thinks the source's combined_transcripts.html is the canonical artifact (and it's never written there in `--output` mode). Practical implication: bouncing between several --output dirs always re-renders even when destinations are current. JSONL parsing is still cache-hit; only rendering re-runs. Recorded as a "Follow-up / Open points" section alongside the two follow-ups monk surfaced (archived projects with --output; peek-debug logging). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- work/obsidian-friendly-output.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/work/obsidian-friendly-output.md b/work/obsidian-friendly-output.md index c237fba4..825c4aca 100644 --- a/work/obsidian-friendly-output.md +++ b/work/obsidian-friendly-output.md @@ -369,6 +369,31 @@ output destination changes, not the rendered content. --- +## Follow-up / Open points + +### Cache-freshness checks resolve against `project_path` (source), not the output destination + +`cache.is_html_stale(html_path, ...)` and `cache.is_page_stale(...)` both compute their `actual_file` check as `self.project_path / html_path` — the **source** project dir under `~/.claude/projects/`, not the actual output destination (`dest_dir`). With the legacy in-place behaviour the two are identical, so the check works as intended. With `--output` projecting to a different tree, the source path never has a `combined_transcripts.html`, so `is_html_stale` returns "file_missing" / "stale" on every run. + +**Practical implication** — both runs of the same source against two different `--output` dirs both produce correct output (the `not output_path.exists()` term in `process_projects_hierarchy`'s `needs_work` and the per-session-file existence checks force regeneration). But every `--output` switch always re-renders, even when the destination is already up-to-date. JSONL parsing is still cache-hit ("X sessions" instead of "X files updated"), only rendering re-runs. + +``` +Run 1 (--output /tmp/A): 4.4s (8 projects updated) +Run 2 (--output /tmp/B): 2.3s (cache-hit on JSONL parse, + but rendering re-ran) +Run 3 (--output /tmp/A): ~2.3s (same — A's existing files + are not consulted) +``` + +**Future optimisation** — make the html-cache row's freshness check destination-aware (e.g. record the absolute destination path when writing, compare against it on next run). 
Bounded value: only matters when users alternate between several `--output` destinations on the same source. Not worth the complexity until someone hits the slowdown in practice. + +### Other follow-ups (already noted in the implementation) + +- **Archived projects with `--output`** — index links point to projected paths whose files won't exist until the user re-renders. Two plausible mitigations: exclude archived projects from the index in `--output` mode, or always link to the original on-disk location regardless of `--output` / `--expand-paths`. (Surfaced by monk; left for follow-up.) +- **`_peek_jsonl_for_cwd` debug logging** — current shape is silent on tier-2→tier-3 fallthroughs; a `logger.debug(...)` would help when someone is debugging an unexpected naive-tier hit. Zero-noise default kept. + +--- + ## Out of scope (mention for completeness) - Obsidian-specific frontmatter (YAML at top of each `.md` for tags / From d41a6e45193f1f2bf372f570ec41f81a53bb392d Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 10 May 2026 20:23:24 +0200 Subject: [PATCH 05/11] Plan: record absolute-filter-no-expand footgun + ergonomic follow-ups (#151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User reported empirically: --filter-path /home/cboos/Workspace/github/daain (no --expand-paths) → 665 projects processed, all skipped, only index landed Same class of footgun as the relative-filter-with-expand case monk caught: filter resolves against the encoded flat name (-home-...), which an absolute path never matches. Recorded as a follow-up alongside three related ergonomics items the user surfaced: - --filter-path / --expand-paths should imply --all-projects (no reason for them not to; nothing else to filter). - --expand-paths for single-session / single-project mode (project one artefact into <output_dir>/<real_path>/). - --dry-run flag — show planned destinations without writing. 
Two fixes considered for the absolute-filter case: rejection at parse time (symmetric with monk's existing footgun guard) or auto-implying --expand-paths (friendlier; encoded-form filtering is the niche case). Plan leans toward auto-implying. No code changes in this commit — recording for review/dispatch. Co-Authored-By: Claude Opus 4.7 (1M context) --- work/obsidian-friendly-output.md | 39 ++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/work/obsidian-friendly-output.md b/work/obsidian-friendly-output.md index 825c4aca..d93a5529 100644 --- a/work/obsidian-friendly-output.md +++ b/work/obsidian-friendly-output.md @@ -392,6 +392,45 @@ Run 3 (--output /tmp/A): ~2.3s (same — A's existing files - **Archived projects with `--output`** — index links point to projected paths whose files won't exist until the user re-renders. Two plausible mitigations: exclude archived projects from the index in `--output` mode, or always link to the original on-disk location regardless of `--output` / `--expand-paths`. (Surfaced by monk; left for follow-up.) - **`_peek_jsonl_for_cwd` debug logging** — current shape is silent on tier-2→tier-3 fallthroughs; a `logger.debug(...)` would help when someone is debugging an unexpected naive-tier hit. Zero-noise default kept. +### User-surfaced ergonomics gaps + +#### Absolute `--filter-path` without `--expand-paths` silently excludes everything + +Symmetric inverse of the footgun monk caught (relative `--filter-path` with `--expand-paths` excludes everything via `Path.relative_to`). Reproduced empirically: + +``` +$ uv run claude-code-log -o .examples/.../ccl --all-projects \ + --filter-path /home/cboos/Workspace/github/daain \ + --detail low --compact --format md +Processed 665 projects in 1.3s + Index regenerated +$ ls .examples/.../ccl +index.md # ← no per-project output +``` + +The Q2 resolution says: without `--expand-paths`, the filter matches against the encoded flat dir name (`-home-cboos-...`). 
An absolute path starting with `/` matches no encoded name, so all 665 projects filter out. No error, no warning — only the index lands. + +Two fixes to consider (same shape as the existing footgun guards): + +- **(A) Reject** at click parse time when `--filter-path.startswith("/")` and `--expand-paths` is unset. Symmetric with monk's relative-filter rejection. +- **(B) Auto-imply `--expand-paths`** when `--filter-path` is absolute. Friendlier; encoded-form filtering is the niche case. + +Lean toward (B). Either is straightforward. + +#### `--filter-path` and `--expand-paths` should imply `--all-projects` + +Without `--all-projects`, both flags are no-ops (currently warned-about, but no auto-elevation). What would a user reasonably expect from `--filter-path /home/joe` other than "filter the projects under `/home/joe`"? There's nothing else to filter. Auto-imply rather than warn-and-ignore. + +#### `--expand-paths` for single-session / single-project mode + +Today `--expand-paths` is wired only through `process_projects_hierarchy`. Reasonable extension: when a single-session or single-project export is requested with `--output <output_dir>` and `--expand-paths`, project that one artefact into `<output_dir>/<real_path>/` using the same path-projection helper. Same convention, same matrix shape — just narrower scope. + +#### `--dry-run` mode + +Show what would be generated (projected destinations, filter selections) without actually rendering or writing. Useful for sanity-checking a flag combination — especially with the path-projection logic where the destination depends on cache state and JSONL peek results. Pairs naturally with `--filter-path` + `--expand-paths` exploration. + +Implementation sketch: a top-level CLI flag that, when set, prints the per-project decision (`source -> dest` or `: filter excluded`) and exits before any file I/O. Cheap to implement on top of `project_destination()` since the helper is already pure. 
+ --- ## Out of scope (mention for completeness) From 0034beb9a40dac09d08ff76ac9abc64d28798f88 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 10 May 2026 20:36:50 +0200 Subject: [PATCH 06/11] Plan: clarify --filter-path/--expand-paths auto-imply asymmetry (#151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only --filter-path can safely imply --all-projects — there's nothing else to filter. --expand-paths can't, because it has independent meaning in single-session / single-project mode (project one artefact under <output_dir>/<real_path>/). Implying --all-projects from --expand-paths would silently switch the input scope, which is a much bigger surprise than --filter-path could ever be. Co-Authored-By: Claude Opus 4.7 (1M context) --- work/obsidian-friendly-output.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/work/obsidian-friendly-output.md b/work/obsidian-friendly-output.md index d93a5529..7f0b6e45 100644 --- a/work/obsidian-friendly-output.md +++ b/work/obsidian-friendly-output.md @@ -417,9 +417,11 @@ Two fixes to consider (same shape as the existing footgun guards): Lean toward (B). Either is straightforward. -#### `--filter-path` and `--expand-paths` should imply `--all-projects` +#### `--filter-path` should imply `--all-projects` -Without `--all-projects`, both flags are no-ops (currently warned-about, but no auto-elevation). What would a user reasonably expect from `--filter-path /home/joe` other than "filter the projects under `/home/joe`"? There's nothing else to filter. Auto-imply rather than warn-and-ignore. +Filtering only makes sense over a set of projects — without `--all-projects` there's nothing for `--filter-path` to filter. Currently it's warned-about-and-ignored; auto-imply is friendlier. 
+ +**Asymmetry note** (worth recording): `--expand-paths` *cannot* safely imply `--all-projects` because the flag has independent meaning in single-session / single-project mode (next item — project one artefact under `<output_dir>/<real_path>/`). Implying `--all-projects` from `--expand-paths` would silently switch from "expand this one input" to "scan ~/.claude/projects/", which is a much bigger surprise than `--filter-path` could ever be. So the auto-imply is `--filter-path` only; `--expand-paths` keeps the current behaviour matrix. #### `--expand-paths` for single-session / single-project mode From ced19a8a12ceb349cdec0b2bbfa9c6a22e8881d4 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 10 May 2026 20:50:24 +0200 Subject: [PATCH 07/11] Address CR review + Windows test failure (#151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CR review on #155 had one MAJOR and several MINOR findings; plus the Windows CI run failed on the integration tests. Both rooted in the same place — path-shape assumptions — so addressing together. ## Windows failure / CR #3: cross-platform path handling `Path(s).is_absolute()` returns False on Windows for POSIX-form strings like `/home/joe/project/A` (no drive letter). So when a Linux-recorded JSONL cwd is processed on Windows, my tier-1 and tier-2 absoluteness guards in `project_dir_to_real_path` rejected it as "non-absolute" and fell through to the naive last-resort. The subsequent `joinpath('\\', 'home', 'joe', ...)` then anchored to the drive root, writing outside `output_dir` — which is what the failing `test_expand_paths_full_tree[windows-3.14]` caught. Fixed by adding two form-aware helpers in `utils.py`: - `_path_looks_absolute(s)` — accepts both POSIX (`/foo`) and Windows (`C:\foo`) forms regardless of host OS. Replaces the bare `Path(s).is_absolute()` calls in tier 1 and tier 2. - `_split_real_path_for_join(s)` — decomposes a real-path string into the parts to join under `output_dir`. 
POSIX → drop leading `/`. Windows → keep drive letter as a leading dirname segment (colon stripped), so `C:\foo\bar` lands at `<output_dir>/C/foo/bar`. `project_destination`'s filter-with-expand branch also gained form-aware dispatch: POSIX-form real paths use `PurePosixPath` for the `relative_to` check; Windows-form uses `PureWindowsPath`. Mixing forms returns None (user-error path). ## CR MAJOR #1: variant filename derivation `process_projects_hierarchy` was hard-coding `combined_transcripts.html` for `output_path` and the index `html_file` entry, but `convert_jsonl_to` writes `combined_transcripts{variant}.{ext}` (e.g. `combined_transcripts.low.compact.md`). With `--format md` or non-default --detail/--compact, the cache check always saw "stale" and the index linked to the wrong file. Now derives `combined_name` from the same `variant_suffix(detail, compact, format)` shape and threads it through `output_path`, `is_html_stale`, `is_page_stale` (with the variant arg), and the archived-project index entry. ## CR MINOR #2: filter word boundary `name.startswith(filter_path)` over-matched siblings — `--filter-path -home-joe` would also pass `-home-joet-...`. Tightened to `name == filter or name.startswith(filter + "-")`. ## CR MINOR #4: exit_code asserts in warning tests `test_warns_when_flags_used_without_all_projects` and `test_warns_when_expand_paths_with_file_output` only checked warning text. Added `assert result.exit_code == 0` so they verify the warning doesn't escalate to a failure. ## CR MINOR #5: status header `work/obsidian-friendly-output.md` still said "Status: Plan — not started" but the implementation + tests are in this PR. Updated to "Shipped (impl + tests in this PR; follow-ups recorded below)". 
Co-Authored-By: Claude Opus 4.7 (1M context) --- claude_code_log/converter.py | 35 +++++++++--- claude_code_log/utils.py | 94 +++++++++++++++++++++++++++----- test/test_obsidian_output.py | 6 +- work/obsidian-friendly-output.md | 2 +- 4 files changed, 113 insertions(+), 24 deletions(-) diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index c7bd41bf..75373a44 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -2551,7 +2551,18 @@ def process_projects_hierarchy( # (#151). Resolve once per project — using the cache when # populated, else a quick JSONL peek — so `_collect_project_sessions` # / cache rebuilds are skipped for filtered-out projects entirely. - from .utils import project_destination + from .utils import project_destination, variant_suffix as _variant_suffix + + # Combined-transcript filename. `convert_jsonl_to` writes + # `combined_transcripts{variant}.{ext}` (e.g. + # `combined_transcripts.low.compact.md`); the cache lookup keys, + # `output_path` existence check, and `html_file` index entries + # all need to use the same name. Hard-coding "combined_transcripts.html" + # would make non-default --format / --detail / --compact + # combinations cache-miss forever and link to the wrong file. + variant = _variant_suffix(detail, compact, output_format) + combined_ext = get_file_extension(output_format) + combined_name = f"combined_transcripts{variant}.{combined_ext}" # Index page lives at the root of whatever output destination we # use (either `--output` if set, or the legacy in-place projects @@ -2635,18 +2646,26 @@ def _rel_to_index(p: Path) -> Path: total_archived += archived_count # Output destination — `dest_dir` for #151's `--output` / # `--expand-paths` / `--filter-path`, falling back to the - # source project_dir for legacy in-place behaviour. - output_path = dest_dir / "combined_transcripts.html" + # source project_dir for legacy in-place behaviour. 
Filename + # uses the same {variant}.{ext} convention as + # `convert_jsonl_to`. + output_path = dest_dir / combined_name # Check combined_stale using the appropriate cache: # - Paginated projects store data in html_pages table (via save_page_cache) # - Non-paginated projects store data in html_cache table (via update_html_cache) if cache_manager is not None: - existing_page_count = cache_manager.get_page_count() + existing_page_count = cache_manager.get_page_count(variant) if existing_page_count > 0: - # Paginated project: check page 1 staleness - combined_stale = cache_manager.is_page_stale(1, page_size)[0] + # Paginated project: check page 1 staleness for the + # current --format/--detail/--compact variant. + combined_stale = cache_manager.is_page_stale(1, page_size, variant)[ + 0 + ] else: - # Non-paginated project: check html_cache + # Non-paginated project: check html_cache for the + # variant-specific filename (e.g. + # `combined_transcripts.low.compact.md`), not the + # default `combined_transcripts.html`. combined_stale = cache_manager.is_html_stale( output_path.name, None )[0] @@ -2975,7 +2994,7 @@ def _rel_to_index(p: Path) -> Path: { "name": archived_dir.name, "path": archived_dir, - "html_file": f"{archived_rel}/combined_transcripts.html", + "html_file": f"{archived_rel}/{combined_name}", "html_variants": _enumerate_project_variants( archived_dest, str(archived_rel) ), diff --git a/claude_code_log/utils.py b/claude_code_log/utils.py index 136090a1..aedba930 100644 --- a/claude_code_log/utils.py +++ b/claude_code_log/utils.py @@ -155,6 +155,48 @@ def get_project_display_name( return display_name +def _path_looks_absolute(s: str) -> bool: + """True if ``s`` looks like an absolute path on either POSIX or + Windows. 
Decoupled from the host OS so JSONL-stored cwds don't + silently mismatch when a Linux-recorded transcript is processed + on Windows or vice versa (#151).""" + if not s: + return False + from pathlib import PurePosixPath, PureWindowsPath + + return PurePosixPath(s).is_absolute() or PureWindowsPath(s).is_absolute() + + +def _split_real_path_for_join(real_path_str: str) -> list[str]: + """Decompose a real-path string into the parts that should be + joined under ``output_dir`` for ``--expand-paths``. + + Form-aware: POSIX-shaped strings (``/foo/bar``) yield + ``['foo', 'bar']``; Windows-shaped strings (``C:\\foo\\bar``) + yield ``['C', 'foo', 'bar']`` (drive letter kept as a path + component, colon stripped). Relative inputs pass through as-is. + + Pure path-string inspection — no host-OS dependence; same JSONL + cwd produces the same destination tree on Linux, macOS, or + Windows. + """ + from pathlib import PurePosixPath, PureWindowsPath + + p_posix = PurePosixPath(real_path_str) + if p_posix.is_absolute(): + return list(p_posix.parts[1:]) # drop leading '/' + p_win = PureWindowsPath(real_path_str) + if p_win.is_absolute(): + # 'C:\foo\bar' → drive='C:', parts=('C:\\', 'foo', 'bar') + # Keep the drive as a leading dirname segment, strip the colon + # so it works as a real directory name on POSIX too. + drive = p_win.drive.rstrip(":") + rest = list(p_win.parts[1:]) + return [drive, *rest] if drive else rest + # Relative — POSIX-style component split. + return list(p_posix.parts) + + def project_dir_to_real_path( project_dir: Path, cached_working_directories: Optional[list[str]] = None, @@ -194,18 +236,20 @@ def project_dir_to_real_path( """ # Tier 1: cache. Only accept absolute paths — relative or oddly # shaped values fall through (e.g. test fixtures with synthetic - # `cwd` entries). + # `cwd` entries). Absoluteness check is form-aware (POSIX or + # Windows shapes), so a Linux-recorded cwd processed on Windows + # still resolves through this tier. 
if cached_working_directories: real_dirs = [ wd for wd in cached_working_directories - if not _is_temp_path(wd) and Path(wd).is_absolute() + if not _is_temp_path(wd) and _path_looks_absolute(wd) ] if real_dirs: return Path(real_dirs[0]) # Tier 2: peek the first JSONL for a `cwd` field. Same - # absoluteness guard as tier 1. + # form-aware absoluteness guard as tier 1. if project_dir.is_dir(): # Skip agent-* sidechain files; they may not carry the # top-level project cwd. Take any other JSONL. @@ -213,7 +257,7 @@ def project_dir_to_real_path( if jsonl_path.name.startswith("agent-"): continue cwd_from_peek = _peek_jsonl_for_cwd(jsonl_path) - if cwd_from_peek and Path(cwd_from_peek).is_absolute(): + if cwd_from_peek and _path_looks_absolute(cwd_from_peek): return Path(cwd_from_peek) # First non-agent JSONL exhausted with no usable cwd — # bail out rather than scanning every file. @@ -320,26 +364,48 @@ def project_destination( return project_dir # With --expand-paths: resolve the real path and (optionally) trim - # the filter prefix. + # the filter prefix. Form-aware throughout — POSIX and Windows + # path strings are handled symmetrically so a transcript recorded + # on one platform projects predictably on the other. if expand_paths: + from pathlib import PurePosixPath, PureWindowsPath + real_path = project_dir_to_real_path(project_dir, cached_working_directories) + real_str = str(real_path) if filter_path: - filter_root = Path(filter_path) + # Match using the same path-shape family as the real path + # (POSIX-form `/home/joe` filters POSIX-form real paths; + # Windows-form `C:\Users\joe` filters Windows-form real + # paths). Mixing forms is a user error and produces None. 
+ if PurePosixPath(real_str).is_absolute(): + pp_cls = PurePosixPath + elif PureWindowsPath(real_str).is_absolute(): + pp_cls = PureWindowsPath + else: + pp_cls = PurePosixPath try: - rel = real_path.relative_to(filter_root) + rel = pp_cls(real_str).relative_to(pp_cls(filter_path)) except ValueError: # Real path is not under filter prefix — skip. return None - return output_dir / rel - # Real-path tree directly under output_dir. Drop the leading - # `/` so the joined path stays relative to output_dir. - rel_parts = real_path.parts[1:] if real_path.is_absolute() else real_path.parts - return output_dir.joinpath(*rel_parts) + return output_dir.joinpath(*rel.parts) + # Real-path tree directly under output_dir. Decompose the + # path string in a form-aware way: POSIX shapes drop the + # leading '/', Windows shapes keep the drive letter as a + # leading path component (so `C:\foo\bar` lands at + # `<output_dir>/C/foo/bar`). + rel_parts = _split_real_path_for_join(real_str) + return output_dir.joinpath(*rel_parts) if rel_parts else output_dir # No --expand-paths: filter against the flat dir name (per Q2), - # destination keeps the flat name. + # destination keeps the flat name. Require an exact match OR a + # `-`-terminated prefix, so `--filter-path -home-joe` accepts + # `-home-joe` itself and children like `-home-joe-bar`, but + # rejects siblings like `-home-joet-...` (which a bare + # `startswith` check would over-match). if filter_path: - if not project_dir.name.startswith(filter_path): + name = project_dir.name + if name != filter_path and not name.startswith(filter_path + "-"): return None return output_dir / project_dir.name diff --git a/test/test_obsidian_output.py b/test/test_obsidian_output.py index 88cfb379..1bfd23a1 100644 --- a/test/test_obsidian_output.py +++ b/test/test_obsidian_output.py @@ -293,7 +293,9 @@ def test_warns_when_flags_used_without_all_projects( ], ) # The exact stderr-output ordering is implementation-dependent, - # but the warning text must surface somewhere. 
+ # but the warning text must surface somewhere — and the + # invocation must still succeed (warning, not error). + assert result.exit_code == 0, result.output assert "require --all-projects" in result.output def test_warns_when_expand_paths_with_file_output( @@ -318,4 +320,6 @@ def test_warns_when_expand_paths_with_file_output( "md", ], ) + # Warning, not error — single-file path still runs successfully. + assert result.exit_code == 0, result.output assert "require --output to be a directory" in result.output diff --git a/work/obsidian-friendly-output.md b/work/obsidian-friendly-output.md index 7f0b6e45..1d62ddde 100644 --- a/work/obsidian-friendly-output.md +++ b/work/obsidian-friendly-output.md @@ -1,6 +1,6 @@ # Obsidian-friendly output (issue #151) -## Status: Plan — not started +## Status: Shipped (impl + tests in this PR; follow-ups recorded below) ## Context From dcda2d105db7a585bdfb981be4d4424902abcb60 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 10 May 2026 22:17:53 +0200 Subject: [PATCH 08/11] Fix remaining Windows test failures (#151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two more path-handling bugs uncovered by Windows CI on `ced19a8`: 1. `str(WindowsPath("/home/joe"))` returns `"\\home\\joe"` on Windows (Path stringification uses native separators). `_split_real_path_for_join` then doesn't recognize the backslash form as POSIX-absolute and falls through to the relative branch; `output_dir.joinpath("\\home\\joe")` resets to drive root → destination escapes output_dir. Fixed by using `real_path.as_posix()` instead of `str(real_path)` in `project_destination`. `as_posix()` always returns forward slashes regardless of host OS, so our form-aware detection works. 2. The CLI guard `Path(filter_path).is_absolute()` is host-OS-bound too. 
`Path("/home/joe").is_absolute()` returns False on Windows (no drive letter), so the test_absolute_filter_path_with_expand_is_accepted test was getting BadParameter on Windows. Fixed by promoting `_path_looks_absolute` → `path_looks_absolute` (public) and reusing it in cli.py. Same form-aware logic as the utils.py internal callers. Co-Authored-By: Claude Opus 4.7 (1M context) --- claude_code_log/cli.py | 8 +++++++- claude_code_log/utils.py | 13 +++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index 7d49674f..589bef0a 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -658,7 +658,13 @@ def main( # which raises ValueError for *any* mismatch including # "argument is relative" — so the silent failure mode is "every # project skipped". Reject up-front instead. - if filter_path and expand_paths and not Path(filter_path).is_absolute(): + # + # `path_looks_absolute` is host-OS-agnostic (accepts POSIX `/` + # OR Windows `C:\` form), so a Linux-recorded `/home/joe` + # processed on Windows still passes the guard. + from .utils import path_looks_absolute as _path_looks_absolute + + if filter_path and expand_paths and not _path_looks_absolute(filter_path): raise click.BadParameter( f"--filter-path must be an absolute path when --expand-paths is set; " f"got {filter_path!r}", diff --git a/claude_code_log/utils.py b/claude_code_log/utils.py index aedba930..c1b07823 100644 --- a/claude_code_log/utils.py +++ b/claude_code_log/utils.py @@ -155,7 +155,7 @@ def get_project_display_name( return display_name -def _path_looks_absolute(s: str) -> bool: +def path_looks_absolute(s: str) -> bool: """True if ``s`` looks like an absolute path on either POSIX or Windows. 
Decoupled from the host OS so JSONL-stored cwds don't silently mismatch when a Linux-recorded transcript is processed @@ -243,7 +243,7 @@ def project_dir_to_real_path( real_dirs = [ wd for wd in cached_working_directories - if not _is_temp_path(wd) and _path_looks_absolute(wd) + if not _is_temp_path(wd) and path_looks_absolute(wd) ] if real_dirs: return Path(real_dirs[0]) @@ -257,7 +257,7 @@ def project_dir_to_real_path( if jsonl_path.name.startswith("agent-"): continue cwd_from_peek = _peek_jsonl_for_cwd(jsonl_path) - if cwd_from_peek and _path_looks_absolute(cwd_from_peek): + if cwd_from_peek and path_looks_absolute(cwd_from_peek): return Path(cwd_from_peek) # First non-agent JSONL exhausted with no usable cwd — # bail out rather than scanning every file. @@ -371,7 +371,12 @@ def project_destination( from pathlib import PurePosixPath, PureWindowsPath real_path = project_dir_to_real_path(project_dir, cached_working_directories) - real_str = str(real_path) + # `as_posix()` preserves the original form across platforms: + # POSIX-form paths stay `/home/...`, Windows-form paths stay + # `C:/Users/...`. The bare `str()` would convert `/home/joe` + # to `\home\joe` on Windows, which then mismatches our + # form-aware detection and joins to drive root. + real_str = real_path.as_posix() if filter_path: # Match using the same path-shape family as the real path # (POSIX-form `/home/joe` filters POSIX-form real paths; From aa92e80334f91bcf883a2b764adbffc10ca76464 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 10 May 2026 22:47:30 +0200 Subject: [PATCH 09/11] Plan: record three Obsidian-mode follow-ups (#151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User-surfaced points for follow-up work, recorded in the "Follow-up / Open points" section: 1. `--combined yes/no/only` (or `both/none/only`) — suppress combined transcripts. 
Combined + per-session both is dead weight in Obsidian; the file tree itself is the navigation surface. When suppressed, the index links directly to `session-{id}.md` files. 2. Markdown index in `--expand-paths` mode should render the directory hierarchy as a nested bullet list: directories as parent bullets, sessions as nested children. Renders nicely in Obsidian preview AND plain Markdown viewers. 3. **CRITICAL**: Markdown renderer omits per-message timestamps — blocks cross-session narrative / episodic-memory reconstruction. HTML already has them; needs porting to `claude_code_log/markdown/renderer.py`. Format proposal in the doc with concrete before/after examples. Should land BEFORE Obsidian- friendly output sees serious narrative use; worth its own issue. Co-Authored-By: Claude Opus 4.7 (1M context) --- work/obsidian-friendly-output.md | 78 ++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/work/obsidian-friendly-output.md b/work/obsidian-friendly-output.md index 1d62ddde..95c5df54 100644 --- a/work/obsidian-friendly-output.md +++ b/work/obsidian-friendly-output.md @@ -433,6 +433,84 @@ Show what would be generated (projected destinations, filter selections) without Implementation sketch: a top-level CLI flag that, when set, prints the per-project decision (`source -> dest` or `: filter excluded`) and exits before any file I/O. Cheap to implement on top of `project_destination()` since the helper is already pure. +#### `--combined yes/no/only` (or `both/none/only`) — suppress combined transcripts + +For Obsidian usage, having *both* the combined `combined_transcripts.md` and the per-session `session-{id}.md` files is pointless duplication — Obsidian discovers sessions individually via the file tree, and the combined file is just dead weight that confuses graph view. The current default emits both. 
+ +Proposed flag: `--combined yes|no|only` (or equivalent `both|none|only`): + +| Value | Combined | Per-session | Default for | +|---|---|---|---| +| `yes` / `both` | ✓ | ✓ | Current behaviour (HTML / non-Obsidian flow) | +| `no` / `none` | ✗ | ✓ | **Recommended default for `--expand-paths`** | +| `only` | ✓ | ✗ | When the user explicitly wants the rollup-only view | + +When combined is suppressed, the index page must link **directly to each `session-{id}.md`** rather than to `combined_transcripts*.md`. The `html_file` field in `project_summaries` would become a list of session links instead of one combined link. + +#### Markdown index: bullet-list directory hierarchy under `--expand-paths` + +In Markdown + `--expand-paths` mode, the natural index shape is a nested bullet list mirroring the directory tree: + +```markdown +- home/joe + - project/A + - [session-aabbccdd](home/joe/project/A/session-aabbccdd.md) — 2026-03-21 *14 messages* + - [session-eeff0011](home/joe/project/A/session-eeff0011.md) — 2026-03-22 *9 messages* + - project/B + - [session-22334455](home/joe/project/B/session-22334455.md) — 2026-03-23 *31 messages* +- home/jane + - project/C + - [session-66778899](home/jane/project/C/session-66778899.md) — 2026-03-20 *5 messages* +``` + +Each directory appears as a parent bullet with its sessions (or sub-dirs) as nested children. Walks the same path-projection tree the file system was projected into, but at the index level. Renders nicely in Obsidian's preview AND in plain Markdown viewers. Especially good when combined with the no-combined-transcripts mode (above), since each leaf bullet then directly points to the session file the user wants to open. + +#### **CRITICAL**: Markdown renderer must emit per-message timestamps + +This is "absolutely need" tier, not a nice-to-have — it's what enables a cross-session narrative / episodic-memory layer in Obsidian. 
Without per-message timestamps in the Markdown output, the user can't reconstruct *when* something happened, which kills the whole "transcript as Obsidian note" workflow. + +**Current Markdown output (with `--compact`):** + +```markdown +## 🤷 User: *Nice! Please commit and reply to bob that…* + +Nice! Please commit and reply to bob that you did it. + +### 🤖 Assistant: *Done! I've:* + +> Done! I've: +> +> 1. **Committed** the WebFetch tool renderer implementation (commit `da363b8`) … +> 2. **Replied** to bob (mail #250) … + +> No response requested. +``` + +**Required:** + +```markdown +## 🤷 User: *Nice! Please commit and reply to bob that…* +*2026-03-21 18:40:44* + +Nice! Please commit and reply to bob that you did it. + +### 🤖 Assistant: *Done! I've:* +*2026-03-21 18:44:22* + +> Done! I've: +> +> 1. **Committed** the WebFetch tool renderer implementation (commit `da363b8`) … +> 2. **Replied** to bob (mail #250) … + +> No response requested. +``` + +One italics line per message, immediately after the heading. Format: `*YYYY-MM-DD HH:MM:SS*` (matches the existing HTML timestamp rendering at the message level). + +The HTML renderer already emits timestamps; this is purely a Markdown-side omission to fix. Should be a small change in `claude_code_log/markdown/renderer.py` at the per-message header emission point. + +Considered out of scope for #151 (the path-projection PR), but should land **before** anyone seriously uses the Obsidian-friendly output for narrative work. Worth its own issue. + --- ## Out of scope (mention for completeness) From d5aee2116fd1f2efb3fd29c157076018dae243ee Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Thu, 14 May 2026 11:47:39 +0200 Subject: [PATCH 10/11] Obsidian follow-ups: --combined yes/no/only + bullet-tree Markdown index (#151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-ups from the user's review of PR #155 land here. 
Per-message timestamps remain a separate issue (separate review surface area). ## --combined yes/no/only Controls which of {combined-transcript, per-session} files get written: - `yes` (default) — both, current behaviour - `no` — per-session only (Obsidian-friendly: combined is dead weight when the file tree itself is the navigation surface) - `only` — combined only (equivalent to the legacy --no-individual-sessions) Default switches to `no` when `--expand-paths` is set — that's the Obsidian-vault mode where per-session .md files are the unit of navigation. Explicit `--combined yes` overrides. `--no-individual-sessions` is kept as a back-compat alias for `--combined only`; combining it with `--combined no` is rejected with a `click.BadParameter` raised at the start of `main()`. Threaded through `convert_jsonl_to` (new `write_combined` parameter skips the combined write path entirely) and `process_projects_hierarchy` (needs_work check ignores combined-staleness in suppress-combined mode). When combined is suppressed, the Markdown index renders per-session bullet links under each project header (rather than the broken combined-link). Project's html_file is left as a path string for flat mode; per-session `file` entries are added to the sessions list on the project_summary dict (cached + fallback + archived paths). ## Bullet-tree Markdown index for --expand-paths Under `--expand-paths`, the natural index shape is a nested bullet-list mirroring the projected directory tree: ``` - **home/** - **joe/** - **project/** - **A/** - [session abc](home/joe/project/A/session-abc.md) — *2026-05-14 …* - [session def](home/joe/project/A/session-def.md) — *…* ``` New `_render_expand_paths_tree` helper builds a trie from the per-session (or per-project) link paths, walks it depth-first, emits bullets with 2-space indent per level. Triggered via `expand_paths_tree=True` kwarg to `generate_projects_index`, set from `process_projects_hierarchy` when `output_format in ("md", "markdown") and expand_paths`.
Falls back to the flat per-project layout when: - format is HTML or JSON (those renderers don't accept the kwarg) - expand-paths is off - the tree builder produces no entries (defensive) ## Tests `test/test_obsidian_output.py` extended: - `TestCombinedFlag` (7 new tests): combined=yes/no/only with `process_projects_hierarchy` directly; the CLI default under --expand-paths; the bullet-tree index shape; the --no-individual-sessions alias; conflicting-flags rejection. - `test_absolute_filter_path_with_expand_is_accepted` updated to check for per-session output (the new --expand-paths default suppresses combined). `just ci` clean. 1846 collected; TUI 78p/7s; ruff + pyright + ty green. Co-Authored-By: Claude Opus 4.7 (1M context) --- claude_code_log/cli.py | 54 +++++++- claude_code_log/converter.py | 62 +++++++-- claude_code_log/markdown/renderer.py | 129 +++++++++++++++++- claude_code_log/renderer.py | 6 + test/test_obsidian_output.py | 196 ++++++++++++++++++++++++++- 5 files changed, 429 insertions(+), 18 deletions(-) diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index 589bef0a..250ce08c 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -532,7 +532,25 @@ def _clear_output_files( @click.option( "--no-individual-sessions", is_flag=True, - help="Skip generating individual session HTML files (only create combined transcript)", + help=( + "Skip generating individual session files (combined transcript only). " + "Back-compat alias for --combined only." + ), +) +@click.option( + "--combined", + "combined", + type=click.Choice(["yes", "no", "only"], case_sensitive=False), + default=None, + help=( + "Control combined-vs-individual transcript generation: " + "'yes' = both combined and per-session files (default for --all-projects); " + "'no' = only per-session files (recommended for Obsidian / vault use — " + "combined is dead weight); " + "'only' = only the combined file (= --no-individual-sessions). 
" + "When unset, defaults to 'no' under --expand-paths (Obsidian mode), " + "else 'yes'." + ), ) @click.option( "--no-cache", @@ -622,6 +640,7 @@ def main( output: Optional[Path], expand_paths: bool, filter_path: Optional[str], + combined: Optional[str], open_browser: bool, from_date: Optional[str], to_date: Optional[str], @@ -651,6 +670,31 @@ def main( # Configure logging to show warnings and above logging.basicConfig(level=logging.WARNING, format="%(levelname)s: %(message)s") + # Resolve --combined default and back-compat with --no-individual-sessions. + # `--combined` semantics: + # yes → write combined transcript AND per-session files + # no → write per-session files only (Obsidian-friendly) + # only → write combined transcript only (= --no-individual-sessions) + # Default: yes, except when --expand-paths is set (Obsidian mode → no). + if combined is None: + combined = "no" if expand_paths else "yes" + else: + combined = combined.lower() + if no_individual_sessions: + if combined == "no": + raise click.BadParameter( + "--no-individual-sessions conflicts with --combined no " + "(both attempt to skip per-session files but --no-individual-sessions " + "implies combined-only). Pick one.", + param_hint="--no-individual-sessions", + ) + # `--no-individual-sessions` is a strict alias for `--combined only`; + # honour it for back-compat (and prefer this over an unset --combined). + combined = "only" + # Derived flags actually consumed downstream. + write_combined = combined in ("yes", "only") + write_individual = combined in ("yes", "no") + # Loud rejection of relative `--filter-path` when paired with # `--expand-paths` (#151). 
Without this, a user typing # `--filter-path home/joe` (forgetting the leading `/`) would @@ -911,7 +955,7 @@ def main( from_date, to_date, not no_cache, - not no_individual_sessions, + write_individual, output_format, image_export_mode, page_size=page_size, @@ -920,6 +964,7 @@ def main( output_dir=output_dir_for_projects, expand_paths=expand_paths, filter_path=filter_path, + write_combined=write_combined, ) # Count processed projects @@ -968,7 +1013,7 @@ def main( output, from_date, to_date, - not no_individual_sessions, + write_individual, not no_cache, image_export_mode=image_export_mode, page_size=page_size, @@ -977,12 +1022,13 @@ def main( # User's `-o` path is a one-off export, not a cached artifact: # don't occupy a cache slot keyed by an arbitrary destination. update_cache=output is None, + write_combined=write_combined, ) if input_path.is_file(): click.echo(f"Successfully converted {input_path} to {output_path}") else: jsonl_count = len(list(input_path.glob("*.jsonl"))) - if not no_individual_sessions: + if write_individual: ext = get_file_extension(output_format) session_files = list(input_path.glob(f"session-*.{ext}")) click.echo( diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 75373a44..7e479095 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -1526,6 +1526,7 @@ def convert_jsonl_to( compact: bool = False, update_cache: bool = True, output_root: Optional[Path] = None, + write_combined: bool = True, ) -> Path: """Convert JSONL transcript(s) to the specified format. @@ -1677,7 +1678,14 @@ def convert_jsonl_to( # Use pagination if total messages exceed page_size or there are existing pages use_pagination = total_message_count > page_size or existing_page_count > 1 - if use_pagination: + # `write_combined=False` (#151 follow-up: --combined no) skips + # combined-transcript generation entirely. 
Per-session files (if + # requested) are still produced by `_generate_individual_session_files` + # below. The function still returns `output_path` for the caller's + # index linking, but the file at that path is not (re-)written. + if not write_combined: + pass + elif use_pagination: # Use paginated HTML generation assert cache_manager is not None # Ensured by use_pagination condition # Use cached session data if available, otherwise build from messages @@ -2482,6 +2490,7 @@ def process_projects_hierarchy( output_dir: Optional[Path] = None, expand_paths: bool = False, filter_path: Optional[str] = None, + write_combined: bool = True, ) -> Path: """Process the entire ~/.claude/projects/ hierarchy and create linked output files. @@ -2672,13 +2681,20 @@ def _rel_to_index(p: Path) -> Path: else: combined_stale = True - # Determine if we need to do any work - needs_work = ( - bool(modified_files) - or bool(stale_sessions) - or combined_stale - or not output_path.exists() - ) + # Determine if we need to do any work. With + # `write_combined=False`, the combined-transcript file + # isn't produced — its staleness / on-disk presence is + # irrelevant; only modified sources / stale per-session + # files matter. 
+ if write_combined: + needs_work = ( + bool(modified_files) + or bool(stale_sessions) + or combined_stale + or not output_path.exists() + ) + else: + needs_work = bool(modified_files) or bool(stale_sessions) # Build archived suffix for output (shown on both cached and work paths) archived_suffix = ( @@ -2719,6 +2735,7 @@ def _rel_to_index(p: Path) -> Path: detail=detail, compact=compact, output_root=(dest_dir if dest_dir != project_dir else None), + write_combined=write_combined, ) # Track timing @@ -2778,6 +2795,7 @@ def _rel_to_index(p: Path) -> Path: "earliest_timestamp": cached_project_data.earliest_timestamp, "working_directories": cache_manager.get_working_directories(), "is_archived": False, + "combined_suppressed": not write_combined, "sessions": [ { "id": session_data.session_id, @@ -2794,6 +2812,14 @@ def _rel_to_index(p: Path) -> Path: "message_count": session_data.message_count, "first_user_message": session_data.first_user_message or "[No user message found in session.]", + # Per-session link relative to the index + # root. Used by the index renderer when + # `combined_suppressed` is True so the + # index can link directly to the + # `session-{id}.{ext}` files. + "file": ( + f"{rel_dest}/session-{session_data.session_id}.{combined_ext}" + ), } for session_data in cached_project_data.sessions.values() # Filter out warmup-only and empty sessions (agent-only) @@ -2912,6 +2938,13 @@ def _rel_to_index(p: Path) -> Path: team_names_set: set[str] = set(team_name_per_session.values()) rel_dest = _rel_to_index(dest_dir) + # Post-decorate `sessions_data` with per-session file links + # (matches the cached path's shape so the index renderer + # can use `session.file` uniformly under + # `combined_suppressed`). 
+ for _sd in sessions_data: + if "file" not in _sd: + _sd["file"] = f"{rel_dest}/session-{_sd['id']}.{combined_ext}" project_summaries.append( { "name": project_dir.name, @@ -2933,6 +2966,7 @@ def _rel_to_index(p: Path) -> Path: if cache_manager else [], "is_archived": False, + "combined_suppressed": not write_combined, "sessions": sessions_data, "team_names": sorted(team_names_set), } @@ -3009,6 +3043,7 @@ def _rel_to_index(p: Path) -> Path: "earliest_timestamp": cached_project_data.earliest_timestamp, "working_directories": cache_manager.get_working_directories(), "is_archived": True, + "combined_suppressed": not write_combined, "sessions": [ { "id": session_data.session_id, @@ -3022,6 +3057,9 @@ def _rel_to_index(p: Path) -> Path: "message_count": session_data.message_count, "first_user_message": session_data.first_user_message or "[No user message found in session.]", + "file": ( + f"{archived_rel}/session-{session_data.session_id}.{combined_ext}" + ), } for session_data in cached_project_data.sessions.values() if session_data.first_user_message @@ -3053,8 +3091,14 @@ def _rel_to_index(p: Path) -> Path: renderer = get_renderer(output_format, image_export_mode) index_regenerated = False if renderer.is_outdated(index_path) or from_date or to_date or any_cache_updated: + # Markdown index renders as a nested bullet-list directory + # tree under `--expand-paths` (the natural Obsidian-vault + # shape). HTML and JSON renderers ignore the kwarg. + index_kwargs: dict[str, Any] = {} + if output_format in ("md", "markdown") and expand_paths: + index_kwargs["expand_paths_tree"] = True index_content = renderer.generate_projects_index( - project_summaries, from_date, to_date + project_summaries, from_date, to_date, **index_kwargs ) assert index_content is not None # Ensure the index root exists when projecting into a fresh dir. 
diff --git a/claude_code_log/markdown/renderer.py b/claude_code_log/markdown/renderer.py index ea20082a..5e549f32 100644 --- a/claude_code_log/markdown/renderer.py +++ b/claude_code_log/markdown/renderer.py @@ -256,6 +256,78 @@ def _protect_html_tags(text: str) -> str: return str(rendered).rstrip("\n") +def _render_expand_paths_tree(template_projects: list[Any]) -> list[str]: + """Render `--expand-paths` Markdown index as a nested bullet-list + directory tree. + + Each unique path-component along the way becomes a bullet at its + nesting depth; sessions are leaf bullets under their parent dir. + The path is derived from `project.html_file` (the combined-link + relative path) — when ``combined_suppressed`` is set the per-session + `file` entries replace the project-level combined link. + + Returns a list of Markdown lines (joined by `\\n` by the caller), + or an empty list when no tree-shapable data is present (signal to + fall back to the flat layout). + """ + # Tree node: + # {"_links": list[(label, url, timestamp_range)], # at this level + # "": } + root: dict[str, Any] = {} + + def _insert(path_parts: list[str], label: str, url: str, ts: str) -> None: + """Walk `path_parts` down the tree; record the link at the leaf + directory. The leaf is the second-to-last path component (the + last is the filename itself).""" + node = root + # The last part of path_parts is the filename; walk up to its + # parent directory. + for part in path_parts[:-1]: + if part not in node or not isinstance(node[part], dict): + node[part] = {} + node = node[part] + node.setdefault("_links", []).append((label, url, ts)) + + for project in template_projects: + if project.combined_suppressed and project.sessions: + # Use the per-session links — each session becomes a leaf + # under its parent dir. 
+ for session in project.sessions: + url = session.get("file") + if not url: + continue + summary = session.get("summary") + short_id = (session.get("id") or "")[:8] + label = summary if summary else f"session {short_id}" + ts = session.get("timestamp_range") or "" + _insert(url.split("/"), label, url, ts) + else: + # Combined-link mode: use the project's html_file as the + # single leaf under its parent dir. Translate `.html` + # filenames so the Markdown index links land at `.md` peers. + url = project.html_file.replace(".html", ".md") + ts = project.formatted_time_range or "" + _insert(url.split("/"), project.display_name, url, ts) + + if not root: + return [] + + lines: list[str] = [] + + def _emit(node: dict[str, Any], depth: int) -> None: + indent = " " * depth + # Directories first (alphabetical), then leaf-link entries. + for name in sorted(k for k in node if k != "_links"): + lines.append(f"{indent}- **{name}/**") + _emit(node[name], depth + 1) + for label, url, ts in node.get("_links", []): + ts_suffix = f" — *{ts}*" if ts else "" + lines.append(f"{indent}- [{label}]({url}){ts_suffix}") + + _emit(root, 0) + return lines + + class MarkdownRenderer(Renderer): """Markdown renderer for Claude Code transcripts.""" @@ -1822,8 +1894,20 @@ def generate_projects_index( project_summaries: list[dict[str, Any]], from_date: Optional[str] = None, to_date: Optional[str] = None, + expand_paths_tree: bool = False, ) -> str: - """Generate a Markdown projects index page.""" + """Generate a Markdown projects index page. + + Args: + project_summaries: Per-project summary dicts. + from_date / to_date: Date-filter labels for the title. + expand_paths_tree: When True (Obsidian mode — `--expand-paths`), + render the index as a nested bullet-list directory tree + derived from each project's `html_file` (or per-session + `file`) path. Each directory level becomes a bullet, + with sessions as nested leaf bullets. Falls back to the + flat per-project layout otherwise. 
+ """ title = title_for_projects_index(project_summaries, from_date, to_date) template_projects, template_summary = prepare_projects_index(project_summaries) @@ -1838,11 +1922,28 @@ def generate_projects_index( ) parts.append("") + # `--expand-paths` Obsidian mode: render the index as a nested + # bullet-list mirroring the projected directory tree. Each path + # component becomes a bullet; per-session files are the leaves. + # The flat per-project listing remains the default for HTML and + # for non-expand-paths Markdown. + if expand_paths_tree: + tree_lines = _render_expand_paths_tree(template_projects) + if tree_lines: + parts.extend(tree_lines) + return "\n".join(parts) + # Project list for project in template_projects: - # Derive markdown link from html_file path - md_link = project.html_file.replace(".html", ".md") - parts.append(f"## [{project.display_name}]({md_link})") + if project.combined_suppressed: + # `--combined no` mode: header is a plain heading (no + # link to the non-existent combined file); per-session + # bullets link directly to `session-{id}.md` files. + parts.append(f"## {project.display_name}") + else: + # Derive markdown link from html_file path + md_link = project.html_file.replace(".html", ".md") + parts.append(f"## [{project.display_name}]({md_link})") # Use actual session count (filtered) like HTML does session_count = ( len(project.sessions) if project.sessions else project.jsonl_count @@ -1851,6 +1952,26 @@ def generate_projects_index( parts.append(f"- Messages: {project.message_count}") if project.formatted_time_range: parts.append(f"- Date range: {project.formatted_time_range}") + # Per-session bullet links (only when combined is + # suppressed — otherwise the combined-link header already + # serves as the project's single entry point). 
+ if project.combined_suppressed and project.sessions: + parts.append("") + parts.append("### Sessions") + for session in project.sessions: + file_link = session.get("file") + if not file_link: + continue + # Derive label: prefer summary / title, fall back + # to the short session-id prefix. + summary = session.get("summary") + short_id = (session.get("id") or "")[:8] + label = summary if summary else f"session {short_id}" + timestamp_range = session.get("timestamp_range") or "" + timestamp_suffix = ( + f" — *{timestamp_range}*" if timestamp_range else "" + ) + parts.append(f"- [{label}]({file_link}){timestamp_suffix}") parts.append("") return "\n".join(parts) diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 16ab6281..e90a9faa 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -479,6 +479,12 @@ def __init__(self, project_data: dict[str, Any]): self.earliest_timestamp = project_data.get("earliest_timestamp", "") self.sessions = project_data.get("sessions", []) self.working_directories = project_data.get("working_directories", []) + # `--combined no` (#151 follow-up): when set, the index should + # link directly to per-session files (`session["file"]`) rather + # than the (skipped) combined-transcript file. + self.combined_suppressed: bool = bool( + project_data.get("combined_suppressed", False) + ) # Teammates feature — distinct team names across this project's # sessions. Computed in get_all_cached_projects from each # SessionCacheData.team_name. 
diff --git a/test/test_obsidian_output.py b/test/test_obsidian_output.py index 1bfd23a1..33fc079e 100644 --- a/test/test_obsidian_output.py +++ b/test/test_obsidian_output.py @@ -266,7 +266,11 @@ def test_absolute_filter_path_with_expand_is_accepted( ], ) assert result.exit_code == 0, result.output - assert (out / "project/A/combined_transcripts.md").exists() + # `--expand-paths` defaults `--combined` to `no` (Obsidian + # mode), so the combined file is suppressed; check for the + # per-session output instead. + sessions = list((out / "project/A").glob("session-*.md")) + assert sessions, "expected per-session output under the expanded tree" def test_warns_when_flags_used_without_all_projects( self, fake_projects: Path, isolated_cache: Path, tmp_path: Path @@ -323,3 +327,193 @@ def test_warns_when_expand_paths_with_file_output( # Warning, not error — single-file path still runs successfully. assert result.exit_code == 0, result.output assert "require --output to be a directory" in result.output + + +# ----------------------------------------------------------------------------- +# --combined yes/no/only flag (#151 follow-up) +# ----------------------------------------------------------------------------- + + +class TestCombinedFlag: + """The `--combined` flag controls whether the combined-transcript + and per-session files are emitted. Default is `yes` except when + `--expand-paths` is set, in which case it switches to `no` + (Obsidian-vault-friendly default — combined is dead weight when + each session has its own .md file).""" + + def test_combined_yes_emits_both( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + out = tmp_path / "out-both" + process_projects_hierarchy( + fake_projects, + output_format="md", + output_dir=out, + write_combined=True, + generate_individual_sessions=True, + ) + assert (out / "-home-joe-project-A" / "combined_transcripts.md").exists() + # Per-session file too. Filename is session-{session_id}.md. 
+ sessions = list((out / "-home-joe-project-A").glob("session-*.md")) + assert sessions, "expected at least one per-session file" + + def test_combined_no_skips_combined( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + out = tmp_path / "out-none" + process_projects_hierarchy( + fake_projects, + output_format="md", + output_dir=out, + write_combined=False, + generate_individual_sessions=True, + ) + # Combined file MUST NOT exist. + assert not (out / "-home-joe-project-A" / "combined_transcripts.md").exists() + # Per-session files SHOULD exist. + sessions = list((out / "-home-joe-project-A").glob("session-*.md")) + assert sessions + + def test_combined_only_skips_per_session( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + out = tmp_path / "out-only" + process_projects_hierarchy( + fake_projects, + output_format="md", + output_dir=out, + write_combined=True, + generate_individual_sessions=False, + ) + assert (out / "-home-joe-project-A" / "combined_transcripts.md").exists() + # Per-session files SHOULD NOT exist. + sessions = list((out / "-home-joe-project-A").glob("session-*.md")) + assert not sessions, "per-session files leaked through --combined only" + + def test_cli_expand_paths_default_is_combined_no( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + """The default for `--combined` when `--expand-paths` is set + should be `no` — Obsidian users want per-session files only.""" + from click.testing import CliRunner + + from claude_code_log.cli import main + + out = tmp_path / "out-default" + runner = CliRunner() + result = runner.invoke( + main, + [ + str(fake_projects), + "--all-projects", + "--output", + str(out), + "--expand-paths", + "--format", + "md", + ], + ) + assert result.exit_code == 0, result.output + # Combined files should NOT have been emitted under the + # expanded tree. 
+ combined_files = list(out.rglob("combined_transcripts*.md")) + assert not combined_files, ( + f"--combined no should be the default with --expand-paths, " + f"but {len(combined_files)} combined files were written" + ) + # Per-session files SHOULD be present. + session_files = list(out.rglob("session-*.md")) + assert session_files + + def test_cli_expand_paths_yields_bullet_tree_index( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + """Markdown index under `--expand-paths` renders as a nested + bullet-list directory tree (each path component a bullet, + sessions as leaf bullets).""" + from click.testing import CliRunner + + from claude_code_log.cli import main + + out = tmp_path / "out-tree" + runner = CliRunner() + result = runner.invoke( + main, + [ + str(fake_projects), + "--all-projects", + "--output", + str(out), + "--expand-paths", + "--format", + "md", + ], + ) + assert result.exit_code == 0, result.output + index_md = (out / "index.md").read_text(encoding="utf-8") + # Directory bullets — bold, trailing slash. + assert "- **home/**" in index_md + assert "- **joe/**" in index_md + # Leaf session links (markdown link syntax pointing into the + # expanded tree). + assert "(home/joe/project/A/session-" in index_md + # The traditional flat `## [project](combined.md)` heading + # shape must NOT appear in tree mode. 
+ assert "## [home/joe/project/A]" not in index_md + + def test_cli_combined_only_alias_with_no_individual_sessions( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + """`--no-individual-sessions` is the back-compat alias for + `--combined only`.""" + from click.testing import CliRunner + + from claude_code_log.cli import main + + out = tmp_path / "out-noindividual" + runner = CliRunner() + result = runner.invoke( + main, + [ + str(fake_projects), + "--all-projects", + "--output", + str(out), + "--no-individual-sessions", + "--format", + "md", + ], + ) + assert result.exit_code == 0, result.output + # Combined files present, per-session files absent. + assert list(out.rglob("combined_transcripts*.md")) + assert not list(out.rglob("session-*.md")) + + def test_cli_conflicting_combined_no_and_no_individual_sessions_rejected( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + """`--combined no` + `--no-individual-sessions` is a conflict + (both attempt to skip per-session files, but --no-individual-sessions + implies combined-only). Should be rejected.""" + from click.testing import CliRunner + + from claude_code_log.cli import main + + out = tmp_path / "out-conflict" + runner = CliRunner() + result = runner.invoke( + main, + [ + str(fake_projects), + "--all-projects", + "--output", + str(out), + "--no-individual-sessions", + "--combined", + "no", + "--format", + "md", + ], + ) + assert result.exit_code != 0 + assert "conflicts" in result.output.lower() or "no-individual" in result.output From 1d013d0cbda8d6aa331d223a62c09150d56d3ee9 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Thu, 14 May 2026 18:26:34 +0200 Subject: [PATCH 11/11] Fix Windows path separators in bullet-tree index (#151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `_rel_to_index` returned a `Path`, and `str(Path("home/joe"))` on Windows yields `home\joe`. 
That backslash leaked into per-session `file` URLs and project `html_file` strings, then into the Markdown bullet-tree builder which splits on `/` only — producing a single flat leaf line per project instead of the nested directory tree. Fix at the source by returning the posix-form string from `_rel_to_index`, and add a defensive `\\` → `/` fold in the tree builder. New `test_bullet_tree_normalises_backslash_separators` exercises the builder directly with both separator styles so the regression is caught without a Windows runner. Co-Authored-By: Claude Opus 4.7 --- claude_code_log/converter.py | 21 ++++++++----- claude_code_log/markdown/renderer.py | 10 +++++- test/test_obsidian_output.py | 47 ++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 9 deletions(-) diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 7e479095..87543372 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -2578,19 +2578,24 @@ def process_projects_hierarchy( # tree). Per-project `html_file` entries are relative to this root. index_root = output_dir if output_dir is not None else projects_path - def _rel_to_index(p: Path) -> Path: - """Path of `p` relative to the index root. + def _rel_to_index(p: Path) -> str: + """Posix-form path of `p` relative to the index root. - Unreachable under the documented flag matrix: every + Returned as a forward-slash string so downstream f-strings + (`f"{rel_dest}/..."`) embed cleanly in Markdown links and + HTML hrefs on Windows too — `str(Path("home/joe"))` is + `home\\joe` there, which broke the Markdown bullet-tree + index that splits on `/`. + + The `relative_to` fallback is a paranoia rail: every ``project_destination`` shape produces a ``dest_dir`` that lives under ``index_root`` (legacy → ``projects_path``; - ``--output`` modes → ``output_dir``). Kept as a paranoia rail - for future code paths that might inject an unexpected - absolute ``dest_dir`` (e.g.
via a test seam).""" + ``--output`` modes → ``output_dir``).""" try: - return p.relative_to(index_root) + rel = p.relative_to(index_root) except ValueError: - return p + rel = p + return rel.as_posix() for project_dir in sorted(project_dirs): project_start_time = time.time() diff --git a/claude_code_log/markdown/renderer.py b/claude_code_log/markdown/renderer.py index 5e549f32..c7448dc7 100644 --- a/claude_code_log/markdown/renderer.py +++ b/claude_code_log/markdown/renderer.py @@ -288,6 +288,13 @@ def _insert(path_parts: list[str], label: str, url: str, ts: str) -> None: node = node[part] node.setdefault("_links", []).append((label, url, ts)) + def _to_posix(s: str) -> str: + """Defensive normalisation: split on `/` only after folding + any backslashes the upstream may have produced (Windows + `Path` stringification, historical regressions, callers that + construct URLs from native paths).""" + return s.replace("\\", "/") + for project in template_projects: if project.combined_suppressed and project.sessions: # Use the per-session links — each session becomes a leaf @@ -296,6 +303,7 @@ def _insert(path_parts: list[str], label: str, url: str, ts: str) -> None: url = session.get("file") if not url: continue + url = _to_posix(url) summary = session.get("summary") short_id = (session.get("id") or "")[:8] label = summary if summary else f"session {short_id}" @@ -305,7 +313,7 @@ def _insert(path_parts: list[str], label: str, url: str, ts: str) -> None: # Combined-link mode: use the project's html_file as the # single leaf under its parent dir. Translate `.html` # filenames so the Markdown index links land at `.md` peers. 
- url = project.html_file.replace(".html", ".md") + url = _to_posix(project.html_file).replace(".html", ".md") ts = project.formatted_time_range or "" _insert(url.split("/"), project.display_name, url, ts) diff --git a/test/test_obsidian_output.py b/test/test_obsidian_output.py index 33fc079e..10ce389c 100644 --- a/test/test_obsidian_output.py +++ b/test/test_obsidian_output.py @@ -461,6 +461,53 @@ def test_cli_expand_paths_yields_bullet_tree_index( # shape must NOT appear in tree mode. assert "## [home/joe/project/A]" not in index_md + def test_bullet_tree_normalises_backslash_separators(self): + """Regression: on Windows, `str(Path("home/joe"))` is + `home\\joe`, so any leaked native-separator URL would land + in the bullet-tree as a single un-split leaf line. The + builder must fold backslashes to `/` before splitting.""" + from types import SimpleNamespace + + from claude_code_log.markdown.renderer import _render_expand_paths_tree + + project_backslash = SimpleNamespace( + combined_suppressed=True, + html_file="ignored.html", + display_name="proj", + formatted_time_range="2026-05-10 10:00:00", + sessions=[ + { + "id": "abcdef1234", + "summary": "S1", + "timestamp_range": "2026-05-10 10:00:00", + "file": r"home\joe\project\B\session-abcdef1234.md", + } + ], + ) + project_forward = SimpleNamespace( + combined_suppressed=True, + html_file="ignored.html", + display_name="proj", + formatted_time_range="2026-05-10 10:00:00", + sessions=[ + { + "id": "deadbeef99", + "summary": "S2", + "timestamp_range": "2026-05-10 11:00:00", + "file": "home/joe/project/A/session-deadbeef99.md", + } + ], + ) + lines = _render_expand_paths_tree([project_backslash, project_forward]) + joined = "\n".join(lines) + assert "- **home/**" in joined + assert "- **joe/**" in joined + assert "- **project/**" in joined + # Both leaf links must be present, with forward slashes only. 
+ assert "(home/joe/project/B/session-abcdef1234.md)" in joined + assert "(home/joe/project/A/session-deadbeef99.md)" in joined + assert "\\" not in joined + def test_cli_combined_only_alias_with_no_individual_sessions( self, fake_projects: Path, isolated_cache: Path, tmp_path: Path ):