diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index 8f7005e3..250ce08c 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -474,7 +474,40 @@ def _clear_output_files( "-o", "--output", type=click.Path(path_type=Path), - help="Output file path (default: input file with format extension, or combined_transcripts.{html,md} for directories)", + help=( + "Output destination. With a recognised file suffix " + "(.html/.md/.markdown/.json) treated as a single output file; " + "otherwise treated as a directory root (and now also honoured " + "for --all-projects, where outputs land at " + "//...). Pair with --expand-paths to project " + "back to the real on-disk tree." + ), +) +@click.option( + "--expand-paths", + is_flag=True, + help=( + "When set with --output and --all-projects, expand each " + "project's flat encoded dir name (e.g. '-home-joe-project-A') " + "back to its real path under /. Resolves the encoded " + "name via the cache's recorded `cwd`, falling back to a peek " + "of the first JSONL when the cache is empty. Useful for " + "projecting transcripts into Obsidian-style Markdown vaults." + ), +) +@click.option( + "--filter-path", + type=str, + default=None, + help=( + "Restrict --all-projects to projects matching a path prefix. " + "With --expand-paths, the prefix is matched against the " + "expanded real path AND truncated from the destination " + "(`/home/joe/project/A` with --filter-path /home/joe lands at " + "/project/A/). Without --expand-paths, matches the " + "flat encoded dir name (e.g. '-home-joe' selects projects " + "starting with '-home-joe-')." + ), ) @click.option( "--open-browser", @@ -499,7 +532,25 @@ def _clear_output_files( @click.option( "--no-individual-sessions", is_flag=True, - help="Skip generating individual session HTML files (only create combined transcript)", + help=( + "Skip generating individual session files (combined transcript only). " + "Back-compat alias for --combined only." 
+ ), +) +@click.option( + "--combined", + "combined", + type=click.Choice(["yes", "no", "only"], case_sensitive=False), + default=None, + help=( + "Control combined-vs-individual transcript generation: " + "'yes' = both combined and per-session files (default for --all-projects); " + "'no' = only per-session files (recommended for Obsidian / vault use — " + "combined is dead weight); " + "'only' = only the combined file (= --no-individual-sessions). " + "When unset, defaults to 'no' under --expand-paths (Obsidian mode), " + "else 'yes'." + ), ) @click.option( "--no-cache", @@ -587,6 +638,9 @@ def _clear_output_files( def main( input_path: Optional[Path], output: Optional[Path], + expand_paths: bool, + filter_path: Optional[str], + combined: Optional[str], open_browser: bool, from_date: Optional[str], to_date: Optional[str], @@ -616,6 +670,79 @@ def main( # Configure logging to show warnings and above logging.basicConfig(level=logging.WARNING, format="%(levelname)s: %(message)s") + # Resolve --combined default and back-compat with --no-individual-sessions. + # `--combined` semantics: + # yes → write combined transcript AND per-session files + # no → write per-session files only (Obsidian-friendly) + # only → write combined transcript only (= --no-individual-sessions) + # Default: yes, except when --expand-paths is set (Obsidian mode → no). + if combined is None: + combined = "no" if expand_paths else "yes" + else: + combined = combined.lower() + if no_individual_sessions: + if combined == "no": + raise click.BadParameter( + "--no-individual-sessions conflicts with --combined no " + "(both attempt to skip per-session files but --no-individual-sessions " + "implies combined-only). Pick one.", + param_hint="--no-individual-sessions", + ) + # `--no-individual-sessions` is a strict alias for `--combined only`; + # honour it for back-compat (and prefer this over an unset --combined). + combined = "only" + # Derived flags actually consumed downstream. 
+ write_combined = combined in ("yes", "only") + write_individual = combined in ("yes", "no") + + # Loud rejection of relative `--filter-path` when paired with + # `--expand-paths` (#151). Without this, a user typing + # `--filter-path home/joe` (forgetting the leading `/`) would + # match against an absolute resolved path via `Path.relative_to`, + # which raises ValueError for *any* mismatch including + # "argument is relative" — so the silent failure mode is "every + # project skipped". Reject up-front instead. + # + # `path_looks_absolute` is host-OS-agnostic (accepts POSIX `/` + # OR Windows `C:\` form), so a Linux-recorded `/home/joe` + # processed on Windows still passes the guard. + from .utils import path_looks_absolute as _path_looks_absolute + + if filter_path and expand_paths and not _path_looks_absolute(filter_path): + raise click.BadParameter( + f"--filter-path must be an absolute path when --expand-paths is set; " + f"got {filter_path!r}", + param_hint="--filter-path", + ) + + # Warn early if Obsidian-friendly flags (#151) were passed in a + # context where they're no-ops. `--all-projects` (explicit or + # implicit via no input_path) is the only mode that consumes them; + # `--output` must be a directory (file-suffixed output goes + # through the single-file path which doesn't honour these flags). 
+ from .utils import output_path_is_file as _output_path_is_file + + will_run_all_projects = all_projects or input_path is None + if (expand_paths or filter_path) and tui: + click.echo( + "Warning: --expand-paths / --filter-path are ignored in --tui mode.", + err=True, + ) + elif (expand_paths or filter_path) and not will_run_all_projects: + click.echo( + "Warning: --expand-paths / --filter-path require --all-projects " + "(or omitting INPUT_PATH); ignoring.", + err=True, + ) + elif (expand_paths or filter_path) and ( + output is None or _output_path_is_file(output) + ): + click.echo( + "Warning: --expand-paths / --filter-path require --output to be a " + "directory (no recognised file suffix); ignoring.", + err=True, + ) + from .models import DetailLevel detail_level = DetailLevel(detail.lower()) @@ -813,17 +940,31 @@ def main( raise FileNotFoundError(f"Projects directory not found: {input_path}") click.echo(f"Processing all projects in {input_path}...") + # `--output` for `--all-projects` (#151): pass a *directory* + # to project per-project outputs into. File-suffixed values + # are routed to the single-file path elsewhere; here we + # only honour directory-shaped `--output`. 
+ from .utils import output_path_is_file + + output_dir_for_projects: Optional[Path] = None + if output is not None and not output_path_is_file(output): + output_dir_for_projects = output + output_path = process_projects_hierarchy( input_path, from_date, to_date, not no_cache, - not no_individual_sessions, + write_individual, output_format, image_export_mode, page_size=page_size, detail=detail_level, compact=compact, + output_dir=output_dir_for_projects, + expand_paths=expand_paths, + filter_path=filter_path, + write_combined=write_combined, ) # Count processed projects @@ -872,7 +1013,7 @@ def main( output, from_date, to_date, - not no_individual_sessions, + write_individual, not no_cache, image_export_mode=image_export_mode, page_size=page_size, @@ -881,12 +1022,13 @@ def main( # User's `-o` path is a one-off export, not a cached artifact: # don't occupy a cache slot keyed by an arbitrary destination. update_cache=output is None, + write_combined=write_combined, ) if input_path.is_file(): click.echo(f"Successfully converted {input_path} to {output_path}") else: jsonl_count = len(list(input_path.glob("*.jsonl"))) - if not no_individual_sessions: + if write_individual: ext = get_file_extension(output_format) session_files = list(input_path.glob(f"session-*.{ext}")) click.echo( diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index cd111376..87543372 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -1525,6 +1525,8 @@ def convert_jsonl_to( detail: DetailLevel = DetailLevel.FULL, compact: bool = False, update_cache: bool = True, + output_root: Optional[Path] = None, + write_combined: bool = True, ) -> Path: """Convert JSONL transcript(s) to the specified format. @@ -1567,6 +1569,11 @@ def convert_jsonl_to( suffix = _variant_suffix(detail, compact, format) + # Output destination decoupled from `input_path` (#151). 
Both + # branches below assign to `effective_output_dir`; declare it + # upfront so pyright sees it as defined unconditionally. + effective_output_dir: Path = output_root if output_root is not None else input_path + if input_path.is_file(): # Single file mode - cache only available for directory mode if output_path is None: @@ -1582,8 +1589,16 @@ def convert_jsonl_to( cache_was_updated = False # No cache in single file mode else: # Directory mode - Cache-First Approach + # `output_root` (#151) decouples the output destination from + # the source `input_path` so we can write under e.g. + # ~/Documents/Obsidian// while still reading + # from ~/.claude/projects//. (`effective_output_dir` + # is declared above the if/else; this branch only ensures the + # destination dir exists and supplies the default output_path.) + if output_root is not None: + effective_output_dir.mkdir(parents=True, exist_ok=True) if output_path is None: - output_path = input_path / f"combined_transcripts{suffix}.{ext}" + output_path = effective_output_dir / f"combined_transcripts{suffix}.{ext}" # Phase 1: Ensure cache is fresh and populated cache_was_updated = ensure_fresh_cache( @@ -1663,7 +1678,14 @@ def convert_jsonl_to( # Use pagination if total messages exceed page_size or there are existing pages use_pagination = total_message_count > page_size or existing_page_count > 1 - if use_pagination: + # `write_combined=False` (#151 follow-up: --combined no) skips + # combined-transcript generation entirely. Per-session files (if + # requested) are still produced by `_generate_individual_session_files` + # below. The function still returns `output_path` for the caller's + # index linking, but the file at that path is not (re-)written. 
+ if not write_combined: + pass + elif use_pagination: # Use paginated HTML generation assert cache_manager is not None # Ensured by use_pagination condition # Use cached session data if available, otherwise build from messages @@ -1687,7 +1709,7 @@ def convert_jsonl_to( session_data = _build_session_data_from_messages(messages) output_path = _generate_paginated_html( messages, - input_path, + effective_output_dir, title, page_size, cache_manager, @@ -1749,7 +1771,7 @@ def convert_jsonl_to( _generate_individual_session_files( format, messages, - input_path, + effective_output_dir, from_date, to_date, cache_manager, @@ -2465,6 +2487,10 @@ def process_projects_hierarchy( page_size: int = 2000, detail: DetailLevel = DetailLevel.FULL, compact: bool = False, + output_dir: Optional[Path] = None, + expand_paths: bool = False, + filter_path: Optional[str] = None, + write_combined: bool = True, ) -> Path: """Process the entire ~/.claude/projects/ hierarchy and create linked output files. @@ -2478,6 +2504,14 @@ def process_projects_hierarchy( image_export_mode: Image export mode for markdown silent: If True, suppress verbose per-file logging (show summary only) page_size: Maximum messages per page for combined transcript pagination + output_dir: Optional destination root for projected outputs (#151). + When None, outputs land under each source ``project_dir`` as + before (legacy in-place behaviour). + expand_paths: When True (and ``output_dir`` is set), expand each + project's flat encoded dir name to its real on-disk path + under ``output_dir``. + filter_path: When set, restrict to projects matching the prefix. + See ``utils.project_destination`` for the matching semantics. """ import time @@ -2522,6 +2556,47 @@ def process_projects_hierarchy( # Per-project stats for summary output project_stats: List[tuple[str, GenerationStats]] = [] + # `--filter-path` selection happens at the top of the loop + # (#151). 
Resolve once per project — using the cache when + # populated, else a quick JSONL peek — so `_collect_project_sessions` + # / cache rebuilds are skipped for filtered-out projects entirely. + from .utils import project_destination, variant_suffix as _variant_suffix + + # Combined-transcript filename. `convert_jsonl_to` writes + # `combined_transcripts{variant}.{ext}` (e.g. + # `combined_transcripts.low.compact.md`); the cache lookup keys, + # `output_path` existence check, and `html_file` index entries + # all need to use the same name. Hard-coding "combined_transcripts.html" + # would make non-default --format / --detail / --compact + # combinations cache-miss forever and link to the wrong file. + variant = _variant_suffix(detail, compact, output_format) + combined_ext = get_file_extension(output_format) + combined_name = f"combined_transcripts{variant}.{combined_ext}" + + # Index page lives at the root of whatever output destination we + # use (either `--output` if set, or the legacy in-place projects + # tree). Per-project `html_file` entries are relative to this root. + index_root = output_dir if output_dir is not None else projects_path + + def _rel_to_index(p: Path) -> str: + """Posix-form path of `p` relative to the index root. + + Returned as a forward-slash string so downstream f-strings + (`f"{rel_dest}/..."`) embed cleanly in Markdown links and + HTML hrefs on Windows too — `str(Path("home/joe"))` is + `home\\joe` there, which broke the Markdown bullet-tree + index that splits on `/`. 
+ + The `relative_to` fallback is a paranoia rail: every + ``project_destination`` shape produces a ``dest_dir`` that + lives under ``index_root`` (legacy → ``projects_path``; + ``--output`` modes → ``output_dir``).""" + try: + rel = p.relative_to(index_root) + except ValueError: + rel = p + return rel.as_posix() + for project_dir in sorted(project_dirs): project_start_time = time.time() stats = GenerationStats() @@ -2535,6 +2610,29 @@ def process_projects_hierarchy( except Exception as e: stats.add_warning(f"Failed to initialize cache: {e}") + # Per-project destination (#151). When `output_dir` / + # `expand_paths` / `filter_path` are unset this returns + # `project_dir` (legacy in-place behaviour). When the + # filter excludes this project, returns None. + cached_working_dirs: Optional[list[str]] = None + if cache_manager is not None: + try: + cached_working_dirs = cache_manager.get_working_directories() + except Exception: + cached_working_dirs = None + dest_dir = project_destination( + project_dir, + output_dir=output_dir, + expand_paths=expand_paths, + filter_path=filter_path, + cached_working_directories=cached_working_dirs, + ) + if dest_dir is None: + # Filter-out: don't process this project at all. + if not silent: + print(f" {project_dir.name}: skipped (filter)") + continue + # Phase 1: Fast check if anything needs updating (mtime comparison only) # Exclude agent files - they are loaded via session references, not directly jsonl_files = [ @@ -2560,30 +2658,48 @@ def process_projects_hierarchy( else 0 ) total_archived += archived_count - output_path = project_dir / "combined_transcripts.html" + # Output destination — `dest_dir` for #151's `--output` / + # `--expand-paths` / `--filter-path`, falling back to the + # source project_dir for legacy in-place behaviour. Filename + # uses the same {variant}.{ext} convention as + # `convert_jsonl_to`. 
+ output_path = dest_dir / combined_name # Check combined_stale using the appropriate cache: # - Paginated projects store data in html_pages table (via save_page_cache) # - Non-paginated projects store data in html_cache table (via update_html_cache) if cache_manager is not None: - existing_page_count = cache_manager.get_page_count() + existing_page_count = cache_manager.get_page_count(variant) if existing_page_count > 0: - # Paginated project: check page 1 staleness - combined_stale = cache_manager.is_page_stale(1, page_size)[0] + # Paginated project: check page 1 staleness for the + # current --format/--detail/--compact variant. + combined_stale = cache_manager.is_page_stale(1, page_size, variant)[ + 0 + ] else: - # Non-paginated project: check html_cache + # Non-paginated project: check html_cache for the + # variant-specific filename (e.g. + # `combined_transcripts.low.compact.md`), not the + # default `combined_transcripts.html`. combined_stale = cache_manager.is_html_stale( output_path.name, None )[0] else: combined_stale = True - # Determine if we need to do any work - needs_work = ( - bool(modified_files) - or bool(stale_sessions) - or combined_stale - or not output_path.exists() - ) + # Determine if we need to do any work. With + # `write_combined=False`, the combined-transcript file + # isn't produced — its staleness / on-disk presence is + # irrelevant; only modified sources / stale per-session + # files matter. 
+ if write_combined: + needs_work = ( + bool(modified_files) + or bool(stale_sessions) + or combined_stale + or not output_path.exists() + ) + else: + needs_work = bool(modified_files) or bool(stale_sessions) # Build archived suffix for output (shown on both cached and work paths) archived_suffix = ( @@ -2623,6 +2739,8 @@ def process_projects_hierarchy( page_size=page_size, detail=detail, compact=compact, + output_root=(dest_dir if dest_dir != project_dir else None), + write_combined=write_combined, ) # Track timing @@ -2658,14 +2776,18 @@ def process_projects_hierarchy( if cached_project_data is not None: # Track total sessions for stats stats.sessions_total = len(cached_project_data.sessions) + # Path the index uses to link to this project's + # combined transcript (and to enumerate variants). + # Same as `project_dir.name` in legacy mode. + rel_dest = _rel_to_index(dest_dir) # Use cached aggregation data project_summaries.append( { "name": project_dir.name, "path": project_dir, - "html_file": f"{project_dir.name}/{output_path.name}", + "html_file": f"{rel_dest}/{output_path.name}", "html_variants": _enumerate_project_variants( - project_dir, project_dir.name + dest_dir, str(rel_dest) ), "jsonl_count": jsonl_count, "message_count": cached_project_data.total_message_count, @@ -2678,6 +2800,7 @@ def process_projects_hierarchy( "earliest_timestamp": cached_project_data.earliest_timestamp, "working_directories": cache_manager.get_working_directories(), "is_archived": False, + "combined_suppressed": not write_combined, "sessions": [ { "id": session_data.session_id, @@ -2694,6 +2817,14 @@ def process_projects_hierarchy( "message_count": session_data.message_count, "first_user_message": session_data.first_user_message or "[No user message found in session.]", + # Per-session link relative to the index + # root. Used by the index renderer when + # `combined_suppressed` is True so the + # index can link directly to the + # `session-{id}.{ext}` files. 
+ "file": ( + f"{rel_dest}/session-{session_data.session_id}.{combined_ext}" + ), } for session_data in cached_project_data.sessions.values() # Filter out warmup-only and empty sessions (agent-only) @@ -2811,13 +2942,21 @@ def process_projects_hierarchy( team_name_per_session[_sid] = _tn team_names_set: set[str] = set(team_name_per_session.values()) + rel_dest = _rel_to_index(dest_dir) + # Post-decorate `sessions_data` with per-session file links + # (matches the cached path's shape so the index renderer + # can use `session.file` uniformly under + # `combined_suppressed`). + for _sd in sessions_data: + if "file" not in _sd: + _sd["file"] = f"{rel_dest}/session-{_sd['id']}.{combined_ext}" project_summaries.append( { "name": project_dir.name, "path": project_dir, - "html_file": f"{project_dir.name}/{output_path.name}", + "html_file": f"{rel_dest}/{output_path.name}", "html_variants": _enumerate_project_variants( - project_dir, project_dir.name + dest_dir, str(rel_dest) ), "jsonl_count": jsonl_count, "message_count": len(messages), @@ -2832,6 +2971,7 @@ def process_projects_hierarchy( if cache_manager else [], "is_archived": False, + "combined_suppressed": not write_combined, "sessions": sessions_data, "team_names": sorted(team_names_set), } @@ -2862,19 +3002,40 @@ def process_projects_hierarchy( if cached_project_data is None: continue + # Apply --filter-path / --expand-paths to archived + # projects too. Note: archived dirs have no JSONLs to peek, + # so resolution falls back to cache (which exists for + # archived projects) or naive last-resort. 
+ archived_cached_dirs: Optional[list[str]] = None + try: + archived_cached_dirs = cache_manager.get_working_directories() + except Exception: + archived_cached_dirs = None + archived_dest = project_destination( + archived_dir, + output_dir=output_dir, + expand_paths=expand_paths, + filter_path=filter_path, + cached_working_directories=archived_cached_dirs, + ) + if archived_dest is None: + continue + archived_project_count += 1 print( f" {archived_dir.name}: [ARCHIVED] ({len(cached_project_data.sessions)} sessions)" ) - # Add archived project to summaries + # Index entry for an archived project; the file may not + # exist at the projected path until the user re-renders. + archived_rel = _rel_to_index(archived_dest) project_summaries.append( { "name": archived_dir.name, "path": archived_dir, - "html_file": f"{archived_dir.name}/combined_transcripts.html", + "html_file": f"{archived_rel}/{combined_name}", "html_variants": _enumerate_project_variants( - archived_dir, archived_dir.name + archived_dest, str(archived_rel) ), "jsonl_count": 0, "message_count": cached_project_data.total_message_count, @@ -2887,6 +3048,7 @@ def process_projects_hierarchy( "earliest_timestamp": cached_project_data.earliest_timestamp, "working_directories": cache_manager.get_working_directories(), "is_archived": True, + "combined_suppressed": not write_combined, "sessions": [ { "id": session_data.session_id, @@ -2900,6 +3062,9 @@ def process_projects_hierarchy( "message_count": session_data.message_count, "first_user_message": session_data.first_user_message or "[No user message found in session.]", + "file": ( + f"{archived_rel}/session-{session_data.session_id}.{combined_ext}" + ), } for session_data in cached_project_data.sessions.values() if session_data.first_user_message @@ -2923,16 +3088,26 @@ def process_projects_hierarchy( # Update total projects count to include archived total_projects = len(project_dirs) + archived_project_count - # Generate index (always regenerate if outdated) + 
# Generate index (always regenerate if outdated). Index lives at + # the root of the output destination — `output_dir` if set + # (#151), else the legacy `projects_path` location. ext = get_file_extension(output_format) - index_path = projects_path / get_index_filename(output_format) + index_path = index_root / get_index_filename(output_format) renderer = get_renderer(output_format, image_export_mode) index_regenerated = False if renderer.is_outdated(index_path) or from_date or to_date or any_cache_updated: + # Markdown index renders as a nested bullet-list directory + # tree under `--expand-paths` (the natural Obsidian-vault + # shape). HTML and JSON renderers ignore the kwarg. + index_kwargs: dict[str, Any] = {} + if output_format in ("md", "markdown") and expand_paths: + index_kwargs["expand_paths_tree"] = True index_content = renderer.generate_projects_index( - project_summaries, from_date, to_date + project_summaries, from_date, to_date, **index_kwargs ) assert index_content is not None + # Ensure the index root exists when projecting into a fresh dir. + index_path.parent.mkdir(parents=True, exist_ok=True) # See issue #139: errors="replace" for lone-surrogate safety. index_path.write_text(index_content, encoding="utf-8", errors="replace") index_regenerated = True diff --git a/claude_code_log/markdown/renderer.py b/claude_code_log/markdown/renderer.py index ea20082a..c7448dc7 100644 --- a/claude_code_log/markdown/renderer.py +++ b/claude_code_log/markdown/renderer.py @@ -256,6 +256,86 @@ def _protect_html_tags(text: str) -> str: return str(rendered).rstrip("\n") +def _render_expand_paths_tree(template_projects: list[Any]) -> list[str]: + """Render `--expand-paths` Markdown index as a nested bullet-list + directory tree. + + Each unique path-component along the way becomes a bullet at its + nesting depth; sessions are leaf bullets under their parent dir. 
+ The path is derived from `project.html_file` (the combined-link + relative path) — when ``combined_suppressed`` is set the per-session + `file` entries replace the project-level combined link. + + Returns a list of Markdown lines (joined by `\\n` by the caller), + or an empty list when no tree-shapable data is present (signal to + fall back to the flat layout). + """ + # Tree node: + # {"_links": list[(label, url, timestamp_range)], # at this level + # "": } + root: dict[str, Any] = {} + + def _insert(path_parts: list[str], label: str, url: str, ts: str) -> None: + """Walk `path_parts` down the tree; record the link at the leaf + directory. The leaf is the second-to-last path component (the + last is the filename itself).""" + node = root + # The last part of path_parts is the filename; walk up to its + # parent directory. + for part in path_parts[:-1]: + if part not in node or not isinstance(node[part], dict): + node[part] = {} + node = node[part] + node.setdefault("_links", []).append((label, url, ts)) + + def _to_posix(s: str) -> str: + """Defensive normalisation: split on `/` only after folding + any backslashes the upstream may have produced (Windows + `Path` stringification, historical regressions, callers that + construct URLs from native paths).""" + return s.replace("\\", "/") + + for project in template_projects: + if project.combined_suppressed and project.sessions: + # Use the per-session links — each session becomes a leaf + # under its parent dir. + for session in project.sessions: + url = session.get("file") + if not url: + continue + url = _to_posix(url) + summary = session.get("summary") + short_id = (session.get("id") or "")[:8] + label = summary if summary else f"session {short_id}" + ts = session.get("timestamp_range") or "" + _insert(url.split("/"), label, url, ts) + else: + # Combined-link mode: use the project's html_file as the + # single leaf under its parent dir. 
Translate `.html` + # filenames so the Markdown index links land at `.md` peers. + url = _to_posix(project.html_file).replace(".html", ".md") + ts = project.formatted_time_range or "" + _insert(url.split("/"), project.display_name, url, ts) + + if not root: + return [] + + lines: list[str] = [] + + def _emit(node: dict[str, Any], depth: int) -> None: + indent = " " * depth + # Directories first (alphabetical), then leaf-link entries. + for name in sorted(k for k in node if k != "_links"): + lines.append(f"{indent}- **{name}/**") + _emit(node[name], depth + 1) + for label, url, ts in node.get("_links", []): + ts_suffix = f" — *{ts}*" if ts else "" + lines.append(f"{indent}- [{label}]({url}){ts_suffix}") + + _emit(root, 0) + return lines + + class MarkdownRenderer(Renderer): """Markdown renderer for Claude Code transcripts.""" @@ -1822,8 +1902,20 @@ def generate_projects_index( project_summaries: list[dict[str, Any]], from_date: Optional[str] = None, to_date: Optional[str] = None, + expand_paths_tree: bool = False, ) -> str: - """Generate a Markdown projects index page.""" + """Generate a Markdown projects index page. + + Args: + project_summaries: Per-project summary dicts. + from_date / to_date: Date-filter labels for the title. + expand_paths_tree: When True (Obsidian mode — `--expand-paths`), + render the index as a nested bullet-list directory tree + derived from each project's `html_file` (or per-session + `file`) path. Each directory level becomes a bullet, + with sessions as nested leaf bullets. Falls back to the + flat per-project layout otherwise. + """ title = title_for_projects_index(project_summaries, from_date, to_date) template_projects, template_summary = prepare_projects_index(project_summaries) @@ -1838,11 +1930,28 @@ def generate_projects_index( ) parts.append("") + # `--expand-paths` Obsidian mode: render the index as a nested + # bullet-list mirroring the projected directory tree. 
Each path + # component becomes a bullet; per-session files are the leaves. + # The flat per-project listing remains the default for HTML and + # for non-expand-paths Markdown. + if expand_paths_tree: + tree_lines = _render_expand_paths_tree(template_projects) + if tree_lines: + parts.extend(tree_lines) + return "\n".join(parts) + # Project list for project in template_projects: - # Derive markdown link from html_file path - md_link = project.html_file.replace(".html", ".md") - parts.append(f"## [{project.display_name}]({md_link})") + if project.combined_suppressed: + # `--combined no` mode: header is a plain heading (no + # link to the non-existent combined file); per-session + # bullets link directly to `session-{id}.md` files. + parts.append(f"## {project.display_name}") + else: + # Derive markdown link from html_file path + md_link = project.html_file.replace(".html", ".md") + parts.append(f"## [{project.display_name}]({md_link})") # Use actual session count (filtered) like HTML does session_count = ( len(project.sessions) if project.sessions else project.jsonl_count @@ -1851,6 +1960,26 @@ def generate_projects_index( parts.append(f"- Messages: {project.message_count}") if project.formatted_time_range: parts.append(f"- Date range: {project.formatted_time_range}") + # Per-session bullet links (only when combined is + # suppressed — otherwise the combined-link header already + # serves as the project's single entry point). + if project.combined_suppressed and project.sessions: + parts.append("") + parts.append("### Sessions") + for session in project.sessions: + file_link = session.get("file") + if not file_link: + continue + # Derive label: prefer summary / title, fall back + # to the short session-id prefix. 
+ summary = session.get("summary") + short_id = (session.get("id") or "")[:8] + label = summary if summary else f"session {short_id}" + timestamp_range = session.get("timestamp_range") or "" + timestamp_suffix = ( + f" — *{timestamp_range}*" if timestamp_range else "" + ) + parts.append(f"- [{label}]({file_link}){timestamp_suffix}") parts.append("") return "\n".join(parts) diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 16ab6281..e90a9faa 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -479,6 +479,12 @@ def __init__(self, project_data: dict[str, Any]): self.earliest_timestamp = project_data.get("earliest_timestamp", "") self.sessions = project_data.get("sessions", []) self.working_directories = project_data.get("working_directories", []) + # `--combined no` (#151 follow-up): when set, the index should + # link directly to per-session files (`session["file"]`) rather + # than the (skipped) combined-transcript file. + self.combined_suppressed: bool = bool( + project_data.get("combined_suppressed", False) + ) # Teammates feature — distinct team names across this project's # sessions. Computed in get_all_cached_projects from each # SessionCacheData.team_name. diff --git a/claude_code_log/utils.py b/claude_code_log/utils.py index f6a3022d..c1b07823 100644 --- a/claude_code_log/utils.py +++ b/claude_code_log/utils.py @@ -155,6 +155,266 @@ def get_project_display_name( return display_name +def path_looks_absolute(s: str) -> bool: + """True if ``s`` looks like an absolute path on either POSIX or + Windows. 
Decoupled from the host OS so JSONL-stored cwds don't + silently mismatch when a Linux-recorded transcript is processed + on Windows or vice versa (#151).""" + if not s: + return False + from pathlib import PurePosixPath, PureWindowsPath + + return PurePosixPath(s).is_absolute() or PureWindowsPath(s).is_absolute() + + +def _split_real_path_for_join(real_path_str: str) -> list[str]: + """Decompose a real-path string into the parts that should be + joined under ``output_dir`` for ``--expand-paths``. + + Form-aware: POSIX-shaped strings (``/foo/bar``) yield + ``['foo', 'bar']``; Windows-shaped strings (``C:\\foo\\bar``) + yield ``['C', 'foo', 'bar']`` (drive letter kept as a path + component, colon stripped). Relative inputs pass through as-is. + + Pure path-string inspection — no host-OS dependence; same JSONL + cwd produces the same destination tree on Linux, macOS, or + Windows. + """ + from pathlib import PurePosixPath, PureWindowsPath + + p_posix = PurePosixPath(real_path_str) + if p_posix.is_absolute(): + return list(p_posix.parts[1:]) # drop leading '/' + p_win = PureWindowsPath(real_path_str) + if p_win.is_absolute(): + # 'C:\foo\bar' → drive='C:', parts=('C:\\', 'foo', 'bar') + # Keep the drive as a leading dirname segment, strip the colon + # so it works as a real directory name on POSIX too. + drive = p_win.drive.rstrip(":") + rest = list(p_win.parts[1:]) + return [drive, *rest] if drive else rest + # Relative — POSIX-style component split. + return list(p_posix.parts) + + +def project_dir_to_real_path( + project_dir: Path, + cached_working_directories: Optional[list[str]] = None, +) -> Path: + """Recover the real on-disk path for a Claude project directory. + + Claude Code encodes project paths flatly: ``/`` and leading ``.`` + both become ``-`` (e.g. ``/home/joe/.claude`` → + ``-home-joe--claude``). The encoding is **lossy** — ``-home-joe-x-y`` + could mean either ``/home/joe/x/y`` or ``/home/joe/x-y``. 
The cache + (and live JSONLs) preserve the original ``cwd`` so we can disambiguate + without parsing the encoded name. + + Resolution strategy (issue #151): + + 1. **Cache hit** — if ``cached_working_directories`` is non-empty, + use its first entry. Authoritative — that's what Claude Code + recorded at session time. + 2. **JSONL peek** — open the project's first JSONL, scan up to a + handful of lines for the first entry with a ``cwd`` field, + return that. Cheap (one ``json.loads`` per line, no model + validation). + 3. **Naive last-resort** — strip the leading ``-`` and replace + remaining ``-``s with ``/``. Best-effort only; collapses + ambiguity in the lossy direction. Used when the project dir + has been emptied (orphan archived dir) and no cache survives. + + Args: + project_dir: The encoded project directory + (e.g. ``~/.claude/projects/-home-joe-project-A``). + cached_working_directories: Optional cached ``working_directories`` + list from the project's cache (``ProjectCache.working_directories``). + + Returns: + The recovered real path. May be a best-effort guess in the + last-resort case. + """ + # Tier 1: cache. Only accept absolute paths — relative or oddly + # shaped values fall through (e.g. test fixtures with synthetic + # `cwd` entries). Absoluteness check is form-aware (POSIX or + # Windows shapes), so a Linux-recorded cwd processed on Windows + # still resolves through this tier. + if cached_working_directories: + real_dirs = [ + wd + for wd in cached_working_directories + if not _is_temp_path(wd) and path_looks_absolute(wd) + ] + if real_dirs: + return Path(real_dirs[0]) + + # Tier 2: peek the first JSONL for a `cwd` field. Same + # form-aware absoluteness guard as tier 1. + if project_dir.is_dir(): + # Skip agent-* sidechain files; they may not carry the + # top-level project cwd. Take any other JSONL. 
+ for jsonl_path in sorted(project_dir.glob("*.jsonl")): + if jsonl_path.name.startswith("agent-"): + continue + cwd_from_peek = _peek_jsonl_for_cwd(jsonl_path) + if cwd_from_peek and path_looks_absolute(cwd_from_peek): + return Path(cwd_from_peek) + # First non-agent JSONL exhausted with no usable cwd — + # bail out rather than scanning every file. + break + + # Tier 3: naive last-resort. Recovers leading-dot dir components + # via `--` → `/.` mapping (Claude Code encodes `/.foo` as `--foo`). + # Remaining ambiguity (`/foo-bar` vs `/foo/bar`) collapses toward + # the more-segments interpretation; documented as best-effort. + name = project_dir.name + if name.startswith("-"): + body = name[1:].replace("--", "/.").replace("-", "/") + return Path("/" + body) + return Path(name.replace("--", "/.").replace("-", "/")) + + +# Maximum number of lines we read from a project's first JSONL when +# trying to recover the project's `cwd`. Real-world JSONLs put `cwd` +# on the very first user/assistant entry, so 32 is generous. +_PEEK_JSONL_MAX_LINES = 32 + + +def _peek_jsonl_for_cwd(jsonl_path: Path) -> Optional[str]: + """Return the first non-empty ``cwd`` value found in the JSONL, + or ``None`` if none is found within the peek window.""" + import json + from typing import cast + + try: + with jsonl_path.open("r", encoding="utf-8", errors="replace") as fh: + for _ in range(_PEEK_JSONL_MAX_LINES): + line = fh.readline() + if not line: + return None + line = line.strip() + if not line: + continue + try: + entry: object = json.loads(line) + except json.JSONDecodeError: + continue + if not isinstance(entry, dict): + continue + # `json.loads` produces Unknown-typed values; cast to + # a concrete shape for pyright. Runtime is unaffected. + cwd = cast("dict[str, object]", entry).get("cwd") + if isinstance(cwd, str) and cwd: + return cwd + except OSError: + return None + return None + + +# Recognised output format suffixes for the `--output` dir-vs-file +# heuristic. 
If a user passes ``--output /tmp/out.md`` we treat it as +# a file; ``--output /tmp/obsidian/`` is a directory. +_OUTPUT_FILE_SUFFIXES = frozenset({".html", ".md", ".markdown", ".json"}) + + +def output_path_is_file(output: Path) -> bool: + """Heuristic for ``--output`` interpretation (issue #151). + + A path is a *file* destination when its suffix is one of the + recognised output-format extensions; otherwise it's a *directory* + destination. Doesn't touch the filesystem — pure path-string + inspection. + """ + return output.suffix.lower() in _OUTPUT_FILE_SUFFIXES + + +def project_destination( + project_dir: Path, + *, + output_dir: Optional[Path], + expand_paths: bool, + filter_path: Optional[str], + cached_working_directories: Optional[list[str]] = None, +) -> Optional[Path]: + """Compute the per-project output destination directory (issue #151). + + Implements the flag interaction matrix from + ``work/obsidian-friendly-output.md``. Pure function — no I/O beyond + what ``project_dir_to_real_path`` may do (cache or one JSONL peek). + + Args: + project_dir: The source project directory under + ``~/.claude/projects/`` (e.g. ``-home-joe-project-A``). + output_dir: Target root, or None for legacy in-place behaviour. + expand_paths: When True, project's flat name is expanded back + to its real on-disk path under ``output_dir``. + filter_path: When set, restrict to projects whose path + (real path if ``expand_paths``, else flat dir name) + starts with the prefix. With ``expand_paths``, the + matched prefix is also truncated from the destination. + cached_working_directories: Optional cached working dirs for + ``project_dir_to_real_path``. + + Returns: + Destination directory, or ``None`` if the project should be + skipped (filter excluded it). + """ + # Legacy: no --output → write into the source dir (current behaviour). + if output_dir is None: + return project_dir + + # With --expand-paths: resolve the real path and (optionally) trim + # the filter prefix. 
Form-aware throughout — POSIX and Windows
+    # path strings are handled symmetrically so a transcript recorded
+    # on one platform projects predictably on the other.
+    if expand_paths:
+        from pathlib import PurePosixPath, PureWindowsPath
+
+        real_path = project_dir_to_real_path(project_dir, cached_working_directories)
+        # `as_posix()` preserves the original form across platforms:
+        # POSIX-form paths stay `/home/...`, Windows-form paths stay
+        # `C:/Users/...`. The bare `str()` would convert `/home/joe`
+        # to `\home\joe` on Windows, which then mismatches our
+        # form-aware detection and joins to drive root.
+        real_str = real_path.as_posix()
+        if filter_path:
+            # Match using the same path-shape family as the real path
+            # (POSIX-form `/home/joe` filters POSIX-form real paths;
+            # Windows-form `C:\Users\joe` filters Windows-form real
+            # paths). Mixing forms is a user error and produces None.
+            if PurePosixPath(real_str).is_absolute():
+                pp_cls = PurePosixPath
+            elif PureWindowsPath(real_str).is_absolute():
+                pp_cls = PureWindowsPath
+            else:
+                pp_cls = PurePosixPath
+            try:
+                rel = pp_cls(real_str).relative_to(pp_cls(filter_path))
+            except ValueError:
+                # Real path is not under filter prefix — skip.
+                return None
+            return output_dir.joinpath(*rel.parts)
+        # Real-path tree directly under output_dir. Decompose the
+        # path string in a form-aware way: POSIX shapes drop the
+        # leading '/', Windows shapes keep the drive letter as a
+        # leading path component (so `C:\foo\bar` lands at
+        # `/C/foo/bar`).
+        rel_parts = _split_real_path_for_join(real_str)
+        return output_dir.joinpath(*rel_parts) if rel_parts else output_dir
+
+    # No --expand-paths: filter against the flat dir name (per Q2),
+    # destination keeps the flat name. Require an exact match OR a
+    # `-`-terminated prefix boundary, so `--filter-path -home-joe`
+    # matches `-home-joe` itself and `-home-joe-bar`; a bare
+    # startswith() would also accept `-home-joet-...` — the `-`
+    # boundary prevents that over-match.
+ if filter_path: + name = project_dir.name + if name != filter_path and not name.startswith(filter_path + "-"): + return None + return output_dir / project_dir.name + + def should_skip_message(text_content: str) -> bool: """ Determine if a message should be skipped in transcript rendering. diff --git a/test/test_obsidian_output.py b/test/test_obsidian_output.py new file mode 100644 index 00000000..10ce389c --- /dev/null +++ b/test/test_obsidian_output.py @@ -0,0 +1,566 @@ +"""End-to-end tests for the Obsidian-friendly output flags (issue #151). + +Drives the converter through ``process_projects_hierarchy`` with each +flag combination from the matrix and asserts the produced directory +tree. **Markdown-scoped per Q1 resolution** — the flag mechanics live +in ``converter.py``/``utils.py``, not the renderers, so HTML/JSON parity +is asserted by code inspection rather than by re-running the matrix +per format. +""" + +import json +from pathlib import Path + +import pytest + +from claude_code_log.converter import process_projects_hierarchy + + +def _build_fake_projects_dir( + root: Path, + projects: list[tuple[str, str]], +) -> Path: + """Create a fake `~/.claude/projects/`-shaped directory. + + Args: + root: tmp_path-style scratch directory. + projects: list of (encoded_name, real_cwd) pairs. + Returns: + The projects-dir path. + """ + projects_dir = root / "projects" + projects_dir.mkdir() + for encoded, cwd in projects: + proj = projects_dir / encoded + proj.mkdir() + # Minimal session JSONL — enough for the loader to find one + # session and produce one combined transcript. 
+ entry = { + "parentUuid": None, + "isSidechain": False, + "userType": "external", + "cwd": cwd, + "sessionId": f"session-{encoded.lstrip('-')[:32]}", + "version": "2.1.0", + "type": "user", + "uuid": f"uuid-{encoded.lstrip('-')[:32]}", + "timestamp": "2026-05-10T10:00:00.000Z", + "message": { + "role": "user", + "content": [{"type": "text", "text": f"hi from {encoded}"}], + }, + } + (proj / "session.jsonl").write_text(json.dumps(entry) + "\n", encoding="utf-8") + return projects_dir + + +@pytest.fixture +def fake_projects(tmp_path: Path) -> Path: + """Three encoded projects with realistic absolute cwds (which is + what the JSONL-peek tier of `project_dir_to_real_path` will pick up). + """ + return _build_fake_projects_dir( + tmp_path, + projects=[ + ("-home-joe-project-A", "/home/joe/project/A"), + ("-home-joe-project-B", "/home/joe/project/B"), + ("-home-jane-project-C", "/home/jane/project/C"), + ], + ) + + +@pytest.fixture +def isolated_cache(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + """Steer the cache to tmp so the test doesn't pollute / depend on + the user's real `~/.claude/projects/` cache.""" + cache_path = tmp_path / "cache.db" + monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(cache_path)) + return cache_path + + +# Keep usage explicit so the fixture clearly applies even when its +# return value isn't read directly in the test body. +_ = isolated_cache + + +class TestObsidianOutputMatrix: + """The matrix from work/obsidian-friendly-output.md, end-to-end. + Each test asserts the produced directory shape under the relevant + flag combination.""" + + def test_legacy_no_output(self, fake_projects: Path, isolated_cache: Path): + """Legacy: `--output` unset → outputs land inside each + source project_dir under the projects tree (current behaviour + from before #151).""" + process_projects_hierarchy( + fake_projects, + output_format="md", + ) + + # Each project gets a combined_transcripts.md under its source. 
+        for encoded in [
+            "-home-joe-project-A",
+            "-home-joe-project-B",
+            "-home-jane-project-C",
+        ]:
+            assert (fake_projects / encoded / "combined_transcripts.md").exists()
+        # Index at the projects-dir root.
+        assert (fake_projects / "index.md").exists()
+
+    def test_output_only_flat_copy(
+        self,
+        fake_projects: Path,
+        isolated_cache: Path,
+        tmp_path: Path,
+    ):
+        """`--output` alone → flat copy of each project under
+        `<output_dir>/<encoded-name>/`. Closes the implicit gap (`--output` was
+        previously silently ignored for `--all-projects`)."""
+        out = tmp_path / "out-flat"
+        process_projects_hierarchy(
+            fake_projects,
+            output_format="md",
+            output_dir=out,
+        )
+        assert (out / "-home-joe-project-A" / "combined_transcripts.md").exists()
+        assert (out / "-home-joe-project-B" / "combined_transcripts.md").exists()
+        assert (out / "-home-jane-project-C" / "combined_transcripts.md").exists()
+        assert (out / "index.md").exists()
+
+    def test_expand_paths_full_tree(
+        self,
+        fake_projects: Path,
+        isolated_cache: Path,
+        tmp_path: Path,
+    ):
+        """`--output <dir> --expand-paths` → expanded real-path tree under
+        `<output_dir>/`. Encoded names are resolved via JSONL peek (the
+        fixture's cwd field)."""
+        out = tmp_path / "out-expanded"
+        process_projects_hierarchy(
+            fake_projects,
+            output_format="md",
+            output_dir=out,
+            expand_paths=True,
+        )
+        assert (out / "home/joe/project/A/combined_transcripts.md").exists()
+        assert (out / "home/joe/project/B/combined_transcripts.md").exists()
+        assert (out / "home/jane/project/C/combined_transcripts.md").exists()
+        assert (out / "index.md").exists()
+        # The encoded-name flat directories must NOT exist — we
+        # expanded, didn't both expand and copy.
+        assert not (out / "-home-joe-project-A").exists()
+
+    def test_expand_paths_filter_match_truncates(
+        self,
+        fake_projects: Path,
+        isolated_cache: Path,
+        tmp_path: Path,
+    ):
+        """`--filter-path /home/joe --expand-paths`: filter against
+        real path; truncate the prefix; matching projects land at
+        `<output_dir>/<path-minus-prefix>/`."""
+        out = tmp_path / "out-filtered"
+        process_projects_hierarchy(
+            fake_projects,
+            output_format="md",
+            output_dir=out,
+            expand_paths=True,
+            filter_path="/home/joe",
+        )
+        # Projects under /home/joe matched, prefix truncated.
+        assert (out / "project/A/combined_transcripts.md").exists()
+        assert (out / "project/B/combined_transcripts.md").exists()
+        # Project under /home/jane filtered out — no output produced.
+        assert not (out / "project/C").exists()
+        assert not (out / "home").exists()  # would only exist if /home/joe survived
+        assert (out / "index.md").exists()
+
+    def test_filter_flat_no_expand(
+        self,
+        fake_projects: Path,
+        isolated_cache: Path,
+        tmp_path: Path,
+    ):
+        """`--filter-path -home-joe` without `--expand-paths`: filter
+        against the encoded dir name; no truncation; matching
+        projects land at `<output_dir>/<encoded-name>/`."""
+        out = tmp_path / "out-flat-filtered"
+        process_projects_hierarchy(
+            fake_projects,
+            output_format="md",
+            output_dir=out,
+            expand_paths=False,
+            filter_path="-home-joe",
+        )
+        # Two `-home-joe-...` projects matched; flat name preserved.
+        assert (out / "-home-joe-project-A" / "combined_transcripts.md").exists()
+        assert (out / "-home-joe-project-B" / "combined_transcripts.md").exists()
+        # `-home-jane-...` doesn't start with `-home-joe`.
+ assert not (out / "-home-jane-project-C").exists() + + +# ----------------------------------------------------------------------------- +# CLI validation guards (#151 footgun fixes from monk's review) +# ----------------------------------------------------------------------------- + + +class TestCliValidationGuards: + """The CLI rejects relative `--filter-path` when paired with + `--expand-paths` (would otherwise silently exclude every project), + and warns when the new flags are passed in no-op contexts.""" + + def test_relative_filter_path_with_expand_is_rejected( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + """Loud rejection — without this, `--filter-path home/joe` + (forgetting the leading `/`) would match no projects silently + because `Path.relative_to` raises for relative paths.""" + from click.testing import CliRunner + + from claude_code_log.cli import main + + out = tmp_path / "out" + runner = CliRunner() + result = runner.invoke( + main, + [ + str(fake_projects), + "--all-projects", + "--output", + str(out), + "--expand-paths", + "--filter-path", + "home/joe", # relative — should be rejected + ], + ) + assert result.exit_code != 0 + assert "must be an absolute path" in result.output + # No projects rendered. 
+ assert not out.exists() or not any( + (out / p).exists() for p in ["home", "project", "-home-joe-project-A"] + ) + + def test_absolute_filter_path_with_expand_is_accepted( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + """Counterpart: absolute `--filter-path` passes the guard.""" + from click.testing import CliRunner + + from claude_code_log.cli import main + + out = tmp_path / "out" + runner = CliRunner() + result = runner.invoke( + main, + [ + str(fake_projects), + "--all-projects", + "--output", + str(out), + "--expand-paths", + "--filter-path", + "/home/joe", + "--format", + "md", + ], + ) + assert result.exit_code == 0, result.output + # `--expand-paths` defaults `--combined` to `no` (Obsidian + # mode), so the combined file is suppressed; check for the + # per-session output instead. + sessions = list((out / "project/A").glob("session-*.md")) + assert sessions, "expected per-session output under the expanded tree" + + def test_warns_when_flags_used_without_all_projects( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + """`--expand-paths` against a single-file/single-project + target (without `--all-projects`) is a no-op; user gets a + warning rather than silent ignore.""" + from click.testing import CliRunner + + from claude_code_log.cli import main + + # Point at a single project_dir with --expand-paths but no + # --all-projects (and explicitly no `output` to make the + # control flow predictable). Should warn. + single_project = fake_projects / "-home-joe-project-A" + runner = CliRunner() + result = runner.invoke( + main, + [ + str(single_project), + "--expand-paths", + "--format", + "md", + ], + ) + # The exact stderr-output ordering is implementation-dependent, + # but the warning text must surface somewhere — and the + # invocation must still succeed (warning, not error). 
+ assert result.exit_code == 0, result.output + assert "require --all-projects" in result.output + + def test_warns_when_expand_paths_with_file_output( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + """`--output some-file.md --expand-paths` is a no-op (file + output goes through the single-file path); warn.""" + from click.testing import CliRunner + + from claude_code_log.cli import main + + runner = CliRunner() + result = runner.invoke( + main, + [ + str(fake_projects), + "--all-projects", + "--output", + str(tmp_path / "out.md"), # file-suffixed + "--expand-paths", + "--format", + "md", + ], + ) + # Warning, not error — single-file path still runs successfully. + assert result.exit_code == 0, result.output + assert "require --output to be a directory" in result.output + + +# ----------------------------------------------------------------------------- +# --combined yes/no/only flag (#151 follow-up) +# ----------------------------------------------------------------------------- + + +class TestCombinedFlag: + """The `--combined` flag controls whether the combined-transcript + and per-session files are emitted. Default is `yes` except when + `--expand-paths` is set, in which case it switches to `no` + (Obsidian-vault-friendly default — combined is dead weight when + each session has its own .md file).""" + + def test_combined_yes_emits_both( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + out = tmp_path / "out-both" + process_projects_hierarchy( + fake_projects, + output_format="md", + output_dir=out, + write_combined=True, + generate_individual_sessions=True, + ) + assert (out / "-home-joe-project-A" / "combined_transcripts.md").exists() + # Per-session file too. Filename is session-{session_id}.md. 
+ sessions = list((out / "-home-joe-project-A").glob("session-*.md")) + assert sessions, "expected at least one per-session file" + + def test_combined_no_skips_combined( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + out = tmp_path / "out-none" + process_projects_hierarchy( + fake_projects, + output_format="md", + output_dir=out, + write_combined=False, + generate_individual_sessions=True, + ) + # Combined file MUST NOT exist. + assert not (out / "-home-joe-project-A" / "combined_transcripts.md").exists() + # Per-session files SHOULD exist. + sessions = list((out / "-home-joe-project-A").glob("session-*.md")) + assert sessions + + def test_combined_only_skips_per_session( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + out = tmp_path / "out-only" + process_projects_hierarchy( + fake_projects, + output_format="md", + output_dir=out, + write_combined=True, + generate_individual_sessions=False, + ) + assert (out / "-home-joe-project-A" / "combined_transcripts.md").exists() + # Per-session files SHOULD NOT exist. + sessions = list((out / "-home-joe-project-A").glob("session-*.md")) + assert not sessions, "per-session files leaked through --combined only" + + def test_cli_expand_paths_default_is_combined_no( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + """The default for `--combined` when `--expand-paths` is set + should be `no` — Obsidian users want per-session files only.""" + from click.testing import CliRunner + + from claude_code_log.cli import main + + out = tmp_path / "out-default" + runner = CliRunner() + result = runner.invoke( + main, + [ + str(fake_projects), + "--all-projects", + "--output", + str(out), + "--expand-paths", + "--format", + "md", + ], + ) + assert result.exit_code == 0, result.output + # Combined files should NOT have been emitted under the + # expanded tree. 
+ combined_files = list(out.rglob("combined_transcripts*.md")) + assert not combined_files, ( + f"--combined no should be the default with --expand-paths, " + f"but {len(combined_files)} combined files were written" + ) + # Per-session files SHOULD be present. + session_files = list(out.rglob("session-*.md")) + assert session_files + + def test_cli_expand_paths_yields_bullet_tree_index( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + """Markdown index under `--expand-paths` renders as a nested + bullet-list directory tree (each path component a bullet, + sessions as leaf bullets).""" + from click.testing import CliRunner + + from claude_code_log.cli import main + + out = tmp_path / "out-tree" + runner = CliRunner() + result = runner.invoke( + main, + [ + str(fake_projects), + "--all-projects", + "--output", + str(out), + "--expand-paths", + "--format", + "md", + ], + ) + assert result.exit_code == 0, result.output + index_md = (out / "index.md").read_text(encoding="utf-8") + # Directory bullets — bold, trailing slash. + assert "- **home/**" in index_md + assert "- **joe/**" in index_md + # Leaf session links (markdown link syntax pointing into the + # expanded tree). + assert "(home/joe/project/A/session-" in index_md + # The traditional flat `## [project](combined.md)` heading + # shape must NOT appear in tree mode. + assert "## [home/joe/project/A]" not in index_md + + def test_bullet_tree_normalises_backslash_separators(self): + """Regression: on Windows, `str(Path("home/joe"))` is + `home\\joe`, so any leaked native-separator URL would land + in the bullet-tree as a single un-split leaf line. 
The + builder must fold backslashes to `/` before splitting.""" + from types import SimpleNamespace + + from claude_code_log.markdown.renderer import _render_expand_paths_tree + + project_backslash = SimpleNamespace( + combined_suppressed=True, + html_file="ignored.html", + display_name="proj", + formatted_time_range="2026-05-10 10:00:00", + sessions=[ + { + "id": "abcdef1234", + "summary": "S1", + "timestamp_range": "2026-05-10 10:00:00", + "file": r"home\joe\project\B\session-abcdef1234.md", + } + ], + ) + project_forward = SimpleNamespace( + combined_suppressed=True, + html_file="ignored.html", + display_name="proj", + formatted_time_range="2026-05-10 10:00:00", + sessions=[ + { + "id": "deadbeef99", + "summary": "S2", + "timestamp_range": "2026-05-10 11:00:00", + "file": "home/joe/project/A/session-deadbeef99.md", + } + ], + ) + lines = _render_expand_paths_tree([project_backslash, project_forward]) + joined = "\n".join(lines) + assert "- **home/**" in joined + assert "- **joe/**" in joined + assert "- **project/**" in joined + # Both leaf links must be present, with forward slashes only. + assert "(home/joe/project/B/session-abcdef1234.md)" in joined + assert "(home/joe/project/A/session-deadbeef99.md)" in joined + assert "\\" not in joined + + def test_cli_combined_only_alias_with_no_individual_sessions( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + """`--no-individual-sessions` is the back-compat alias for + `--combined only`.""" + from click.testing import CliRunner + + from claude_code_log.cli import main + + out = tmp_path / "out-noindividual" + runner = CliRunner() + result = runner.invoke( + main, + [ + str(fake_projects), + "--all-projects", + "--output", + str(out), + "--no-individual-sessions", + "--format", + "md", + ], + ) + assert result.exit_code == 0, result.output + # Combined files present, per-session files absent. 
+ assert list(out.rglob("combined_transcripts*.md")) + assert not list(out.rglob("session-*.md")) + + def test_cli_conflicting_combined_no_and_no_individual_sessions_rejected( + self, fake_projects: Path, isolated_cache: Path, tmp_path: Path + ): + """`--combined no` + `--no-individual-sessions` is a conflict + (both attempt to skip per-session files, but --no-individual-sessions + implies combined-only). Should be rejected.""" + from click.testing import CliRunner + + from claude_code_log.cli import main + + out = tmp_path / "out-conflict" + runner = CliRunner() + result = runner.invoke( + main, + [ + str(fake_projects), + "--all-projects", + "--output", + str(out), + "--no-individual-sessions", + "--combined", + "no", + "--format", + "md", + ], + ) + assert result.exit_code != 0 + assert "conflicts" in result.output.lower() or "no-individual" in result.output diff --git a/test/test_path_projection.py b/test/test_path_projection.py new file mode 100644 index 00000000..297c36f0 --- /dev/null +++ b/test/test_path_projection.py @@ -0,0 +1,262 @@ +"""Unit tests for the path-projection helpers (issue #151). + +Covers ``project_dir_to_real_path`` (three-tier resolution: cache → +JSONL peek → naive last-resort) and ``project_destination`` (the +flag-interaction matrix from ``work/obsidian-friendly-output.md``). 
+""" + +import json +from pathlib import Path + +import pytest + +from claude_code_log.utils import ( + output_path_is_file, + project_destination, + project_dir_to_real_path, +) + + +def _write_jsonl_with_cwd(jsonl_path: Path, cwd: str) -> None: + """Write a minimal JSONL line carrying a `cwd` field — enough to + exercise the JSONL-peek tier of `project_dir_to_real_path`.""" + entry = { + "parentUuid": None, + "isSidechain": False, + "userType": "external", + "cwd": cwd, + "sessionId": "11111111-1111-1111-1111-111111111111", + "version": "2.1.0", + "type": "user", + "uuid": "22222222-2222-2222-2222-222222222222", + "timestamp": "2026-05-10T10:00:00.000Z", + "message": { + "role": "user", + "content": [{"type": "text", "text": "hi"}], + }, + } + jsonl_path.write_text(json.dumps(entry) + "\n", encoding="utf-8") + + +# ----------------------------------------------------------------------------- +# project_dir_to_real_path +# ----------------------------------------------------------------------------- + + +class TestProjectDirToRealPath: + """Three-tier resolution: cache → JSONL peek → naive last-resort.""" + + def test_uses_cache_cwd(self, tmp_path: Path): + """Tier 1: when cached_working_directories is supplied, the + first absolute entry wins.""" + result = project_dir_to_real_path( + tmp_path / "-anything", + cached_working_directories=["/home/joe/x/y"], + ) + assert result == Path("/home/joe/x/y") + + def test_skips_relative_cache_entries(self, tmp_path: Path): + """Tier 1 absoluteness guard: relative `cwd` values fall + through (test fixtures sometimes carry these).""" + project_dir = tmp_path / "-skipped" + project_dir.mkdir() + # Relative cache value should be rejected; with no JSONLs to + # peek, falls through to naive last-resort. 
+ result = project_dir_to_real_path( + project_dir, + cached_working_directories=["relative-not-absolute"], + ) + # Naive: -skipped → /skipped + assert result == Path("/skipped") + + def test_skips_temp_paths_in_cache(self, tmp_path: Path): + """Tier 1: temp paths (/tmp/, macOS /private/var/folders/) + are filtered out — they're not the user's authoritative cwd.""" + project_dir = tmp_path / "-orphan" + project_dir.mkdir() + result = project_dir_to_real_path( + project_dir, + cached_working_directories=["/tmp/pytest-of-cboos/xyz"], + ) + # Filter dropped the /tmp/ entry → naive last-resort. + assert result == Path("/orphan") + + def test_peeks_jsonl_when_no_cache(self, tmp_path: Path): + """Tier 2: with no cache, the first JSONL's `cwd` is read.""" + project_dir = tmp_path / "-home-joe-x-y" + project_dir.mkdir() + _write_jsonl_with_cwd(project_dir / "session.jsonl", "/home/joe/x/y") + result = project_dir_to_real_path(project_dir) + assert result == Path("/home/joe/x/y") + + def test_peek_disambiguates_cache_collision(self, tmp_path: Path): + """Two `-home-joe-x-y` dirs with different real cwds: each + resolves correctly because the cache (or JSONL) is consulted.""" + # Same encoded name, different cwds → different real paths. + cache_a = ["/home/joe/x/y"] # subdir interpretation + cache_b = ["/home/joe/x-y"] # single-dir interpretation + result_a = project_dir_to_real_path( + Path("/anywhere/-home-joe-x-y"), + cached_working_directories=cache_a, + ) + result_b = project_dir_to_real_path( + Path("/anywhere/-home-joe-x-y"), + cached_working_directories=cache_b, + ) + assert result_a == Path("/home/joe/x/y") + assert result_b == Path("/home/joe/x-y") + + def test_peek_skips_agent_files(self, tmp_path: Path): + """`agent-*.jsonl` files (sidechains) are skipped during peek + because they may not carry the project's top-level cwd.""" + project_dir = tmp_path / "-peek-test" + project_dir.mkdir() + # Agent file FIRST alphabetically — would be picked if not + # skipped. 
Real session JSONL has the right cwd. + _write_jsonl_with_cwd(project_dir / "agent-aaaa.jsonl", "/wrong/path") + _write_jsonl_with_cwd(project_dir / "session-bbbb.jsonl", "/right/path") + result = project_dir_to_real_path(project_dir) + assert result == Path("/right/path") + + @pytest.mark.parametrize( + "encoded,expected", + [ + ("-home-cboos-bin", "/home/cboos/bin"), + # Double-dash → leading-dot dir component (`/.foo`). + ("-home-cboos--claude", "/home/cboos/.claude"), + ( + "-home-cboos-Documents-Obsidian-Work--git", + "/home/cboos/Documents/Obsidian/Work/.git", + ), + ("-home-joe-project-A", "/home/joe/project/A"), + ], + ) + def test_naive_last_resort(self, tmp_path: Path, encoded: str, expected: str): + """Tier 3: no cache, no JSONLs, no fallback file. Naive + `/`-for-`-` inversion with `--` → `/.` for dotfile dirs. + Sampled from real `~/.claude/projects/` corpus.""" + project_dir = tmp_path / encoded + # Don't mkdir — `is_dir()` returns False, so peek tier is + # skipped and we go straight to naive. + result = project_dir_to_real_path(project_dir) + assert result == Path(expected) + + +# ----------------------------------------------------------------------------- +# project_destination — the flag interaction matrix +# ----------------------------------------------------------------------------- + + +class TestProjectDestination: + """Per-project destination logic. Six matrix rows.""" + + SRC = Path("/proj/-home-joe-project-A") + OUT = Path("/tmp/obsidian") + + def test_legacy_no_output_dir(self): + """No `--output` → write into the source dir (current + behaviour — strict backwards compatibility).""" + dest = project_destination( + self.SRC, + output_dir=None, + expand_paths=False, + filter_path=None, + ) + assert dest == self.SRC + + def test_flat_copy(self): + """`--output` only → flat copy under output_dir, project + keeps its encoded name. 
Closes the previously-implicit gap + where `--output` was silently ignored in `--all-projects`.""" + dest = project_destination( + self.SRC, + output_dir=self.OUT, + expand_paths=False, + filter_path=None, + ) + assert dest == self.OUT / "-home-joe-project-A" + + def test_expand_no_filter(self): + """`--output --expand-paths` → full real-path expansion + under output_dir.""" + dest = project_destination( + self.SRC, + output_dir=self.OUT, + expand_paths=True, + filter_path=None, + cached_working_directories=["/home/joe/project/A"], + ) + assert dest == self.OUT / "home/joe/project/A" + + def test_expand_filter_match(self): + """`--expand-paths --filter-path /home/joe`: filter against + real path, truncate the prefix from the destination.""" + dest = project_destination( + self.SRC, + output_dir=self.OUT, + expand_paths=True, + filter_path="/home/joe", + cached_working_directories=["/home/joe/project/A"], + ) + assert dest == self.OUT / "project/A" + + def test_expand_filter_miss(self): + """When the real path doesn't start with the filter prefix, + the project is excluded (returns None).""" + dest = project_destination( + self.SRC, + output_dir=self.OUT, + expand_paths=True, + filter_path="/home/jane", # different user + cached_working_directories=["/home/joe/project/A"], + ) + assert dest is None + + def test_filter_match_flat(self): + """`--filter-path` without `--expand-paths` matches the flat + encoded dir name (per Q2 resolution); no truncation.""" + dest = project_destination( + self.SRC, + output_dir=self.OUT, + expand_paths=False, + filter_path="-home-joe", + ) + assert dest == self.OUT / "-home-joe-project-A" + + def test_filter_miss_flat(self): + """Flat-name filter that doesn't match the encoded prefix + excludes the project.""" + dest = project_destination( + self.SRC, + output_dir=self.OUT, + expand_paths=False, + filter_path="-home-jane", + ) + assert dest is None + + +# ----------------------------------------------------------------------------- +# 
output_path_is_file (--output suffix heuristic, Q4 resolution) +# ----------------------------------------------------------------------------- + + +class TestOutputPathIsFile: + @pytest.mark.parametrize( + "value,is_file", + [ + ("/tmp/out.md", True), + ("/tmp/out.markdown", True), + ("/tmp/out.html", True), + ("/tmp/out.json", True), + # Case-insensitive + ("/tmp/Out.HTML", True), + # No recognised suffix → directory + ("/tmp/out", False), + ("/tmp/obsidian-vault", False), + # Suffix that isn't a recognised output format + ("/tmp/out.txt", False), + ("/tmp/out.tar.gz", False), + ], + ) + def test_suffix_heuristic(self, value: str, is_file: bool): + assert output_path_is_file(Path(value)) is is_file diff --git a/work/obsidian-friendly-output.md b/work/obsidian-friendly-output.md new file mode 100644 index 00000000..95c5df54 --- /dev/null +++ b/work/obsidian-friendly-output.md @@ -0,0 +1,525 @@ +# Obsidian-friendly output (issue #151) + +## Status: Shipped (impl + tests in this PR; follow-ups recorded below) + +## Context + +Issue #151 wants three CLI flags that project Claude Code transcripts into +the same Markdown-vault topology Obsidian (and similar Markdown-based KM +tools) expect: + +``` +claude-code-log --output ~/Documents/Obsidian/ClaudeProjects \ + --expand-paths --filter-path /home/joe \ + --format md --detail low --compact +``` + +Should land sessions at: + +``` +~/Documents/Obsidian/ClaudeProjects/project/A/.low.md +``` + +The use case is the user's wider knowledge-management workflow — SAM +(the federation coordinator at `~/SAM`) and the Obsidian vault at +`~/Documents/Obsidian/Work` keep cross-project knowledge in a Markdown +tree; this feature gives Claude Code transcripts a clean projection +into that topology. + +## What's already there vs. what's missing + +The issue's framing implies `--output` already works for `--all-projects` +mode and produces a flat structure. 
**Empirically that's not what the +code does.** `claude_code_log/converter.py:process_projects_hierarchy` +writes `combined_transcripts.html` directly into each *source* +`project_dir` (e.g. `~/.claude/projects/-home-joe-project-A/`), and the +index lands at `~/.claude/projects/index.html`. `--output` is honoured +by the single-file/single-project paths (`convert_jsonl_to`, +`convert_single_session`) but is **not threaded through to +`process_projects_hierarchy`** (cli.py around line 816). + +So #151's three "flag flavours" actually decompose into: + +1. `--output <dir>` honoured in `--all-projects` mode (currently a gap). +2. `--expand-paths` — undo Claude Code's flat encoding of project dirs. +3. `--filter-path <prefix>` — select subset + truncate prefix. + +The plan addresses all three; (1) is partially a prerequisite of (2)/(3). + +## Implementation surface + +### `claude_code_log/cli.py` + +- Add two `@click.option` declarations: + - `--expand-paths` — `is_flag=True`, default False. + - `--filter-path` — `type=str` (path-like), default None. + Optional but `--expand-paths` is a soft-prerequisite (filter + truncation only meaningful with expansion). Decision: allow either + flag standalone; document the behaviour matrix (see §Scope). +- Pass both into `main()` and forward to `process_projects_hierarchy` + (and to `convert_jsonl_to` if we decide to support flat-output for + the single-directory path too). +- Validation: warn if `--expand-paths` / `--filter-path` are given + without `--output` *and* without `--all-projects` (no-op flags). + +### `claude_code_log/converter.py` + +- `process_projects_hierarchy` gains four new parameters: + - `output_dir: Optional[Path]` — destination root (was missing entirely). + - `expand_paths: bool` — flag. + - `filter_path: Optional[str]` — prefix. + - (Optional) `path_resolver: Optional[Callable]` — injection point + for tests, defaulting to a real implementation that consults the + cache for the authoritative `cwd`.
+- Inside the per-project loop, just before computing `output_path`, + decide the **destination directory for this project's outputs** + using a small helper (see §Path-projection logic). Replace the + current hard-coded `project_dir / "combined_transcripts.html"` with + `dest_dir / "combined_transcripts.html"` where `dest_dir = project_dir` + in the legacy flat case and the projected path in the new case. +- The index file (`projects_path / get_index_filename(...)`) likewise + needs a destination decision (see §Index page question). +- Filtering: when `filter_path` is set and a project's resolved path + does not start with the prefix, **skip it** (don't emit anything). + +### Renderers + +Format-agnostic: HTML, Markdown, and JSON renderers all consume the +final destination path from `converter.py`. None of them need changes +for #151. The flag is triggered via the CLI; the renderer doesn't +care whether its output lives in `~/.claude/projects/<flat-name>/` or +`~/Obsidian/<real-path>/`. + +### Tests + +- New `test/test_path_projection.py` (unit) — exercises the helper + with a mix of real-corpus names from `~/.claude/projects/` plus + synthesised edge cases. +- New `test/test_obsidian_output.py` (integration) — drives the CLI + end-to-end with a tmp `--output` and asserts the directory tree + matches the expected projected shape for each flag combination. + +--- + +## Path-projection logic + +This is the load-bearing piece. Four subtleties make it more than a +mechanical inverse: + +### Subtlety 1: Claude Code's encoding is lossy + +The forward direction (real path → flat name) is documented in +`cli.py:convert_project_path_to_claude_dir`: + +- `/` → `-` +- `.` → `-` (effectively — see real-corpus samples below) +- Leading `-` is the path-root marker.
+ +Confirmed against `~/.claude/projects/`: + +| Real path | Encoded form | +|---|---| +| `/home/cboos/bin` | `-home-cboos-bin` | +| `/home/cboos/.claude` | `-home-cboos--claude` | +| `/home/cboos/Documents/Obsidian/Work/.git` | `-home-cboos-Documents-Obsidian-Work--git` | + +Inverting is **fundamentally ambiguous**: `-home-joe-x-y` could mean +either `/home/joe/x/y` (a four-segment path with x and y as dirs) or +`/home/joe/x-y` (a three-segment path with `x-y` as a single dir). +A naïve "dash-as-separator" inverse cannot tell them apart. + +### Subtlety 2: The cache *has* the real path; if it doesn't, peek a session + +Claude Code records the actual `cwd` in every JSONL entry. Our +SQLite cache aggregates these into `cache.ProjectCache.working_directories` +and `SessionCacheData.cwd`. `convert_project_path_to_claude_dir`'s +forward direction is irrelevant here — for the inverse, we should read +the cache as the source of truth, not parse the encoded name. + +When the cache hasn't been populated yet, **peek the first JSONL** in +the project directory: open the file, read just enough lines to find +one entry with a `cwd` field, extract it. No need for the full +`parser.py` model-validation pipeline — we want a single string field, +the entry shape is stable and well-known, a tiny `json.loads(line) +.get("cwd")` loop suffices. + +Helper signature: + +```python +def project_dir_to_real_path( + project_dir: Path, + cache_manager: Optional[CacheManager] = None, +) -> Path: + """Recover the real on-disk path for a Claude project directory. + + Strategy (in order): + 1. If a cache_manager is available and the project has cached + `working_directories`, return the first entry. Authoritative + — that's the actual `cwd` Claude Code recorded at session time. + 2. Otherwise, peek the first JSONL file: read up to N lines, + json.loads each, return the first non-empty `cwd`. Cheap + (O(few KB) read, no validation overhead). + 3. 
Fall back to naïve `/`-for-`-` inversion only as a last + resort (e.g. project dir has no JSONLs left — archived but + cache evicted). + + Returns: + Path representing the recovered real path. + """ +``` + +Worked examples: + +| project_dir.name | cache hit | JSONL peek | Result | +|---|---|---|---| +| `-home-joe-project-A` | `["/home/joe/project/A"]` | — | `/home/joe/project/A` | +| `-home-cboos--claude` | `["/home/cboos/.claude"]` | — | `/home/cboos/.claude` | +| `-home-joe-x-y` (cache empty) | — | `cwd: "/home/joe/x-y"` | `/home/joe/x-y` | +| `-home-joe-x-y` (cache empty) | — | `cwd: "/home/joe/x/y"` | `/home/joe/x/y` | +| `-home-joe-orphan` (no cache, no JSONLs) | — | — | `/home/joe/orphan` (naïve last-resort) | + +Filesystem-existence-testing as a fallback was considered and rejected: +the *target* path may have moved/been deleted since the session was +recorded, and we shouldn't make resolution depend on the local FS state +in a way that produces different output for the same project on +different machines. + +### Subtlety 3: Filter-path semantics + +When `filter_path` is set: + +- **Selection**: skip projects whose resolved real path does not + satisfy `Path.is_relative_to(filter_path)` (Python 3.9+). +- **Truncation**: the surviving project's destination becomes + `output_dir / resolved.relative_to(filter_path)`. + +Worked examples for `--filter-path /home/joe --output /tmp/obsidian +--expand-paths`: + +| Resolved real path | Selected? | Destination | +|---|---|---| +| `/home/joe/project/A` | yes | `/tmp/obsidian/project/A/` | +| `/home/joe/.claude` | yes | `/tmp/obsidian/.claude/` | +| `/home/joe` | yes (matches itself) | `/tmp/obsidian/` (root) | +| `/home/jane/project/B` | no | (skipped) | + +### Subtlety 4: Flag interaction matrix + +`--filter-path` operates on **whatever path representation we're using**: +expanded real paths when `--expand-paths` is set, the flat encoded +project-dir name otherwise. 
This keeps the filter consistent with the +"current view" of project paths and avoids the surprise of a filter +silently consulting the cache when the user thought they were just +matching dir-name prefixes. + +| --output | --expand-paths | --filter-path | Behaviour | +|---|---|---|---| +| ✗ | ✗ | ✗ | Legacy: write into `~/.claude/projects/<project>/`. | +| ✓ | ✗ | ✗ | Flat copy under `<output>/<flat-name>/`. (Closes the implicit gap.) | +| ✓ | ✓ | ✗ | Expanded under `<output>/<real-path>/`. | +| ✓ | ✓ | ✓ | Expanded + filtered: filter against real path, truncate prefix, land under `<output>/<relative-path>/`. | +| ✓ | ✗ | ✓ | Filter against the flat encoded name (e.g. `--filter-path -home-joe` selects projects starting with `-home-joe-`). No prefix truncation (truncation only meaningful with `--expand-paths`). Result lands under `<output>/<flat-name>/`. | +| ✗ | (any) | (any) | Warn that the new flags are no-ops; proceed with legacy behaviour. | + +### Helper API + +```python +def project_dir_to_real_path( + project_dir: Path, + cache_manager: Optional[CacheManager] = None, +) -> Path: ... + +def project_destination( + project_dir: Path, + *, + output_dir: Optional[Path], + expand_paths: bool, + filter_path: Optional[str], + cache_manager: Optional[CacheManager] = None, +) -> Optional[Path]: + """Compute the per-project destination directory. + + Returns: + The destination Path, or None if the project should be skipped + (filter_path is set and the project doesn't match). + """ +``` + +Both pure (no I/O beyond reading the cache, which is read-only here); +both trivially testable with mocked CacheManager. + +--- + +## Index page question + +The current code writes the index to +`projects_path / get_index_filename(output_format)`. With `--output`, +the natural choice is `output_dir / get_index_filename(output_format)`. + +Two open questions: + +1. **Should the index even exist in Obsidian-friendly mode?** Obsidian + discovers files by walking its vault tree. A separate index page is + redundant in the common Obsidian use case.
Recommendation: emit it + anyway (cheap; users can ignore or `.gitignore`-equivalent it), but + add a `--no-index` flag as a follow-up if users complain. + +2. **Where does the index live when `--filter-path` truncates the + tree?** The index naturally goes at `output_dir/`, which is *above* + the truncated tree. Recommendation: keep it at `output_dir/`. The + alternative — putting it at the deepest common ancestor of the + filtered projects — would surprise users (the path depends on which + projects matched, which depends on cache state). + +--- + +## Backwards compatibility + +- Default behaviour with no new flags is **byte-identical** to current + output (verified by snapshot tests after the change). +- Closing the `--output` gap for `--all-projects` is *not* a + behaviour change because `--output` was previously silently ignored + in that mode — users who passed it got the legacy path anyway. + Documenting this in the changelog. +- `convert_project_path_to_claude_dir` (the forward direction) is + unchanged. The new helper is the inverse and lives alongside it. + +--- + +## Tests + +### Unit (`test/test_path_projection.py`) + +- `test_project_dir_to_real_path_uses_cache_cwd` — cache populated + with explicit `cwd`; helper returns it verbatim. +- `test_project_dir_to_real_path_peeks_jsonl_when_no_cache` — + no cache, but project dir has a JSONL whose first user/assistant + entry carries `cwd`. Helper peeks, extracts, returns. Sampled + corpus shapes: + - `-home-cboos-bin` → JSONL with `cwd: "/home/cboos/bin"` → `/home/cboos/bin` + - `-home-cboos--claude` → JSONL with `cwd: "/home/cboos/.claude"` → `/home/cboos/.claude` +- `test_project_dir_to_real_path_naive_last_resort` — no cache AND + no JSONLs left (orphan archived dir); helper returns naïve + `/`-for-`-` inversion. Documented as best-effort. 
+- `test_project_dir_to_real_path_disambiguates_via_cache` — two + flat-encoded names that collide (both `-home-joe-x-y`) but the + cache stores different `cwd`s; helper returns the right one for + each. +- `test_project_destination_filter_match_expanded` — `--expand-paths + --filter-path /home/joe`: filter against real path, destination + is `output / relpath`. +- `test_project_destination_filter_miss_expanded` — same but real + path doesn't match prefix; helper returns None. +- `test_project_destination_filter_match_flat` — `--filter-path + -home-joe` (no expand): filter against flat name, destination is + `output / <flat-name>` for matching projects. +- `test_project_destination_no_expand_no_filter` — flat copy under + `output_dir`. +- `test_project_destination_expand_no_filter` — full real-path + expansion under `output_dir`. + +### Integration (`test/test_obsidian_output.py`) + +- Mock `~/.claude/projects/` with two-three project shapes (using the + existing test_data fixtures pattern; e.g. tmp_path with a couple + `-home-fixture-*` dirs each with one JSONL). +- Drive the CLI with each flag combination from the matrix above; + assert the produced directory tree. +- Format coverage: **Markdown only** for the integration test + matrix. The flag mechanics are format-agnostic (no per-renderer + logic), so HTML/JSON parity is asserted by inspection of the + shared converter.py path rather than by re-running the matrix + per format. + +### Snapshot + +`test/test_snapshot_html.py` should not need changes — only the +output destination changes, not the rendered content. + +--- + +## Open questions for main — *resolved by user* + +1. **JSON output**: format-agnostic (mechanics live in converter.py, + not the renderers); test only the Markdown path and trust parity + for HTML/JSON by code inspection. + +2. **Filter without expand**: filter against the **unexpanded** flat + project-dir name (`-home-joe-...`), not the resolved real path.
No prefix truncation in this mode — truncation only meaningful + with `--expand-paths`. + +3. **No-cache fallback**: peek the first JSONL in the project dir, + read just enough lines to find one entry with a `cwd` field, return + it. Cheap, deterministic, no full-parse overhead. Naïve `/`-for-`-` + inversion stays as the last resort (orphan dirs with no JSONLs). + +4. **`--output <file>` vs `<dir>`**: simpler heuristic — if the + `--output` value ends with a recognised extension suffix + (`.html` / `.md` / `.markdown` / `.json`), treat as a file; + otherwise treat as a directory. Both `--expand-paths` and + `--filter-path` apply only in the directory case. + +5. **Python 3.10 baseline**: confirmed; `Path.is_relative_to` + (3.9+) is safe to use. + +6. **Index page location with filter**: confirmed — keep at + `output_dir/index.{html,md,json}`. Predictable, doesn't depend + on which projects matched the filter. + +--- + +## Follow-up / Open points + +### Cache-freshness checks resolve against `project_path` (source), not the output destination + +`cache.is_html_stale(html_path, ...)` and `cache.is_page_stale(...)` both compute their `actual_file` check as `self.project_path / html_path` — the **source** project dir under `~/.claude/projects/`, not the actual output destination (`dest_dir`). With the legacy in-place behaviour the two are identical, so the check works as intended. With `--output` projecting to a different tree, the source path never has a `combined_transcripts.html`, so `is_html_stale` returns "file_missing" / "stale" on every run. + +**Practical implication** — runs of the same source against two different `--output` dirs both produce correct output (the `not output_path.exists()` term in `process_projects_hierarchy`'s `needs_work` and the per-session-file existence checks force regeneration). But every `--output` switch always re-renders, even when the destination is already up-to-date.
JSONL parsing is still cache-hit ("X sessions" instead of "X files updated"), only rendering re-runs. + +``` +Run 1 (--output /tmp/A): 4.4s (8 projects updated) +Run 2 (--output /tmp/B): 2.3s (cache-hit on JSONL parse, + but rendering re-ran) +Run 3 (--output /tmp/A): ~2.3s (same — A's existing files + are not consulted) +``` + +**Future optimisation** — make the html-cache row's freshness check destination-aware (e.g. record the absolute destination path when writing, compare against it on next run). Bounded value: only matters when users alternate between several `--output` destinations on the same source. Not worth the complexity until someone hits the slowdown in practice. + +### Other follow-ups (already noted in the implementation) + +- **Archived projects with `--output`** — index links point to projected paths whose files won't exist until the user re-renders. Two plausible mitigations: exclude archived projects from the index in `--output` mode, or always link to the original on-disk location regardless of `--output` / `--expand-paths`. (Surfaced by monk; left for follow-up.) +- **`_peek_jsonl_for_cwd` debug logging** — current shape is silent on tier-2→tier-3 fallthroughs; a `logger.debug(...)` would help when someone is debugging an unexpected naive-tier hit. Zero-noise default kept. + +### User-surfaced ergonomics gaps + +#### Absolute `--filter-path` without `--expand-paths` silently excludes everything + +Symmetric inverse of the footgun monk caught (relative `--filter-path` with `--expand-paths` excludes everything via `Path.relative_to`). 
Reproduced empirically: + +``` +$ uv run claude-code-log -o .examples/.../ccl --all-projects \ + --filter-path /home/cboos/Workspace/github/daain \ + --detail low --compact --format md +Processed 665 projects in 1.3s + Index regenerated +$ ls .examples/.../ccl +index.md # ← no per-project output +``` + +The Q2 resolution says: without `--expand-paths`, the filter matches against the encoded flat dir name (`-home-cboos-...`). An absolute path starting with `/` matches no encoded name, so all 665 projects filter out. No error, no warning — only the index lands. + +Two fixes to consider (same shape as the existing footgun guards): + +- **(A) Reject** at click parse time when `--filter-path.startswith("/")` and `--expand-paths` is unset. Symmetric with monk's relative-filter rejection. +- **(B) Auto-imply `--expand-paths`** when `--filter-path` is absolute. Friendlier; encoded-form filtering is the niche case. + +Lean toward (B). Either is straightforward. + +#### `--filter-path` should imply `--all-projects` + +Filtering only makes sense over a set of projects — without `--all-projects` there's nothing for `--filter-path` to filter. Currently it's warned-about-and-ignored; auto-imply is friendlier. + +**Asymmetry note** (worth recording): `--expand-paths` *cannot* safely imply `--all-projects` because the flag has independent meaning in single-session / single-project mode (next item — project one artefact under `//`). Implying `--all-projects` from `--expand-paths` would silently switch from "expand this one input" to "scan ~/.claude/projects/", which is a much bigger surprise than `--filter-path` could ever be. So the auto-imply is `--filter-path` only; `--expand-paths` keeps the current behaviour matrix. + +#### `--expand-paths` for single-session / single-project mode + +Today `--expand-paths` is wired only through `process_projects_hierarchy`. 
Reasonable extension: when a single-session or single-project export is requested with `--output <dir>` and `--expand-paths`, project that one artefact into `<output>/<real-path>/` using the same path-projection helper. Same convention, same matrix shape — just narrower scope. + +#### `--dry-run` mode + +Show what would be generated (projected destinations, filter selections) without actually rendering or writing. Useful for sanity-checking a flag combination — especially with the path-projection logic where the destination depends on cache state and JSONL peek results. Pairs naturally with `--filter-path` + `--expand-paths` exploration. + +Implementation sketch: a top-level CLI flag that, when set, prints the per-project decision (`source -> dest` or `<source>: filter excluded`) and exits before any file I/O. Cheap to implement on top of `project_destination()` since the helper is already pure. + +#### `--combined yes/no/only` (or `both/none/only`) — suppress combined transcripts + +For Obsidian usage, having *both* the combined `combined_transcripts.md` and the per-session `session-{id}.md` files is pointless duplication — Obsidian discovers sessions individually via the file tree, and the combined file is just dead weight that confuses graph view. The current default emits both. + +Proposed flag: `--combined yes|no|only` (or equivalent `both|none|only`): + +| Value | Combined | Per-session | Default for | +|---|---|---|---| +| `yes` / `both` | ✓ | ✓ | Current behaviour (HTML / non-Obsidian flow) | +| `no` / `none` | ✗ | ✓ | **Recommended default for `--expand-paths`** | +| `only` | ✓ | ✗ | When the user explicitly wants the rollup-only view | + +When combined is suppressed, the index page must link **directly to each `session-{id}.md`** rather than to `combined_transcripts*.md`. The `html_file` field in `project_summaries` would become a list of session links instead of one combined link.
+ +#### Markdown index: bullet-list directory hierarchy under `--expand-paths` + +In Markdown + `--expand-paths` mode, the natural index shape is a nested bullet list mirroring the directory tree: + +```markdown +- home/joe + - project/A + - [session-aabbccdd](home/joe/project/A/session-aabbccdd.md) — 2026-03-21 *14 messages* + - [session-eeff0011](home/joe/project/A/session-eeff0011.md) — 2026-03-22 *9 messages* + - project/B + - [session-22334455](home/joe/project/B/session-22334455.md) — 2026-03-23 *31 messages* +- home/jane + - project/C + - [session-66778899](home/jane/project/C/session-66778899.md) — 2026-03-20 *5 messages* +``` + +Each directory appears as a parent bullet with its sessions (or sub-dirs) as nested children. Walks the same path-projection tree the file system was projected into, but at the index level. Renders nicely in Obsidian's preview AND in plain Markdown viewers. Especially good when combined with the no-combined-transcripts mode (above), since each leaf bullet then directly points to the session file the user wants to open. + +#### **CRITICAL**: Markdown renderer must emit per-message timestamps + +This is "absolutely need" tier, not a nice-to-have — it's what enables a cross-session narrative / episodic-memory layer in Obsidian. Without per-message timestamps in the Markdown output, the user can't reconstruct *when* something happened, which kills the whole "transcript as Obsidian note" workflow. + +**Current Markdown output (with `--compact`):** + +```markdown +## 🤷 User: *Nice! Please commit and reply to bob that…* + +Nice! Please commit and reply to bob that you did it. + +### 🤖 Assistant: *Done! I've:* + +> Done! I've: +> +> 1. **Committed** the WebFetch tool renderer implementation (commit `da363b8`) … +> 2. **Replied** to bob (mail #250) … + +> No response requested. +``` + +**Required:** + +```markdown +## 🤷 User: *Nice! Please commit and reply to bob that…* +*2026-03-21 18:40:44* + +Nice! 
Please commit and reply to bob that you did it. + +### 🤖 Assistant: *Done! I've:* +*2026-03-21 18:44:22* + +> Done! I've: +> +> 1. **Committed** the WebFetch tool renderer implementation (commit `da363b8`) … +> 2. **Replied** to bob (mail #250) … + +> No response requested. +``` + +One italics line per message, immediately after the heading. Format: `*YYYY-MM-DD HH:MM:SS*` (matches the existing HTML timestamp rendering at the message level). + +The HTML renderer already emits timestamps; this is purely a Markdown-side omission to fix. Should be a small change in `claude_code_log/markdown/renderer.py` at the per-message header emission point. + +Considered out of scope for #151 (the path-projection PR), but should land **before** anyone seriously uses the Obsidian-friendly output for narrative work. Worth its own issue. + +--- + +## Out of scope (mention for completeness) + +- Obsidian-specific frontmatter (YAML at top of each `.md` for tags / + links). Could be a follow-up `--obsidian-frontmatter` flag; not + part of #151's bullet list. +- Wikilink generation (`[[…]]`) for cross-references between + sessions. Same — follow-up. +- Symlink-based projection (write once, link from many places). The + current write-then-copy model is fine for Obsidian; symlinks would + complicate cache invalidation.