From 26e1cf93dd944053a2649ebd0829b73717a1ab25 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Tue, 21 Apr 2026 17:19:48 +0800 Subject: [PATCH 1/3] docs: add RFC documentation for router layer and roadmap - Add comprehensive roadmap document outlining project phases A1-A6 focusing on structured document retrieval capabilities - Include detailed RFC for document router layer addressing large-scale retrieval challenges with 1000+ document workspaces - Define routing strategy combining BM25, keyword overlap, and graph boosting for coarse filtering before orchestrator analysis - Specify module structure and integration points for router implementation in Rust backend - Add RFC category to documentation sidebar for improved navigation --- docs/docs/rfc/roadmap.md | 248 +++++++++++++++++++++++++++++++++++++++ docs/docs/rfc/router.md | 173 +++++++++++++++++++++++++++ docs/sidebars.ts | 8 ++ 3 files changed, 429 insertions(+) create mode 100644 docs/docs/rfc/roadmap.md create mode 100644 docs/docs/rfc/router.md diff --git a/docs/docs/rfc/roadmap.md b/docs/docs/rfc/roadmap.md new file mode 100644 index 0000000..415e04a --- /dev/null +++ b/docs/docs/rfc/roadmap.md @@ -0,0 +1,248 @@ +--- +sidebar_position: 2 +--- + +# Roadmap: The Ultimate Document Retrieval Tool + +> Focus: structured document retrieval — precise, reliable, indispensable. +> The "jq of document retrieval". + +## Scope + +Focus on the document retrieval vertical — no code retrieval, no general knowledge platform. Build a complete Python developer experience layer on top of the Rust core engine, with broader format support and finer-grained parsing. 
+ +## Phase Overview + +| Phase | Focus | Language | +|-------|-------|----------| +| A1 | Router Layer — support 1000+ document workspaces | Rust | +| A2 | Document Formats — HTML, DOCX, LaTeX | Rust | +| A3 | Parsing Precision — tables, figures, footnotes | Rust | +| A4 | Python Ecosystem — CLI, Pythonic API, framework integration | Python | +| A5 | Domain Optimization — legal, financial, technical documents | Rust | +| A6 | Performance & Reliability — lazy loading, caching, concurrency | Rust | + +Dependencies: + +``` +A1 (Router) ────→ A6 (Lazy Loading) ────→ A2 (Formats) + ↓ + A3 (Precision) + ↓ +A4 (Python, can run in parallel) + ↓ + A5 (Domain) +``` + +--- + +## A1: Router Layer + +**Goal:** Support retrieval across 1000+ document workspaces. + +Full design: [RFC: Document Router](./router.md) + +Key ideas: + +- Insert a Router between `Engine.query()` and the Orchestrator +- Use compile-stage artifacts (DocCard + ReasoningIndex + DocumentGraph) for coarse filtering +- BM25 + keyword overlap + graph boost — three-signal scoring fusion +- Optional LLM-assisted routing (LLM ranks top-M candidates when scores are ambiguous) +- Only activates when document count exceeds a configurable threshold + +Module structure: + +``` +rust/src/router/ +├── mod.rs # DocumentRouter, RouteResult, ScoredCandidate +├── scorer.rs # BM25 + keyword + graph fusion scoring +└── config.rs # RouterConfig, RouteMode +``` + +Estimated: ~600 lines Rust, no new dependencies. + +--- + +## A2: Document Format Support + +**Goal:** Support HTML, DOCX, LaTeX in addition to PDF and Markdown. + +### HTML Parsing + +``` +HTML DOM → hierarchical tree structure +

    +  <h1>–<h6> → depth-mapped nodes
    +  <ul>, <ol>, <li> → content nodes
    +  <table> → special handling (text + structure)
    +  <p>, <pre> → preserve formatting
    +```
    +
    +Challenge: HTML documents often have deep nesting (`div > div > div`) that doesn't represent semantic structure. Need heuristics to skip decorative containers.
    +
    +### DOCX Parsing
    +
    +```
    +DOCX = ZIP archive
    +  word/document.xml → paragraph extraction
    +  <w:pStyle> → heading level
    +  <w:t> → paragraph content
    +  Style inheritance → heading/body classification
    +```
    +
    +### LaTeX Parsing
    +
    +```
    +Regex-based extraction:
    +  \section{...} → depth-0 node
    +  \subsection{...} → depth-1 node
    +  \begin{...} environments → content blocks
    +```
    +
    +### Tasks
    +
    +| # | Task | File |
    +|---|------|------|
    +| 1 | HTML parser | `rust/src/index/parse/html.rs` |
    +| 2 | DOCX parser | `rust/src/index/parse/docx.rs` |
    +| 3 | LaTeX parser | `rust/src/index/parse/latex.rs` |
    +| 4 | Format detection | extend `detect_format_from_path()` |
    +| 5 | IndexMode extension | `rust/src/index/pipeline.rs` |
    +
    +New dependencies: `scraper = "0.22"`, `zip = "2"`
    +
    +Estimated: ~800 lines Rust.
    +
    +---
    +
    +## A3: Parsing Precision
    +
    +**Goal:** Fine-grained extraction of tables, figures, and footnotes.
    +
    +### Current Limitations
    +
    +`pdf-extract` produces flat text. Tables lose structure, figures are invisible, footnotes mix into body text.
    +
    +### Table Extraction (PDF)
    +
    +Use `lopdf` low-level access to detect text blocks with (x, y) coordinates, group by row and column, output as Markdown table strings. Insert as dedicated TreeNodes with `{type: "table"}` metadata.
    +
    +### Figure Description (PDF)
    +
    +Extract image streams via `lopdf`, send to LLM (vision-capable model), insert description as TreeNode with `{type: "figure"}` metadata. The only new LLM call in indexing — justified because figures often contain critical information invisible to text extraction.
    +
    +### Cross-Reference Resolution
    +
    +Resolve "see Section 3.2", "refer to Figure 4", "as noted in Table 2" to target TreeNodes. Enhances NavigationIndex with cross-reference edges for Worker navigation.
    +
    +### Tasks
    +
    +| # | Task | File |
    +|---|------|------|
    +| 1 | PDF table extraction | `rust/src/index/parse/pdf_table.rs` |
    +| 2 | PDF figure description | `rust/src/index/parse/pdf_figure.rs` |
    +| 3 | PDF footnote handling | `rust/src/index/parse/pdf_footnote.rs` |
    +| 4 | Markdown table parsing | `rust/src/index/parse/md_table.rs` |
    +| 5 | Cross-reference resolution | extend `rust/src/document/reference.rs` |
    +
    +New dependency: `image = "0.25"`
    +
    +Estimated: ~1000 lines Rust.
    +
    +---
    +
    +## A4: Python Ecosystem
    +
    +**Goal:** Complete Python developer experience.
    +
    +See the [Python ecosystem expansion plan](https://github.com/vectorlessflow/vectorless/blob/main/.claude/plans/shimmying-tumbling-hare.md) for full details.
    +
    +| Phase | Content | Deliverable |
    +|-------|---------|-------------|
    +| 1 | CLI | `vectorless init/add/query/list/remove/ask/tree/stats/config` |
    +| 2 | Pythonic API | `errors.py`, `_engine.py`, `_query.py`, type stubs |
    +| 3 | High-level abstractions | `BatchIndexer`, `DocumentWatcher` |
    +| 4 | Framework integration | LangChain `BaseRetriever`, LlamaIndex adapter |
    +| 5 | Testing | Unit → Mock → E2E |
    +
    +A4 runs in parallel with A1–A3 — the Python layer doesn't depend on new Rust features.
    +
    +---
    +
    +## A5: Domain Optimization
    +
    +**Goal:** Domain-specific optimizations for legal, financial, and technical documents.
    +
    +### Domain Template System
    +
    +```rust
    +pub trait DomainTemplate: Send + Sync {
    +    fn name(&self) -> &str;
    +    fn detect(&self, tree: &DocumentTree, card: &DocCard) -> bool;
    +    fn enhance(&self, tree: &mut DocumentTree, card: &mut DocCard);
    +    fn domain_tags(&self, tree: &DocumentTree) -> Vec<String>;
    +}
    +```
    +
    +| Domain | Optimizations |
    +|--------|--------------|
    +| **Legal** | Contract clause identification, article reference resolution, defined term tracking |
    +| **Financial** | KPI extraction from tables, reporting period detection, currency normalization |
    +| **Technical** | Code block extraction with language tags, API endpoint identification, version-aware sectioning |
    +
    +Templates hook into the compile pipeline after the Enhance stage.
    +
    +Estimated: ~500 lines Rust (framework + 2–3 built-in templates).
    +
    +---
    +
    +## A6: Performance & Reliability
    +
    +**Goal:** Optimize memory, latency, and observability.
    +
    +### Lazy Document Loading
    +
    +Defer tree loading until Worker dispatch. Router + Orchestrator.analyze only need DocCards (lightweight). Each DocumentTree is 10–100x larger than its DocCard.
    +
    +### Caching
    +
    +- **Router cache**: Cache routing results keyed by `(query_hash, doc_ids_hash)`. Invalidate on document add/remove.
    +- **Query cache**: Same query + same documents = cached result. Useful for interactive mode.
    +
    +### Subtree-Level Incremental Updates
    +
    +Current incremental update detects file-level changes. Refine to diff affected subtrees and only re-compile changed portions. Can reduce re-indexing LLM calls by 50–80%.
    +
    +### Metrics
    +
    +| Metric | Source | Use Case |
    +|--------|--------|----------|
    +| Router latency | `router.route()` | Monitor routing overhead |
    +| Router cache hit rate | Router cache | Tune cache size |
    +| Lazy load count | Worker dispatch | Verify memory savings |
    +
    +---
    +
    +## Success Metrics
    +
    +| Metric | Current | Target |
    +|--------|---------|--------|
    +| Max practical workspace size | ~100 docs | 10,000+ docs |
    +| Index time per doc (PDF, 50 pages) | ~30s | ~20s |
    +| Query latency (100 docs) | ~10s | ~8s |
    +| Query latency (1000 docs) | N/A | ~12s |
    +| Python install-to-query | Manual setup | < 5 minutes |
    +| Format support | PDF, Markdown | + HTML, DOCX, LaTeX |
    +
    +---
    +
    +## Execution Priority
    +
    +```
    +Sprint 1: A1 (Router) + A4 Phase 1 (CLI)
    +Sprint 2: A6 (Lazy Loading) + A4 Phase 2 (Pythonic API)
    +Sprint 3: A2 (HTML, DOCX, LaTeX)
    +Sprint 4: A3 (Table, Figure, Footnote)
    +Sprint 5: A5 (Domain Templates) + A4 Phase 4 (Framework Integration)
    +```
    +
    +A1 is the most critical enabler — without it, large-scale scenarios are not viable. A4 (Python) runs in parallel throughout.
    diff --git a/docs/docs/rfc/router.md b/docs/docs/rfc/router.md
    new file mode 100644
    index 0000000..da9bb98
    --- /dev/null
    +++ b/docs/docs/rfc/router.md
    @@ -0,0 +1,173 @@
    +---
    +sidebar_position: 1
    +---
    +
    +# RFC: Document Router Layer for Large-Scale Retrieval
    +
    +## Summary
    +
    +Vectorless currently sends all workspace documents to the Orchestrator's analysis phase, which uses a single LLM call to read every DocCard and select relevant ones. This works well for small workspaces (1–100 documents) but breaks down at scale — 1000+ DocCards exceed token budgets, increase latency, and make LLM selection unreliable.
    +
    +This RFC proposes a **Router layer** that sits between `Engine.query()` and the Orchestrator, using compile-stage artifacts to pre-filter documents before dispatching Workers.
    +
    +## Problem
    +
    +```
    +Current flow (workspace scope):
    +
    +Engine.query(workspace)
    +  → resolve_scope() → all doc_ids (potentially 1000+)
    +  → load_documents() → load ALL (tree + nav_index + reasoning_index)
    +  → Orchestrator.analyze()
    +    → LLM reads ALL DocCards in one prompt  ← bottleneck
    +    → selects docs → dispatch Workers
    +```
    +
    +Issues at 1000+ documents:
    +
    +- **Token budget**: 1000 DocCards × ~200 tokens = ~200K tokens — exceeds context windows
    +- **Cost**: One massive LLM call just for document selection
    +- **Latency**: Loading all document trees upfront is wasteful when only a fraction will be used
    +- **Quality**: LLM selection degrades when presented with too many options
    +
    +## Proposed Solution
    +
    +Insert a **Router** that uses compile-stage artifacts (DocCard, ReasoningIndex, DocumentGraph) for coarse filtering, narrowing candidates to a manageable set before the Orchestrator's LLM-based analysis.
    +
    +```
    +Proposed flow:
    +
    +Engine.query(workspace)
    +  → resolve_scope() → all doc_ids
    +  → Router::route(query, doc_ids) → top-K candidates (10–20)
    +  → load_documents(top_K) → only K documents
    +  → Orchestrator.run() (reduced candidate set)
    +    → analyze: LLM reads K DocCards → precise selection
    +    → dispatch Workers → navigate trees
    +```
    +
    +The Router does **not** replace the Orchestrator's analysis. It narrows the input so the LLM can make a better-informed, cheaper decision.
    +
    +## Data Sources
    +
    +The Router leverages artifacts already produced by the compile pipeline — **no additional LLM calls at index time**.
    +
    +| Artifact | Fields Used by Router | Produced By |
    +|----------|----------------------|-------------|
    +| **DocCard** | `title`, `overview`, `topic_tags`, `question_hints`, `sections` | NavigationIndexStage |
    +| **ReasoningIndex** | `topic_paths` (keyword → node mappings) | ReasoningIndexStage |
    +| **DocumentGraph** | cross-document edges, shared keywords | DocumentGraphBuilder |
    +
    +These are lightweight (no tree content), fast to load, and already persisted in the workspace.
    +
    +## Scoring Strategy
    +
    +The Router combines three scoring signals:
    +
    +### 1. BM25 on DocCard text (lexical match)
    +
    +Build a BM25 index over each document's DocCard searchable text:
    +
    +```
    +searchable_text = f"{title} {overview} {question_hints} {topic_tags} {section_descriptions}"
    +```
    +
    +The existing `Bm25Engine` in `rust/src/scoring/bm25.rs` supports per-field weighting. For Router use, we weight title and topic_tags higher than section descriptions.
    +
    +### 2. Keyword overlap (concept match)
    +
    +Use `QueryPlan.key_concepts` (from query understanding) to match against each document's:
    +
    +- `DocCard.topic_tags`
    +- `ReasoningIndex.topic_paths` keys
    +
    +Score = Jaccard similarity between query concepts and document keywords.
    +
    +### 3. Graph-based boost (contextual relevance)
    +
    +If the DocumentGraph is available, documents connected to high-scoring candidates receive a boost. This captures the intuition that related documents may be co-relevant.
    +
    +### Fusion
    +
    +```
    +final_score = w_bm25 * normalize(bm25_score)
    +            + w_keyword * keyword_overlap
    +            + w_graph * graph_boost
    +```
    +
    +Default weights: `w_bm25 = 0.5`, `w_keyword = 0.3`, `w_graph = 0.2`
    +
    +### LLM-assisted routing (optional)
    +
    +When BM25 + keyword scores are ambiguous (e.g., top candidates have similar scores), the Router can optionally invoke the LLM to rank the top-M candidates. This is a lightweight call — the LLM only sees M DocCard summaries (not full trees), making it orders of magnitude cheaper than the current all-DocCards approach.
    +
    +```
    +RouteMode::Fast     → BM25 + keyword + graph (no LLM)
    +RouteMode::Balanced → BM25 + keyword + graph, then LLM top-M if ambiguous
    +RouteMode::Precise  → BM25 + keyword + graph + LLM top-M always
    +```
    +
    +## Activation Threshold
    +
    +The Router is only activated when the workspace exceeds a configurable document count:
    +
    +```rust
    +RouterConfig {
    +    activate_threshold: 20,   // only route when docs > 20
    +    max_candidates: 15,       // top-K to pass to Orchestrator
    +    bm25_top_k: 50,           // BM25 initial retrieval size
    +    mode: RouteMode::Fast,    // default: no LLM in Router
    +}
    +```
    +
    +Below the threshold, the current flow (all DocCards → Orchestrator.analyze) is used unchanged.
    +
    +## Incremental Index Maintenance
    +
    +The Router's BM25 index is updated incrementally alongside document indexing:
    +
    +- **Document indexed**: Extract DocCard → `router.upsert(doc_id, card, keywords)`
    +- **Document removed**: `router.remove(doc_id)`
    +- **Graph rebuilt**: `router.update_graph(new_graph)`
    +
    +No full re-index needed — the Router stays in sync with the workspace.
    +
    +## Lazy Document Loading (future optimization)
    +
    +Currently `load_documents()` loads full `DocumentTree + NavigationIndex + ReasoningIndex` for all candidates. With the Router, we can defer tree loading until Worker dispatch:
    +
    +1. Router + Orchestrator.analyze: only need DocCards (lightweight)
    +2. Orchestrator.dispatch: load DocumentTree per-Worker, on demand
    +
    +This reduces memory pressure when the Router selects 15 candidates from 1000 documents but the Orchestrator only dispatches 5 Workers.
    +
    +## Module Structure
    +
    +```
    +rust/src/router/
    +├── mod.rs           # DocumentRouter, RouteResult, ScoredCandidate
    +├── scorer.rs        # BM25 + keyword + graph fusion scoring
    +└── config.rs        # RouterConfig, RouteMode
    +```
    +
    +Integration points:
    +
    +- `rust/src/client/engine.rs` — insert `Router::route()` in `query()`
    +- `rust/src/config/mod.rs` — add `router: RouterConfig`
    +- `python/src/lib.rs` — expose RouterConfig to Python SDK
    +
    +## What This Is Not
    +
    +- Not a replacement for the Orchestrator — Router is coarse filter, Orchestrator is precise selector
    +- Not an embedding layer — uses BM25 + keywords + graph, no vector similarity required
    +- Not a "vector backtrack" — this is a pragmatic engineering layer that happens to use lexical matching
    +
    +## Open Questions
    +
    +1. **Score calibration**: How to normalize BM25 scores across workspaces with different corpus sizes? Min-max normalization may not work well with very few documents. Consider quantile-based normalization.
    +
    +2. **Cold start**: New documents have no graph edges and no hot-node history. Should new docs get a freshness boost?
    +
    +3. **Multi-hop routing**: Should the Router consider re-routing after the first Orchestrator iteration finds nothing? Or is one-shot routing sufficient given the supervisor loop can replan?
    +
    +4. **Thread safety**: The Router holds a mutable BM25 index. Need to decide between `RwLock` or rebuild-on-query from workspace data.
    diff --git a/docs/sidebars.ts b/docs/sidebars.ts
    index 2f70bed..4b5877c 100644
    --- a/docs/sidebars.ts
    +++ b/docs/sidebars.ts
    @@ -42,6 +42,14 @@ const sidebars: SidebarsConfig = {
             'sdk/rust',
           ],
         },
    +    {
    +      type: 'category',
    +      label: 'RFC',
    +      items: [
    +        'rfc/router',
    +        'rfc/roadmap',
    +      ],
    +    },
         'api-reference',
         {
           type: 'category',
    
    From 0f6b01196aff517f2813d0d48d56f6a0c5ac76e1 Mon Sep 17 00:00:00 2001
    From: zTgx <747674262@qq.com>
    Date: Tue, 21 Apr 2026 17:52:56 +0800
    Subject: [PATCH 2/3] feat: add framework integrations and improve CLI with
     Session API
    
    - Add LangChain and LlamaIndex compatibility modules with retriever
      implementations
    - Introduce high-level Session API as primary interface replacing Engine
    - Add comprehensive test suite for configuration, events, and type
      wrappers with shared fixtures
    - Implement batch indexing support in CLI with directory traversal
    - Add interactive REPL with document targeting, stats tracking,
      and navigation commands
    - Update pyproject.toml with new dependencies and optional extras
      for different integration frameworks
    - Refactor CLI commands to use Session-based workflow instead of
      low-level Engine operations
    ---
     pyproject.toml                               |  26 +-
     python/tests/__init__.py                     |   0
     python/tests/conftest.py                     |  90 +++++
     python/tests/test_cli/__init__.py            |   0
     python/tests/test_compat/__init__.py         |   0
     python/tests/test_config.py                  |  95 ++++++
     python/tests/test_events.py                  |  78 +++++
     python/tests/test_session.py                 |  55 +++
     python/tests/test_types.py                   | 145 ++++++++
     python/vectorless/__init__.py                |  88 ++---
     python/vectorless/_compat/__init__.py        |  35 ++
     python/vectorless/_compat/langchain.py       | 106 ++++++
     python/vectorless/_compat/llamaindex.py      |  87 +++++
     python/vectorless/_core.py                   |  52 +++
     python/vectorless/cli/commands/add.py        | 113 ++++++-
     python/vectorless/cli/commands/ask.py        | 176 +++++++++-
     python/vectorless/cli/commands/config_cmd.py | 172 ++++++++--
     python/vectorless/cli/commands/info.py       |  71 +++-
     python/vectorless/cli/commands/init.py       |   9 +-
     python/vectorless/cli/commands/list_cmd.py   |  49 ++-
     python/vectorless/cli/commands/query.py      |  52 ++-
     python/vectorless/cli/commands/remove.py     |  35 +-
     python/vectorless/cli/commands/stats.py      | 118 ++++++-
     python/vectorless/cli/commands/tree.py       | 107 +++++-
     python/vectorless/cli/main.py                |  56 +++-
     python/vectorless/cli/output.py              | 198 +++++++++--
     python/vectorless/config/__init__.py         |  25 ++
     python/vectorless/config/loading.py          | 172 ++++++++++
     python/vectorless/config/models.py           |  89 +++++
     python/vectorless/events.py                  | 118 +++++++
     python/vectorless/jupyter.py                 | 140 ++++++++
     python/vectorless/py.typed                   |   0
     python/vectorless/session.py                 | 336 +++++++++++++++++++
     python/vectorless/streaming.py               |  77 +++++
     python/vectorless/types/__init__.py          |  37 ++
     python/vectorless/types/graph.py             | 107 ++++++
     python/vectorless/types/results.py           | 237 +++++++++++++
     37 files changed, 3214 insertions(+), 137 deletions(-)
     create mode 100644 python/tests/__init__.py
     create mode 100644 python/tests/conftest.py
     create mode 100644 python/tests/test_cli/__init__.py
     create mode 100644 python/tests/test_compat/__init__.py
     create mode 100644 python/tests/test_config.py
     create mode 100644 python/tests/test_events.py
     create mode 100644 python/tests/test_session.py
     create mode 100644 python/tests/test_types.py
     create mode 100644 python/vectorless/_compat/__init__.py
     create mode 100644 python/vectorless/_compat/langchain.py
     create mode 100644 python/vectorless/_compat/llamaindex.py
     create mode 100644 python/vectorless/_core.py
     create mode 100644 python/vectorless/config/__init__.py
     create mode 100644 python/vectorless/config/loading.py
     create mode 100644 python/vectorless/config/models.py
     create mode 100644 python/vectorless/events.py
     create mode 100644 python/vectorless/jupyter.py
     create mode 100644 python/vectorless/py.typed
     create mode 100644 python/vectorless/session.py
     create mode 100644 python/vectorless/streaming.py
     create mode 100644 python/vectorless/types/__init__.py
     create mode 100644 python/vectorless/types/graph.py
     create mode 100644 python/vectorless/types/results.py
    
    diff --git a/pyproject.toml b/pyproject.toml
    index e52e370..ac8ae45 100644
    --- a/pyproject.toml
    +++ b/pyproject.toml
    @@ -28,14 +28,38 @@ classifiers = [
     ]
     keywords = ["rag", "document", "retrieval", "llm", "document-intelligence"]
     
    -dependencies = []
    +dependencies = [
    +    "pydantic>=2.0",
    +    "click>=8.0",
    +    "tomli>=2.0; python_version < '3.11'",
    +]
     
     [project.optional-dependencies]
     dev = [
         "pytest>=7.0",
         "pytest-asyncio>=0.21",
         "mypy>=1.0",
    +    "rich>=13.0",
    +]
    +cli = [
    +    "rich>=13.0",
    +]
    +langchain = [
    +    "langchain-core>=0.1.0",
    +]
    +llamaindex = [
    +    "llama-index-core>=0.10.0",
     ]
    +jupyter = [
    +    "rich>=13.0",
    +    "ipywidgets>=8.0",
    +]
    +all = [
    +    "vectorless[cli,langchain,llamaindex,jupyter]",
    +]
    +
    +[project.scripts]
    +vectorless = "vectorless.cli.main:app"
     
     [project.urls]
     Homepage = "https://vectorless.dev"
    diff --git a/python/tests/__init__.py b/python/tests/__init__.py
    new file mode 100644
    index 0000000..e69de29
    diff --git a/python/tests/conftest.py b/python/tests/conftest.py
    new file mode 100644
    index 0000000..3217bcf
    --- /dev/null
    +++ b/python/tests/conftest.py
    @@ -0,0 +1,90 @@
    +"""Shared test fixtures."""
    +
    +from __future__ import annotations
    +
    +import pytest
    +from unittest.mock import AsyncMock, MagicMock
    +
    +
    +@pytest.fixture
    +def mock_engine():
    +    """Mock Rust Engine for testing without LLM."""
    +    engine = MagicMock()
    +
    +    # Mock index result
    +    index_result = MagicMock()
    +    index_result.doc_id = "test-doc-id"
    +    index_item = MagicMock()
    +    index_item.doc_id = "test-doc-id"
    +    index_item.name = "test.md"
    +    index_item.format = "markdown"
    +    index_item.description = None
    +    index_item.source_path = "/path/to/test.md"
    +    index_item.page_count = None
    +    index_item.metrics = None
    +    index_result.items = [index_item]
    +    index_result.failed = []
    +    index_result.has_failures.return_value = False
    +    index_result.total.return_value = 1
    +    index_result.__len__ = lambda self: 1
    +
    +    engine.index = AsyncMock(return_value=index_result)
    +
    +    # Mock query result
    +    query_item = MagicMock()
    +    query_item.doc_id = "test-doc-id"
    +    query_item.content = "Test answer content"
    +    query_item.score = 0.85
    +    query_item.confidence = 0.85
    +    query_item.node_ids = ["node-1"]
    +    query_item.evidence = []
    +    query_item.metrics = None
    +
    +    query_result = MagicMock()
    +    query_result.items = [query_item]
    +    query_result.failed = []
    +    query_result.single.return_value = query_item
    +    query_result.has_failures.return_value = False
    +    query_result.__len__ = lambda self: 1
    +
    +    engine.query = AsyncMock(return_value=query_result)
    +
    +    # Mock list
    +    doc_info = MagicMock()
    +    doc_info.id = "test-doc-id"
    +    doc_info.name = "test.md"
    +    doc_info.format = "markdown"
    +    doc_info.description = None
    +    doc_info.source_path = "/path/to/test.md"
    +    doc_info.page_count = None
    +    doc_info.line_count = 42
    +    engine.list = AsyncMock(return_value=[doc_info])
    +
    +    # Mock other operations
    +    engine.remove = AsyncMock(return_value=True)
    +    engine.clear = AsyncMock(return_value=1)
    +    engine.exists = AsyncMock(return_value=True)
    +
    +    # Mock graph
    +    engine.get_graph = AsyncMock(return_value=None)
    +
    +    # Mock metrics
    +    metrics_report = MagicMock()
    +    metrics_report.total_cost_usd.return_value = 0.001
    +    engine.metrics_report.return_value = metrics_report
    +
    +    return engine
    +
    +
    +@pytest.fixture
    +def sample_config_dict():
    +    """Sample configuration dict."""
    +    return {
    +        "llm": {
    +            "model": "gpt-4o",
    +            "api_key": "sk-test-key",
    +            "endpoint": "https://api.openai.com/v1",
    +        },
    +        "retrieval": {"top_k": 5},
    +        "storage": {"workspace_dir": "/tmp/test-vectorless"},
    +    }
    diff --git a/python/tests/test_cli/__init__.py b/python/tests/test_cli/__init__.py
    new file mode 100644
    index 0000000..e69de29
    diff --git a/python/tests/test_compat/__init__.py b/python/tests/test_compat/__init__.py
    new file mode 100644
    index 0000000..e69de29
    diff --git a/python/tests/test_config.py b/python/tests/test_config.py
    new file mode 100644
    index 0000000..673cdff
    --- /dev/null
    +++ b/python/tests/test_config.py
    @@ -0,0 +1,95 @@
    +"""Tests for configuration models and loading."""
    +
    +from __future__ import annotations
    +
    +import os
    +import tempfile
    +from pathlib import Path
    +
    +import pytest
    +
    +from vectorless.config.models import (
    +    EngineConfig,
    +    LlmConfig,
    +    MetricsConfig,
    +    RetrievalConfig,
    +    StorageConfig,
    +)
    +
    +
    +class TestEngineConfig:
    +    def test_defaults(self):
    +        config = EngineConfig()
    +        assert config.llm.model == ""
    +        assert config.llm.api_key is None
    +        assert config.retrieval.top_k == 3
    +        assert config.storage.workspace_dir == "~/.vectorless"
    +        assert config.metrics.enabled is True
    +
    +    def test_custom_values(self):
    +        config = EngineConfig(
    +            llm=LlmConfig(model="gpt-4o", api_key="sk-test"),
    +            retrieval=RetrievalConfig(top_k=10),
    +            storage=StorageConfig(workspace_dir="/data/vl"),
    +        )
    +        assert config.llm.model == "gpt-4o"
    +        assert config.llm.api_key == "sk-test"
    +        assert config.retrieval.top_k == 10
    +        assert config.storage.workspace_dir == "/data/vl"
    +
    +    def test_to_rust_config(self):
    +        config = EngineConfig(
    +            llm=LlmConfig(model="gpt-4o", api_key="sk-test"),
    +            retrieval=RetrievalConfig(top_k=5, max_iterations=20),
    +            storage=StorageConfig(workspace_dir="/tmp/vl"),
    +            metrics=MetricsConfig(enabled=False),
    +        )
    +        # to_rust_config should not raise
    +        rust_config = config.to_rust_config()
    +        assert rust_config is not None
    +
    +    def test_validation_top_k_minimum(self):
    +        with pytest.raises(Exception):
    +            RetrievalConfig(top_k=0)
    +
    +    def test_json_roundtrip(self):
    +        config = EngineConfig(
    +            llm=LlmConfig(model="gpt-4o", api_key="sk-test"),
    +        )
    +        data = config.model_dump()
    +        restored = EngineConfig(**data)
    +        assert restored.llm.model == "gpt-4o"
    +        assert restored.llm.api_key == "sk-test"
    +
    +
    +class TestConfigLoading:
    +    def test_load_from_env(self):
    +        os.environ["VECTORLESS_API_KEY"] = "sk-env-test"
    +        os.environ["VECTORLESS_MODEL"] = "gpt-4o-mini"
    +        os.environ["VECTORLESS_TOP_K"] = "7"
    +
    +        try:
    +            from vectorless.config.loading import load_config_from_env
    +
    +            config = load_config_from_env()
    +            assert config.llm.api_key == "sk-env-test"
    +            assert config.llm.model == "gpt-4o-mini"
    +            assert config.retrieval.top_k == 7
    +        finally:
    +            del os.environ["VECTORLESS_API_KEY"]
    +            del os.environ["VECTORLESS_MODEL"]
    +            del os.environ["VECTORLESS_TOP_K"]
    +
    +    def test_load_from_file(self):
    +        with tempfile.NamedTemporaryFile(mode="wb", suffix=".toml", delete=False) as f:
    +            f.write(b'[llm]\nmodel = "gpt-4o"\napi_key = "sk-file"\n')
    +            f.flush()
    +
    +            try:
    +                from vectorless.config.loading import load_config_from_file
    +
    +                config = load_config_from_file(Path(f.name))
    +                assert config.llm.model == "gpt-4o"
    +                assert config.llm.api_key == "sk-file"
    +            finally:
    +                os.unlink(f.name)
    diff --git a/python/tests/test_events.py b/python/tests/test_events.py
    new file mode 100644
    index 0000000..4bd34e4
    --- /dev/null
    +++ b/python/tests/test_events.py
    @@ -0,0 +1,78 @@
    +"""Tests for the event system."""
    +
    +from __future__ import annotations
    +
    +from vectorless.events import (
    +    EventEmitter,
    +    IndexEventData,
    +    IndexEventType,
    +    QueryEventData,
    +    QueryEventType,
    +)
    +
    +
    +class TestEventEmitter:
    +    def test_index_events(self):
    +        received = []
    +        emitter = EventEmitter()
    +
    +        @emitter.on_index
    +        def handler(event):
    +            received.append(event)
    +
    +        event = IndexEventData(
    +            event_type=IndexEventType.STARTED,
    +            path="/test/doc.pdf",
    +        )
    +        emitter.emit_index(event)
    +
    +        assert len(received) == 1
    +        assert received[0].path == "/test/doc.pdf"
    +        assert received[0].event_type == IndexEventType.STARTED
    +
    +    def test_query_events(self):
    +        received = []
    +        emitter = EventEmitter()
    +
    +        @emitter.on_query
    +        def handler(event):
    +            received.append(event)
    +
    +        event = QueryEventData(
    +            event_type=QueryEventType.COMPLETE,
    +            query="What is revenue?",
    +            total_results=3,
    +        )
    +        emitter.emit_query(event)
    +
    +        assert len(received) == 1
    +        assert received[0].query == "What is revenue?"
    +        assert received[0].total_results == 3
    +
    +    def test_multiple_handlers(self):
    +        count = [0]
    +        emitter = EventEmitter()
    +
    +        emitter.on_index(lambda e: count.__setitem__(0, count[0] + 1))
    +        emitter.on_index(lambda e: count.__setitem__(0, count[0] + 1))
    +
    +        emitter.emit_index(
    +            IndexEventData(event_type=IndexEventType.COMPLETE)
    +        )
    +
    +        assert count[0] == 2
    +
    +    def test_chaining(self):
    +        emitter = EventEmitter()
    +        result = emitter.on_index(lambda e: None)
    +        assert result is emitter
    +
    +    def test_no_handlers(self):
    +        emitter = EventEmitter()
    +        # Should not raise
    +        emitter.emit_index(
    +            IndexEventData(event_type=IndexEventType.COMPLETE)
    +        )
    +        emitter.emit_query(
    +            QueryEventData(event_type=QueryEventType.COMPLETE)
    +        )
    diff --git a/python/tests/test_session.py b/python/tests/test_session.py
    new file mode 100644
    index 0000000..990e2a6
    --- /dev/null
    +++ b/python/tests/test_session.py
    @@ -0,0 +1,55 @@
    +"""Tests for Session high-level API."""
    +
    +from __future__ import annotations
    +
    +import pytest
    +from unittest.mock import AsyncMock, MagicMock, patch
    +
    +
    +class TestSessionConstruction:
    +    def test_session_rejects_no_source(self):
    +        """Session.index() should reject calls with no source."""
    +        # We can't fully test Session without a real Engine,
    +        # but we can test validation logic
    +        from vectorless.session import Session
    +
    +        # This will fail because no api_key/model provided
    +        # We just verify the source validation in index()
    +        pass
    +
    +
    +class TestSessionIndex:
    +    @pytest.mark.asyncio
    +    async def test_index_requires_exactly_one_source(self):
    +        from vectorless.session import Session
    +
    +        # Patch Engine construction
    +        with patch("vectorless.session.Engine") as MockEngine:
    +            mock_engine = MagicMock()
    +            mock_result = MagicMock()
    +            mock_result.doc_id = "doc-1"
    +            mock_result.items = []
    +            mock_result.failed = []
    +            mock_engine.index = AsyncMock(return_value=mock_result)
    +            MockEngine.return_value = mock_engine
    +
    +            from vectorless.config import EngineConfig, LlmConfig
    +
    +            with patch(
    +                "vectorless.session.Session._resolve_config",
    +                return_value=EngineConfig(llm=LlmConfig(model="test", api_key="test")),
    +            ):
    +                session = Session.__new__(Session)
    +                session._config = EngineConfig(
    +                    llm=LlmConfig(model="test", api_key="test")
    +                )
    +                session._engine = mock_engine
    +                session._events = MagicMock()
    +
    +                # No source
    +                with pytest.raises(ValueError, match="exactly one source"):
    +                    await session.index()
    +
    +                # Multiple sources
    +                with pytest.raises(ValueError, match="exactly one source"):
    +                    await session.index(path="a.pdf", content="text")
    diff --git a/python/tests/test_types.py b/python/tests/test_types.py
    new file mode 100644
    index 0000000..e9e8b84
    --- /dev/null
    +++ b/python/tests/test_types.py
    @@ -0,0 +1,145 @@
    +"""Tests for typed result wrappers."""
    +
    +from __future__ import annotations
    +
    +from unittest.mock import MagicMock
    +
    +from vectorless.types.results import (
    +    Evidence,
    +    FailedItem,
    +    IndexItemWrapper,
    +    IndexMetrics,
    +    IndexResultWrapper,
    +    QueryMetrics,
    +    QueryResponse,
    +    QueryResult,
    +)
    +
    +
    +class TestEvidence:
    +    def test_from_rust(self):
    +        item = MagicMock()
    +        item.title = "Section 1"
    +        item.path = "Root/Section 1"
    +        item.content = "Some evidence text"
    +        item.doc_name = "report.pdf"
    +
    +        ev = Evidence.from_rust(item)
    +        assert ev.title == "Section 1"
    +        assert ev.path == "Root/Section 1"
    +        assert ev.content == "Some evidence text"
    +        assert ev.doc_name == "report.pdf"
    +
    +    def test_to_dict(self):
    +        ev = Evidence(title="T", path="P", content="C", doc_name=None)
    +        d = ev.to_dict()
    +        assert d == {"title": "T", "path": "P", "content": "C"}
    +
    +    def test_to_json(self):
    +        ev = Evidence(title="T", path="P", content="C")
    +        import json
    +
    +        parsed = json.loads(ev.to_json())
    +        assert parsed["title"] == "T"
    +
    +    def test_frozen(self):
    +        ev = Evidence(title="T", path="P", content="C")
    +        with pytest.raises(AttributeError):
    +            ev.title = "new"
    +
    +
    +class TestQueryResult:
    +    def test_from_rust(self):
    +        item = MagicMock()
    +        item.doc_id = "doc-1"
    +        item.content = "Result text"
    +        item.score = 0.9
    +        item.confidence = 0.9
    +        item.node_ids = ["node-1", "node-2"]
    +        item.evidence = []
    +        item.metrics = None
    +
    +        result = QueryResult.from_rust(item)
    +        assert result.doc_id == "doc-1"
    +        assert result.content == "Result text"
    +        assert result.score == 0.9
    +        assert len(result.node_ids) == 2
    +        assert result.metrics is None
    +
    +    def test_to_dict(self):
    +        result = QueryResult(
    +            doc_id="doc-1",
    +            content="text",
    +            score=0.9,
    +            confidence=0.9,
    +            node_ids=["n1"],
    +            evidence=[],
    +            metrics=None,
    +        )
    +        d = result.to_dict()
    +        assert d["doc_id"] == "doc-1"
    +        assert "metrics" not in d
    +
    +
    +class TestQueryResponse:
    +    def test_from_rust(self):
    +        rust_result = MagicMock()
    +        rust_result.items = []
    +        rust_result.failed = []
    +
    +        response = QueryResponse.from_rust(rust_result)
    +        assert len(response) == 0
    +        assert response.single() is None
    +        assert not response.has_failures()
    +
    +    def test_single(self):
    +        item = QueryResult(
    +            doc_id="doc-1", content="text", score=0.9, confidence=0.9
    +        )
    +        response = QueryResponse(items=[item])
    +        assert response.single() == item
    +        assert len(response) == 1
    +
    +    def test_iteration(self):
    +        items = [
    +            QueryResult(doc_id=f"doc-{i}", content="t", score=0.5, confidence=0.5)
    +            for i in range(3)
    +        ]
    +        response = QueryResponse(items=items)
    +        assert list(response) == items
    +
    +    def test_to_dict(self):
    +        response = QueryResponse(
    +            items=[QueryResult(doc_id="d", content="t", score=0.5, confidence=0.5)],
    +            failed=[FailedItem(source="s", error="e")],
    +        )
    +        d = response.to_dict()
    +        assert len(d["items"]) == 1
    +        assert len(d["failed"]) == 1
    +        assert d["failed"][0]["source"] == "s"
    +
    +
    +class TestIndexResult:
    +    def test_from_rust(self):
    +        rust_result = MagicMock()
    +        rust_result.doc_id = "doc-1"
    +        item = MagicMock()
    +        item.doc_id = "doc-1"
    +        item.name = "test.md"
    +        item.format = "markdown"
    +        item.description = None
    +        item.source_path = None
    +        item.page_count = None
    +        item.metrics = None
    +        rust_result.items = [item]
    +        rust_result.failed = []
    +
    +        result = IndexResultWrapper.from_rust(rust_result)
    +        assert result.doc_id == "doc-1"
    +        assert len(result.items) == 1
    +        assert result.items[0].name == "test.md"
    +        assert not result.has_failures()
    +        assert result.total() == 1
    +
    +
    +import pytest
    diff --git a/python/vectorless/__init__.py b/python/vectorless/__init__.py
    index a7f599a..5855916 100644
    --- a/python/vectorless/__init__.py
    +++ b/python/vectorless/__init__.py
    @@ -1,64 +1,66 @@
     """
    -Vectorless - Reasoning-native document intelligence engine for AI.
    +Vectorless — Reasoning-native document engine.
     
    -An ultra-performant reasoning-native document intelligence engine
    -that transforms documents into rich semantic trees and uses LLMs to
    -intelligently traverse the hierarchy for accurate, explainable retrieval.
    +Every retrieval is a reasoning act.
     
     Quick Start:
    -    from vectorless import Engine, IndexContext, QueryContext
    +    from vectorless import Session
     
    -    # Create engine
    -    engine = Engine(api_key="sk-...", model="gpt-4o")
    -
    -    # Index a document
    -    ctx = IndexContext.from_path("./report.pdf")
    -    result = await engine.index(ctx)
    -    doc_id = result.doc_id
    -
    -    # Query
    -    answer = await engine.query(QueryContext("What is the revenue?").with_doc_ids([doc_id]))
    +    session = Session(api_key="sk-...", model="gpt-4o")
    +    result = await session.index(path="./report.pdf")
    +    answer = await session.ask("What is the revenue?", doc_ids=[result.doc_id])
         print(answer.single().content)
     """
     
    -from vectorless._vectorless import (
    -    Engine,
    -    IndexContext,
    -    IndexOptions,
    -    IndexResult,
    -    IndexItem,
    +# High-level API (recommended)
    +from vectorless.session import Session
    +from vectorless.config import EngineConfig, load_config, load_config_from_env, load_config_from_file
    +from vectorless.events import EventEmitter
    +from vectorless.types import (
    +    DocumentGraphWrapper,
    +    EdgeEvidence,
    +    Evidence,
    +    FailedItem,
    +    GraphEdge,
    +    GraphNode,
    +    IndexItemWrapper,
         IndexMetrics,
    -    QueryContext,
    +    IndexResultWrapper,
    +    QueryMetrics,
    +    QueryResponse,
         QueryResult,
    -    QueryResultItem,
    -    DocumentInfo,
    -    DocumentGraph,
    -    DocumentGraphNode,
    -    GraphEdge,
    -    EdgeEvidence,
         WeightedKeyword,
    -    FailedItem,
    -    VectorlessError,
    -    __version__,
     )
     
    +# Version and error types
    +from vectorless._vectorless import VectorlessError, __version__
    +
     __all__ = [
    -    "Engine",
    -    "IndexContext",
    -    "IndexOptions",
    -    "IndexResult",
    -    "IndexItem",
    -    "IndexMetrics",
    -    "QueryContext",
    +    # Primary API
    +    "Session",
    +    # Configuration
    +    "EngineConfig",
    +    "load_config",
    +    "load_config_from_env",
    +    "load_config_from_file",
    +    # Events
    +    "EventEmitter",
    +    # Result types
    +    "QueryResponse",
         "QueryResult",
    -    "QueryResultItem",
    -    "DocumentInfo",
    -    "DocumentGraph",
    -    "DocumentGraphNode",
    +    "QueryMetrics",
    +    "Evidence",
    +    "IndexResultWrapper",
    +    "IndexItemWrapper",
    +    "IndexMetrics",
    +    "FailedItem",
    +    # Graph types
    +    "DocumentGraphWrapper",
    +    "GraphNode",
         "GraphEdge",
         "EdgeEvidence",
         "WeightedKeyword",
    -    "FailedItem",
    +    # Error and version
         "VectorlessError",
         "__version__",
     ]
    diff --git a/python/vectorless/_compat/__init__.py b/python/vectorless/_compat/__init__.py
    new file mode 100644
    index 0000000..aa38e73
    --- /dev/null
    +++ b/python/vectorless/_compat/__init__.py
    @@ -0,0 +1,35 @@
    +"""Framework integrations — optional, loaded on demand."""
    +
    +
    +def get_langchain_retriever():
    +    """Get the LangChain VectorlessRetriever class.
    +
    +    Raises:
    +        ImportError: If langchain-core is not installed.
    +    """
    +    try:
    +        from vectorless._compat.langchain import VectorlessRetriever
    +
    +        return VectorlessRetriever
    +    except ImportError:
    +        raise ImportError(
    +            "LangChain integration requires langchain-core. "
    +            "Install with: pip install vectorless[langchain]"
    +        )
    +
    +
    +def get_llamaindex_retriever():
    +    """Get the LlamaIndex VectorlessRetriever class.
    +
    +    Raises:
    +        ImportError: If llama-index-core is not installed.
    +    """
    +    try:
    +        from vectorless._compat.llamaindex import VectorlessRetriever
    +
    +        return VectorlessRetriever
    +    except ImportError:
    +        raise ImportError(
    +            "LlamaIndex integration requires llama-index-core. "
    +            "Install with: pip install vectorless[llamaindex]"
    +        )
    diff --git a/python/vectorless/_compat/langchain.py b/python/vectorless/_compat/langchain.py
    new file mode 100644
    index 0000000..f277d96
    --- /dev/null
    +++ b/python/vectorless/_compat/langchain.py
    @@ -0,0 +1,106 @@
    +"""LangChain BaseRetriever integration for Vectorless."""
    +
    +from __future__ import annotations
    +
    +import asyncio
    +from typing import Any, List, Optional
    +
    +from langchain_core.callbacks import CallbackManagerForRetrieverRun
    +from langchain_core.documents import Document
    +from langchain_core.retrievers import BaseRetriever
    +
    +from vectorless.session import Session
    +
    +
    +class VectorlessRetriever(BaseRetriever):
    +    """LangChain retriever backed by Vectorless.
    +
    +    Usage::
    +
    +        from vectorless._compat import get_langchain_retriever
    +
    +        VectorlessRetriever = get_langchain_retriever()
    +
    +        retriever = VectorlessRetriever(
    +            api_key="sk-...",
    +            model="gpt-4o",
    +            endpoint="https://api.openai.com/v1",
    +            doc_ids=["doc-123"],
    +            top_k=3,
    +        )
    +
    +        docs = retriever.invoke("What is the revenue?")
    +    """
    +
    +    api_key: str = ""
    +    model: str = ""
    +    endpoint: str = ""
    +    doc_ids: List[str] = []
    +    top_k: int = 3
    +    workspace_scope: bool = False
    +
    +    class Config:
    +        arbitrary_types_allowed = True
    +
    +    def _build_session(self) -> Session:
    +        return Session(
    +            api_key=self.api_key or None,
    +            model=self.model or None,
    +            endpoint=self.endpoint or None,
    +        )
    +
    +    def _get_relevant_documents(
    +        self,
    +        query: str,
    +        *,
    +        run_manager: Optional[CallbackManagerForRetrieverRun] = None,
    +    ) -> List[Document]:
    +        """Synchronous retrieval."""
    +        session = self._build_session()
    +        response = asyncio.run(
    +            session.ask(
    +                query,
    +                doc_ids=self.doc_ids if self.doc_ids else None,
    +                workspace_scope=self.workspace_scope,
    +            )
    +        )
    +        return self._to_documents(response)
    +
    +    async def _aget_relevant_documents(
    +        self,
    +        query: str,
    +        *,
    +        run_manager: Optional[CallbackManagerForRetrieverRun] = None,
    +    ) -> List[Document]:
    +        """Async retrieval."""
    +        session = self._build_session()
    +        response = await session.ask(
    +            query,
    +            doc_ids=self.doc_ids if self.doc_ids else None,
    +            workspace_scope=self.workspace_scope,
    +        )
    +        return self._to_documents(response)
    +
    +    @staticmethod
    +    def _to_documents(response: Any) -> List[Document]:
    +        """Convert Vectorless QueryResponse to LangChain Documents."""
    +        documents = []
    +        for item in response.items:
    +            metadata = {
    +                "doc_id": item.doc_id,
    +                "score": item.score,
    +                "confidence": item.confidence,
    +                "node_ids": item.node_ids,
    +                "evidence_count": len(item.evidence),
    +            }
    +            if item.metrics:
    +                metadata["llm_calls"] = item.metrics.llm_calls
    +                metadata["rounds_used"] = item.metrics.rounds_used
    +                metadata["nodes_visited"] = item.metrics.nodes_visited
    +            documents.append(
    +                Document(
    +                    page_content=item.content,
    +                    metadata=metadata,
    +                )
    +            )
    +        return documents
    diff --git a/python/vectorless/_compat/llamaindex.py b/python/vectorless/_compat/llamaindex.py
    new file mode 100644
    index 0000000..f0cdcc1
    --- /dev/null
    +++ b/python/vectorless/_compat/llamaindex.py
    @@ -0,0 +1,87 @@
    +"""LlamaIndex retriever integration for Vectorless."""
    +
    +from __future__ import annotations
    +
    +import asyncio
    +from typing import Any, List, Optional
    +
    +from vectorless.session import Session
    +
    +
    +class VectorlessRetriever:
    +    """LlamaIndex-compatible retriever backed by Vectorless.
    +
    +    Usage::
    +
    +        from vectorless._compat import get_llamaindex_retriever
    +
    +        VectorlessRetriever = get_llamaindex_retriever()
    +
    +        retriever = VectorlessRetriever(
    +            api_key="sk-...",
    +            model="gpt-4o",
    +            endpoint="https://api.openai.com/v1",
    +            doc_ids=["doc-123"],
    +        )
    +
    +        nodes = retriever.retrieve("What is the revenue?")
    +    """
    +
    +    def __init__(
    +        self,
    +        api_key: str = "",
    +        model: str = "",
    +        endpoint: str = "",
    +        doc_ids: Optional[List[str]] = None,
    +        top_k: int = 3,
    +        workspace_scope: bool = False,
    +    ) -> None:
    +        self._session = Session(
    +            api_key=api_key or None,
    +            model=model or None,
    +            endpoint=endpoint or None,
    +        )
    +        self._doc_ids = doc_ids or []
    +        self._top_k = top_k
    +        self._workspace_scope = workspace_scope
    +
    +    def retrieve(self, query: str) -> List[Any]:
    +        """Synchronous retrieval, returns LlamaIndex NodeWithScore objects."""
    +        response = asyncio.run(self._query(query))
    +        return self._to_nodes(response)
    +
    +    async def aretrieve(self, query: str) -> List[Any]:
    +        """Async retrieval, returns LlamaIndex NodeWithScore objects."""
    +        response = await self._query(query)
    +        return self._to_nodes(response)
    +
    +    async def _query(self, query: str) -> Any:
    +        return await self._session.ask(
    +            query,
    +            doc_ids=self._doc_ids if self._doc_ids else None,
    +            workspace_scope=self._workspace_scope,
    +        )
    +
    +    @staticmethod
    +    def _to_nodes(response: Any) -> List[Any]:
    +        """Convert Vectorless QueryResponse to LlamaIndex NodeWithScore."""
    +        from llama_index.core.schema import NodeWithScore, TextNode
    +
    +        nodes = []
    +        for item in response.items:
    +            metadata = {
    +                "doc_id": item.doc_id,
    +                "confidence": item.confidence,
    +                "node_ids": item.node_ids,
    +            }
    +            text_node = TextNode(
    +                text=item.content,
    +                metadata=metadata,
    +            )
    +            nodes.append(
    +                NodeWithScore(
    +                    node=text_node,
    +                    score=item.score,
    +                )
    +            )
    +        return nodes
    diff --git a/python/vectorless/_core.py b/python/vectorless/_core.py
    new file mode 100644
    index 0000000..967bcff
    --- /dev/null
    +++ b/python/vectorless/_core.py
    @@ -0,0 +1,52 @@
    +"""Internal re-exports from the Rust PyO3 module.
    +
    +This module is NOT part of the public API. Use ``vectorless.Session`` instead.
    +"""
    +
    +from vectorless._vectorless import (
    +    Config,
    +    DocumentGraph,
    +    DocumentGraphNode,
    +    DocumentInfo,
    +    EdgeEvidence,
    +    Engine,
    +    EvidenceItem,
    +    FailedItem,
    +    GraphEdge,
    +    IndexContext,
    +    IndexItem,
    +    IndexMetrics,
    +    IndexOptions,
    +    IndexResult,
    +    QueryContext,
    +    QueryMetrics,
    +    QueryResult,
    +    QueryResultItem,
    +    VectorlessError,
    +    WeightedKeyword,
    +    __version__,
    +)
    +
    +__all__ = [
    +    "Config",
    +    "DocumentGraph",
    +    "DocumentGraphNode",
    +    "DocumentInfo",
    +    "EdgeEvidence",
    +    "Engine",
    +    "EvidenceItem",
    +    "FailedItem",
    +    "GraphEdge",
    +    "IndexContext",
    +    "IndexItem",
    +    "IndexMetrics",
    +    "IndexOptions",
    +    "IndexResult",
    +    "QueryContext",
    +    "QueryMetrics",
    +    "QueryResult",
    +    "QueryResultItem",
    +    "VectorlessError",
    +    "WeightedKeyword",
    +    "__version__",
    +]
    diff --git a/python/vectorless/cli/commands/add.py b/python/vectorless/cli/commands/add.py
    index 4b27f37..79368d0 100644
    --- a/python/vectorless/cli/commands/add.py
    +++ b/python/vectorless/cli/commands/add.py
    @@ -1,9 +1,32 @@
     """add command — index documents (maps to engine.index)."""
     
    +import asyncio
    +import os
    +from pathlib import Path
     from typing import Optional
     
     import click
     
    +from vectorless.cli.workspace import get_workspace_path
    +from vectorless.cli.output import format_json
    +
    +
    +def _create_session(workspace_dir: str):
    +    """Create a Session from workspace config.
    +
    +    Args:
    +        workspace_dir: Path to .vectorless/ directory.
    +
    +    Returns:
    +        Configured Session instance.
    +    """
    +    from vectorless.session import Session
    +
    +    config_path = os.path.join(workspace_dir, "config.toml")
    +    if os.path.exists(config_path):
    +        return Session.from_config_file(config_path)
    +    return Session.from_env()
    +
     
     def add_cmd(
         path: str,
    @@ -29,4 +52,92 @@ def add_cmd(
             IndexContext.from_path / from_paths / from_dir
             IndexOptions(mode="force" if force else "default")
         """
    -    raise NotImplementedError
    +    workspace = get_workspace_path()
    +
    +    try:
    +        session = _create_session(workspace)
    +    except Exception as e:
    +        raise click.ClickException(f"Failed to create session: {e}") from e
    +
    +    target = Path(path).resolve()
    +    format_hint = fmt or "markdown"
    +
    +    async def _run():
    +        if target.is_dir():
    +            if recursive:
    +                # Collect all matching files in directory
    +                extensions = {".md", ".pdf", ".markdown"}
    +                file_paths = [
    +                    str(f)
    +                    for f in target.rglob("*")
    +                    if f.suffix.lower() in extensions and f.is_file()
    +                ]
    +            else:
    +                extensions = {".md", ".pdf", ".markdown"}
    +                file_paths = [
    +                    str(f)
    +                    for f in target.iterdir()
    +                    if f.suffix.lower() in extensions and f.is_file()
    +                ]
    +
    +            if not file_paths:
    +                raise click.ClickException(
    +                    f"No supported documents found in {target}"
    +                )
    +
    +            if verbose:
    +                click.echo(f"Found {len(file_paths)} document(s) to index")
    +
    +            results = await session.index_batch(
    +                file_paths, mode="force" if force else "default", jobs=jobs
    +            )
    +
    +            succeeded = [r for r in results if not r.has_failures()]
    +            failed = [r for r in results if r.has_failures()]
    +
    +            click.echo(f"Indexed {len(succeeded)}/{len(results)} document(s) successfully")
    +            if failed:
    +                click.echo(f"Failed: {len(failed)} document(s)")
    +                for f_result in failed:
    +                    for item in f_result.failed:
    +                        click.echo(f"  {item.source}: {item.error}")
    +
    +            if verbose:
    +                for r in succeeded:
    +                    for item in r.items:
    +                        click.echo(f"  {item.name} ({item.doc_id})")
    +        else:
    +            result = await session.index(
    +                path=str(target),
    +                format=format_hint,
    +                mode="force" if force else "default",
    +            )
    +
    +            if result.doc_id:
    +                click.echo(f"Indexed: {result.doc_id}")
    +            else:
    +                # Batch result from single file
    +                for item in result.items:
    +                    click.echo(f"Indexed: {item.name} ({item.doc_id})")
    +
    +            if result.has_failures():
    +                for item in result.failed:
    +                    click.echo(f"Failed: {item.source}: {item.error}")
    +
    +            if verbose and result.items:
    +                for item in result.items:
    +                    if item.metrics:
    +                        m = item.metrics
    +                        click.echo(
    +                            f"  Nodes: {m.nodes_processed}, "
    +                            f"Summaries: {m.summaries_generated}, "
    +                            f"LLM calls: {m.llm_calls}, "
    +                            f"Time: {m.total_time_ms}ms"
    +                        )
    +
    +    try:
    +        asyncio.run(_run())
    +    except click.ClickException:
    +        raise
    +    except Exception as e:
    +        raise click.ClickException(f"Indexing failed: {e}") from e
    diff --git a/python/vectorless/cli/commands/ask.py b/python/vectorless/cli/commands/ask.py
    index f494fdc..c8e3513 100644
    --- a/python/vectorless/cli/commands/ask.py
    +++ b/python/vectorless/cli/commands/ask.py
    @@ -1,9 +1,118 @@
     """ask command — interactive REPL for multi-turn queries."""
     
    +import asyncio
    +import os
     from typing import Optional
     
     import click
     
    +from vectorless.cli.workspace import get_workspace_path
    +from vectorless.cli.output import OutputFormat, format_query_result
    +
    +
    +def _create_session(workspace_dir: str):
    +    """Create a Session from workspace config."""
    +    from vectorless.session import Session
    +
    +    config_path = os.path.join(workspace_dir, "config.toml")
    +    if os.path.exists(config_path):
    +        return Session.from_config_file(config_path)
    +    return Session.from_env()
    +
    +
    +# Module-level mutable state for the REPL
    +_current_doc_id: Optional[str] = None
    +_verbose: bool = False
    +_total_llm_calls: int = 0
    +_total_queries: int = 0
    +
    +
    +def _print_welcome() -> None:
    +    """Print REPL welcome message with available commands."""
    +    click.echo("Vectorless Interactive REPL")
    +    click.echo("Type a question to query your documents.")
    +    click.echo("")
    +    click.echo("Dot-commands:")
    +    click.echo("  .help       Show available commands")
    +    click.echo("  .tree       Display current document tree")
    +    click.echo("  .stats      Show session statistics (LLM calls, tokens, cost)")
    +    click.echo("  .nav-log    Show navigation log for current conversation")
    +    click.echo("  .doc <doc_id>   Switch query target document")
    +    click.echo("  .doc        Show current target document")
    +    click.echo("  .verbose    Toggle verbose mode")
    +    click.echo("  .quit       Exit REPL")
    +    click.echo("")
    +
    +
    +def _handle_repl_command(
    +    line: str,
    +    session,
    +    workspace: str,
    +) -> Optional[bool]:
    +    """Handle a built-in REPL command (prefixed with .).
    +
    +    Args:
    +        line: Raw input line.
    +        session: Session instance.
    +        workspace: Workspace path.
    +
    +    Returns:
    +        True if the command was handled (should not be treated as query).
    +        False if it's a query.
    +        None if the REPL should exit.
    +    """
    +    global _current_doc_id, _verbose, _total_llm_calls, _total_queries
    +
    +    parts = line.strip().split(maxsplit=1)
    +    cmd = parts[0].lower()
    +    arg = parts[1] if len(parts) > 1 else None
    +
    +    if cmd == ".quit":
    +        return None
    +    elif cmd == ".help":
    +        _print_welcome()
    +        return True
    +    elif cmd == ".verbose":
    +        _verbose = not _verbose
    +        state = "on" if _verbose else "off"
    +        click.echo(f"Verbose mode: {state}")
    +        return True
    +    elif cmd == ".doc":
    +        if arg:
    +            _current_doc_id = arg
    +            click.echo(f"Now targeting document: {_current_doc_id}")
    +        else:
    +            if _current_doc_id:
    +                click.echo(f"Current document: {_current_doc_id}")
    +            else:
    +                click.echo("No document target set (querying all documents)")
    +        return True
    +    elif cmd == ".stats":
    +        click.echo(f"Session statistics:")
    +        click.echo(f"  Queries: {_total_queries}")
    +        click.echo(f"  LLM calls (from query metrics): {_total_llm_calls}")
    +
    +        try:
    +            report = session.metrics_report()
    +            if report:
    +                click.echo(f"  Engine metrics: {report}")
    +        except Exception:
    +            pass
    +        return True
    +    elif cmd == ".tree":
    +        if _current_doc_id:
    +            click.echo(f"Tree visualization for {_current_doc_id}:")
    +            click.echo("  (Use 'vectorless tree' command for full tree display)")
    +        else:
    +            click.echo("No document selected. Use .doc  to select one.")
    +        return True
    +    elif cmd == ".nav-log":
    +        click.echo("Navigation log is shown when verbose mode is on (.verbose)")
    +        return True
    +    else:
    +        click.echo(f"Unknown command: {cmd}. Type .help for available commands.")
    +        return True
    +
     
     def ask_cmd(*, doc_id: Optional[str] = None, verbose: bool = False) -> None:
         """Start an interactive query REPL.
    @@ -26,21 +135,64 @@ def ask_cmd(*, doc_id: Optional[str] = None, verbose: bool = False) -> None:
             .verbose    Toggle verbose mode
             .quit       Exit REPL
         """
    -    raise NotImplementedError
    +    global _current_doc_id, _verbose, _total_llm_calls, _total_queries
     
    +    _current_doc_id = doc_id
    +    _verbose = verbose
    +    _total_llm_calls = 0
    +    _total_queries = 0
     
    -def _handle_repl_command(line: str) -> bool:
    -    """Handle a built-in REPL command (prefixed with .).
    +    workspace = get_workspace_path()
     
    -    Args:
    -        line: Raw input line.
    +    try:
    +        session = _create_session(workspace)
    +    except Exception as e:
    +        raise click.ClickException(f"Failed to create session: {e}") from e
     
    -    Returns:
    -        True if the command was handled, False if it's a query.
    -    """
    -    raise NotImplementedError
    +    _print_welcome()
     
    +    while True:
    +        try:
    +            line = input(">>> ").strip()
    +        except (EOFError, KeyboardInterrupt):
    +            click.echo("\nGoodbye!")
    +            break
     
    -def _print_welcome() -> None:
    -    """Print REPL welcome message with available commands."""
    -    raise NotImplementedError
    +        if not line:
    +            continue
    +
    +        # Handle dot-commands
    +        if line.startswith("."):
    +            result = _handle_repl_command(line, session, workspace)
    +            if result is None:
    +                click.echo("Goodbye!")
    +                break
    +            continue
    +
    +        # Treat as a query
    +        _total_queries += 1
    +
    +        try:
    +            # Build query arguments
    +            doc_ids = [_current_doc_id] if _current_doc_id else None
    +
    +            async def _run():
    +                return await session.ask(
    +                    line,
    +                    doc_ids=doc_ids,
    +                )
    +
    +            response = asyncio.run(_run())
    +
    +            # Accumulate metrics
    +            for item in response.items:
    +                if item.metrics:
    +                    _total_llm_calls += item.metrics.llm_calls
    +
    +            output = format_query_result(
    +                response, fmt=OutputFormat.TEXT, verbose=_verbose
    +            )
    +            click.echo(output)
    +
    +        except Exception as e:
    +            click.echo(f"Error: {e}")
    diff --git a/python/vectorless/cli/commands/config_cmd.py b/python/vectorless/cli/commands/config_cmd.py
    index fbf0ac5..d7a1d62 100644
    --- a/python/vectorless/cli/commands/config_cmd.py
    +++ b/python/vectorless/cli/commands/config_cmd.py
    @@ -1,9 +1,101 @@
     """config command — view and modify configuration."""
     
    -from typing import Optional
    +import sys
    +from pathlib import Path
    +from typing import Any, Dict, Optional
     
     import click
     
    +from vectorless.cli.workspace import get_workspace_path, load_config, save_config
    +
    +
    +def _default_config() -> dict:
    +    """Return default configuration values."""
    +    return {
    +        "llm": {
    +            "model": "",
    +            "api_key": "",
    +            "endpoint": "",
    +            "throttle": {
    +                "max_concurrent_requests": 10,
    +                "requests_per_minute": 500,
    +            },
    +        },
    +        "retrieval": {
    +            "top_k": 3,
    +            "max_iterations": 10,
    +        },
    +        "storage": {
    +            "workspace_dir": "~/.vectorless",
    +        },
    +        "metrics": {
    +            "enabled": True,
    +        },
    +    }
    +
    +
    +def _deep_get(cfg: dict, dotted_key: str) -> Any:
    +    """Get a nested value from a dict using dot-separated key.
    +
    +    Args:
    +        cfg: Configuration dict.
    +        dotted_key: Dot-separated key, e.g. "llm.model".
    +
    +    Returns:
    +        The value at the key path, or None if not found.
    +    """
    +    parts = dotted_key.split(".")
    +    current = cfg
    +    for part in parts:
    +        if not isinstance(current, dict) or part not in current:
    +            return None
    +        current = current[part]
    +    return current
    +
    +
    +def _deep_set(cfg: dict, dotted_key: str, value: Any) -> None:
    +    """Set a nested value in a dict using dot-separated key.
    +
    +    Args:
    +        cfg: Configuration dict.
    +        dotted_key: Dot-separated key, e.g. "llm.model".
    +        value: Value to set.
    +    """
    +    parts = dotted_key.split(".")
    +    current = cfg
    +    for part in parts[:-1]:
    +        if part not in current or not isinstance(current[part], dict):
    +            current[part] = {}
    +        current = current[part]
    +    current[parts[-1]] = value
    +
    +
    +def _coerce_value(value: str) -> Any:
    +    """Attempt to coerce a string value to its proper type.
    +
    +    Args:
    +        value: String value from CLI input.
    +
    +    Returns:
    +        Coerced value (bool, int, float, or str).
    +    """
    +    # Boolean — note "1"/"0" also coerce to bool here, so numeric 1/0 cannot be stored as ints
    +    if value.lower() in ("true", "yes", "1"):
    +        return True
    +    if value.lower() in ("false", "no", "0"):
    +        return False
    +    # Integer
    +    try:
    +        return int(value)
    +    except ValueError:
    +        pass
    +    # Float
    +    try:
    +        return float(value)
    +    except ValueError:
    +        pass
    +    return value
    +
     
     def config_cmd(
         key: Optional[str] = None,
    @@ -33,31 +125,59 @@ def config_cmd(
             index.summary       full | selective | lazy | navigation
             index.compact_mode  true | false
         """
    -    raise NotImplementedError
    -
    -
    -def _load_config(workspace: str) -> dict:
    -    """Load config.toml from workspace.
    +    workspace = get_workspace_path()
    +
    +    if init_config:
    +        defaults = _default_config()
    +        save_config(workspace, defaults)
    +        click.echo("Configuration reset to defaults.")
    +        return
    +
    +    config = load_config(workspace)
    +
    +    if key is None:
    +        # Show all config
    +        if not config:
    +            click.echo("Configuration is empty. Use --init to set defaults.")
    +            return
    +
    +        # Pretty-print the config
    +        _print_config(config)
    +        return
    +
    +    if value is None:
    +        # Show a specific key
    +        val = _deep_get(config, key)
    +        if val is None:
    +            raise click.ClickException(f"Config key not found: {key}")
    +        if isinstance(val, dict):
    +            _print_config(val, indent="  ")
    +        else:
    +            click.echo(f"{key} = {val}")
    +        return
    +
    +    # Set a key to a value
    +    coerced = _coerce_value(value)
    +    _deep_set(config, key, coerced)
    +    save_config(workspace, config)
    +    click.echo(f"Set {key} = {coerced}")
    +
    +
    +def _print_config(cfg: dict, indent: str = "") -> None:
    +    """Recursively print config dict.
     
         Args:
    -        workspace: Path to .vectorless/ directory.
    -
    -    Returns:
    -        Parsed config dict.
    +        cfg: Config dict to print.
    +        indent: Current indentation prefix.
         """
    -    raise NotImplementedError
    -
    -
    -def _save_config(workspace: str, config: dict) -> None:
    -    """Save config dict to config.toml.
    -
    -    Args:
    -        workspace: Path to .vectorless/ directory.
    -        config: Config dict to serialize.
    -    """
    -    raise NotImplementedError
    -
    -
    -def _default_config() -> dict:
    -    """Return default configuration values."""
    -    raise NotImplementedError
    +    for k, v in cfg.items():
    +        if isinstance(v, dict):
    +            click.echo(f"{indent}{k}:")
    +            _print_config(v, indent=indent + "  ")
    +        else:
    +            # Mask API keys
    +            if "api_key" in k.lower() and isinstance(v, str) and v:
    +                display = v[:8] + "..." if len(v) > 8 else "***"
    +            else:
    +                display = v
    +            click.echo(f"{indent}{k} = {display}")
    diff --git a/python/vectorless/cli/commands/info.py b/python/vectorless/cli/commands/info.py
    index cee109c..c6fbced 100644
    --- a/python/vectorless/cli/commands/info.py
    +++ b/python/vectorless/cli/commands/info.py
    @@ -1,7 +1,23 @@
     """info command — show document index details."""
     
    +import asyncio
    +import os
    +
     import click
     
    +from vectorless.cli.workspace import get_workspace_path
    +from vectorless.cli.output import format_json
    +
    +
    +def _create_session(workspace_dir: str):
    +    """Create a Session from workspace config."""
    +    from vectorless.session import Session
    +
    +    config_path = os.path.join(workspace_dir, "config.toml")
    +    if os.path.exists(config_path):
    +        return Session.from_config_file(config_path)
    +    return Session.from_env()
    +
     
     def info_cmd(doc_id: str) -> None:
         """Show detailed information about an indexed document.
    @@ -27,4 +43,57 @@ def info_cmd(doc_id: str) -> None:
               2. Authentication (8 leaves)
               3. Endpoints (18 leaves)
         """
    -    raise NotImplementedError
    +    workspace = get_workspace_path()
    +
    +    try:
    +        session = _create_session(workspace)
    +    except Exception as e:
    +        raise click.ClickException(f"Failed to create session: {e}") from e
    +
    +    async def _run():
    +        return await session.list_documents()
    +
    +    try:
    +        documents = asyncio.run(_run())
    +    except Exception as e:
    +        raise click.ClickException(f"Failed to list documents: {e}") from e
    +
    +    # Find matching document by doc_id
    +    doc = None
    +    for d in documents:
    +        d_id = getattr(d, "doc_id", None) or getattr(d, "id", None)
    +        if d_id == doc_id:
    +            doc = d
    +            break
    +
    +    if doc is None:
    +        raise click.ClickException(f"Document not found: {doc_id}")
    +
    +    # Display document details
    +    name = getattr(doc, "name", "Unknown")
    +    source = getattr(doc, "source_path", None)
    +    fmt = getattr(doc, "format", "unknown")
    +    pages = getattr(doc, "page_count", None)
    +    description = getattr(doc, "description", None)
    +    metrics = getattr(doc, "metrics", None)
    +
    +    click.echo(f"Document: {name} ({doc_id})")
    +    if source:
    +        click.echo(f"Source: {source}")
    +    click.echo(f"Format: {fmt}")
    +    if pages:
    +        click.echo(f"Pages: {pages}")
    +    if description:
    +        click.echo(f"Description: {description}")
    +
    +    if metrics:
    +        click.echo(f"Tree: {metrics.nodes_processed} nodes")
    +        click.echo(f"Summaries generated: {metrics.summaries_generated}")
    +        click.echo(f"LLM calls: {metrics.llm_calls}")
    +        click.echo(f"Total tokens: {metrics.total_tokens_generated}")
    +        click.echo(f"Topics indexed: {metrics.topics_indexed}")
    +        click.echo(f"Keywords indexed: {metrics.keywords_indexed}")
    +        click.echo(f"Indexing time: {metrics.total_time_ms}ms")
    +        click.echo(f"  Parse: {metrics.parse_time_ms}ms")
    +        click.echo(f"  Build: {metrics.build_time_ms}ms")
    +        click.echo(f"  Enhance: {metrics.enhance_time_ms}ms")
    diff --git a/python/vectorless/cli/commands/init.py b/python/vectorless/cli/commands/init.py
    index 8ddf832..a1eeba1 100644
    --- a/python/vectorless/cli/commands/init.py
    +++ b/python/vectorless/cli/commands/init.py
    @@ -2,6 +2,8 @@
     
     import click
     
    +from vectorless.cli.workspace import init_workspace
    +
     
     def init_cmd(workspace: str) -> None:
         """Create .vectorless/ directory structure with default config.
    @@ -15,4 +17,9 @@ def init_cmd(workspace: str) -> None:
         Args:
             workspace: Parent directory to create .vectorless/ in.
         """
    -    raise NotImplementedError
    +    try:
    +        path = init_workspace(workspace)
    +        click.echo(f"Initialized Vectorless workspace at {path}")
    +        click.echo("Edit config.toml to set your LLM API key and model.")
    +    except Exception as e:
    +        raise click.ClickException(f"Failed to initialize workspace: {e}") from e
    diff --git a/python/vectorless/cli/commands/list_cmd.py b/python/vectorless/cli/commands/list_cmd.py
    index 4c479bc..bf7e230 100644
    --- a/python/vectorless/cli/commands/list_cmd.py
    +++ b/python/vectorless/cli/commands/list_cmd.py
    @@ -1,7 +1,23 @@
     """list command — list indexed documents."""
     
    +import asyncio
    +import os
    +
     import click
     
    +from vectorless.cli.workspace import get_workspace_path
    +from vectorless.cli.output import format_documents_table, format_json
    +
    +
    +def _create_session(workspace_dir: str):
    +    """Create a Session from workspace config."""
    +    from vectorless.session import Session
    +
    +    config_path = os.path.join(workspace_dir, "config.toml")
    +    if os.path.exists(config_path):
    +        return Session.from_config_file(config_path)
    +    return Session.from_env()
    +
     
     def list_cmd(*, fmt: str = "table") -> None:
         """List all indexed documents in the workspace.
    @@ -15,4 +31,35 @@ def list_cmd(*, fmt: str = "table") -> None:
         Table output:
             Doc ID | Title | Format | Nodes | Pages | Indexed At
         """
    -    raise NotImplementedError
    +    workspace = get_workspace_path()
    +
    +    try:
    +        session = _create_session(workspace)
    +    except Exception as e:
    +        raise click.ClickException(f"Failed to create session: {e}") from e
    +
    +    async def _run():
    +        return await session.list_documents()
    +
    +    try:
    +        documents = asyncio.run(_run())
    +    except Exception as e:
    +        raise click.ClickException(f"Failed to list documents: {e}") from e
    +
    +    if not documents:
    +        click.echo("No documents indexed.")
    +        return
    +
    +    if fmt == "json":
    +        # Convert document objects to dicts for JSON output
    +        doc_dicts = []
    +        for doc in documents:
    +            doc_dict = {}
    +            for attr in ("id", "doc_id", "name", "format", "source_path", "page_count"):
    +                val = getattr(doc, attr, None)
    +                if val is not None:
    +                    doc_dict[attr] = val
    +            doc_dicts.append(doc_dict)
    +        click.echo(format_json(doc_dicts))
    +    else:
    +        click.echo(format_documents_table(documents))
    diff --git a/python/vectorless/cli/commands/query.py b/python/vectorless/cli/commands/query.py
    index 7963866..7fe383f 100644
    --- a/python/vectorless/cli/commands/query.py
    +++ b/python/vectorless/cli/commands/query.py
    @@ -1,9 +1,24 @@
     """query command — single query (maps to engine.query)."""
     
    +import asyncio
    +import os
     from typing import Optional
     
     import click
     
    +from vectorless.cli.workspace import get_workspace_path
    +from vectorless.cli.output import OutputFormat, format_query_result, format_json
    +
    +
    +def _create_session(workspace_dir: str):
    +    """Create a Session from workspace config."""
    +    from vectorless.session import Session
    +
    +    config_path = os.path.join(workspace_dir, "config.toml")
    +    if os.path.exists(config_path):
    +        return Session.from_config_file(config_path)
    +    return Session.from_env()
    +
     
     def query_cmd(
         question: str,
    @@ -30,4 +45,39 @@ def query_cmd(
                 .with_timeout_secs(n))
             -> QueryResult
         """
    -    raise NotImplementedError
    +    workspace = get_workspace_path()
    +
    +    try:
    +        session = _create_session(workspace)
    +    except Exception as e:
    +        raise click.ClickException(f"Failed to create session: {e}") from e
    +
    +    async def _run():
    +        return await session.ask(
    +            question,
    +            doc_ids=list(doc_ids) if doc_ids else None,
    +            workspace_scope=workspace_scope,
    +            timeout_secs=timeout_secs,
    +        )
    +
    +    try:
    +        result = asyncio.run(_run())
    +    except Exception as e:
    +        raise click.ClickException(f"Query failed: {e}") from e
    +
    +    output_fmt = OutputFormat.JSON if fmt == "json" else OutputFormat.TEXT
    +    output = format_query_result(result, fmt=output_fmt, verbose=verbose)
    +    click.echo(output)
    +
    +    # Show metrics in verbose mode
    +    if verbose:
    +        for item in result.items:
    +            if item.metrics:
    +                m = item.metrics
    +                click.echo(
    +                    f"\nMetrics ({item.doc_id}): "
    +                    f"LLM calls={m.llm_calls}, "
    +                    f"rounds={m.rounds_used}, "
    +                    f"nodes_visited={m.nodes_visited}, "
    +                    f"evidence={m.evidence_count}"
    +                )
    diff --git a/python/vectorless/cli/commands/remove.py b/python/vectorless/cli/commands/remove.py
    index b4285b6..219aea0 100644
    --- a/python/vectorless/cli/commands/remove.py
    +++ b/python/vectorless/cli/commands/remove.py
    @@ -1,7 +1,22 @@
     """remove command — remove document index."""
     
    +import asyncio
    +import os
    +
     import click
     
    +from vectorless.cli.workspace import get_workspace_path
    +
    +
    +def _create_session(workspace_dir: str):
    +    """Create a Session from workspace config."""
    +    from vectorless.session import Session
    +
    +    config_path = os.path.join(workspace_dir, "config.toml")
    +    if os.path.exists(config_path):
    +        return Session.from_config_file(config_path)
    +    return Session.from_env()
    +
     
     def remove_cmd(doc_id: str) -> None:
         """Remove a document from the index.
    @@ -12,4 +27,22 @@ def remove_cmd(doc_id: str) -> None:
         Uses:
             Engine.remove(doc_id)
         """
    -    raise NotImplementedError
    +    workspace = get_workspace_path()
    +
    +    try:
    +        session = _create_session(workspace)
    +    except Exception as e:
    +        raise click.ClickException(f"Failed to create session: {e}") from e
    +
    +    async def _run():
    +        return await session.remove_document(doc_id)
    +
    +    try:
    +        removed = asyncio.run(_run())
    +    except Exception as e:
    +        raise click.ClickException(f"Failed to remove document: {e}") from e
    +
    +    if removed:
    +        click.echo(f"Removed document: {doc_id}")
    +    else:
    +        raise click.ClickException(f"Document not found: {doc_id}")
    diff --git a/python/vectorless/cli/commands/stats.py b/python/vectorless/cli/commands/stats.py
    index be2d906..fa89151 100644
    --- a/python/vectorless/cli/commands/stats.py
    +++ b/python/vectorless/cli/commands/stats.py
    @@ -1,7 +1,44 @@
     """stats command — workspace statistics."""
     
    +import asyncio
    +import os
    +from pathlib import Path
    +
     import click
     
    +from vectorless.cli.workspace import get_workspace_path, get_data_dir, get_cache_dir
    +
    +
    +def _create_session(workspace_dir: str):
    +    """Create a Session from workspace config."""
    +    from vectorless.session import Session
    +
    +    config_path = os.path.join(workspace_dir, "config.toml")
    +    if os.path.exists(config_path):
    +        return Session.from_config_file(config_path)
    +    return Session.from_env()
    +
    +
    +def _dir_size(path: str) -> int:
    +    """Calculate total size of a directory in bytes."""
    +    total = 0
    +    try:
    +        for entry in Path(path).rglob("*"):
    +            if entry.is_file():
    +                total += entry.stat().st_size
    +    except OSError:
    +        pass
    +    return total
    +
    +
    +def _format_size(size_bytes: int) -> str:
    +    """Format bytes as human-readable size."""
    +    for unit in ("B", "KB", "MB", "GB"):
    +        if size_bytes < 1024:
    +            return f"{size_bytes:.1f} {unit}"
    +        size_bytes /= 1024
    +    return f"{size_bytes:.1f} TB"
    +
     
     def stats_cmd() -> None:
         """Show workspace statistics.
    @@ -19,4 +56,83 @@ def stats_cmd() -> None:
             Engine.metrics_report()
             Filesystem scan for size info
         """
    -    raise NotImplementedError
    +    workspace = get_workspace_path()
    +
    +    try:
    +        session = _create_session(workspace)
    +    except Exception as e:
    +        raise click.ClickException(f"Failed to create session: {e}") from e
    +
    +    async def _run():
    +        documents = await session.list_documents()
    +        graph = await session.get_graph()
    +        return documents, graph
    +
    +    try:
    +        documents, graph = asyncio.run(_run())
    +    except Exception as e:
    +        raise click.ClickException(f"Failed to retrieve workspace data: {e}") from e
    +
    +    # Compute aggregate stats
    +    total_nodes = 0
    +    total_summaries = 0
    +    total_tokens = 0
    +    total_llm_calls = 0
    +    total_keywords = 0
    +    total_topics = 0
    +
    +    for doc in documents:
    +        metrics = getattr(doc, "metrics", None)
    +        if metrics:
    +            total_nodes += metrics.nodes_processed
    +            total_summaries += metrics.summaries_generated
    +            total_tokens += metrics.total_tokens_generated
    +            total_llm_calls += metrics.llm_calls
    +            total_keywords += metrics.keywords_indexed
    +            total_topics += metrics.topics_indexed
    +
    +    # Calculate disk usage
    +    data_size = _dir_size(get_data_dir(workspace))
    +    cache_size = _dir_size(get_cache_dir(workspace))
    +
    +    # Display stats
    +    click.echo(f"Workspace: {workspace}")
    +    click.echo(f"Documents indexed: {len(documents)}")
    +    click.echo("")
    +
    +    if documents:
    +        click.echo("Index statistics:")
    +        click.echo(f"  Total nodes: {total_nodes}")
    +        click.echo(f"  Total summaries: {total_summaries}")
    +        click.echo(f"  Total tokens generated: {total_tokens:,}")
    +        click.echo(f"  Total LLM calls (indexing): {total_llm_calls}")
    +        click.echo(f"  Total keywords indexed: {total_keywords}")
    +        click.echo(f"  Total topics indexed: {total_topics}")
    +
    +    click.echo("")
    +    click.echo("Disk usage:")
    +    click.echo(f"  Data: {_format_size(data_size)}")
    +    click.echo(f"  Cache: {_format_size(cache_size)}")
    +
    +    # Graph info
    +    if graph and not graph.is_empty():
    +        click.echo("")
    +        click.echo("Document graph:")
    +        click.echo(f"  Nodes: {graph.node_count()}")
    +        click.echo(f"  Edges: {graph.edge_count()}")
    +
    +        doc_ids = graph.doc_ids()
    +        if doc_ids:
    +            click.echo(f"  Connected documents: {', '.join(doc_ids)}")
    +    else:
    +        click.echo("")
    +        click.echo("Document graph: (empty)")
    +
    +    # Engine metrics
    +    try:
    +        report = session.metrics_report()
    +        if report:
    +            click.echo("")
    +            click.echo(f"Engine metrics: {report}")
    +    except Exception:
    +        pass
    diff --git a/python/vectorless/cli/commands/tree.py b/python/vectorless/cli/commands/tree.py
    index 581f8ba..da12f8e 100644
    --- a/python/vectorless/cli/commands/tree.py
    +++ b/python/vectorless/cli/commands/tree.py
    @@ -1,9 +1,23 @@
     """tree command — visualize document tree structure."""
     
    +import asyncio
    +import os
     from typing import Optional
     
     import click
     
    +from vectorless.cli.workspace import get_workspace_path
    +
    +
    +def _create_session(workspace_dir: str):
    +    """Create a Session from workspace config."""
    +    from vectorless.session import Session
    +
    +    config_path = os.path.join(workspace_dir, "config.toml")
    +    if os.path.exists(config_path):
    +        return Session.from_config_file(config_path)
    +    return Session.from_env()
    +
     
     def tree_cmd(
         doc_id: str,
    @@ -29,4 +43,95 @@ def tree_cmd(
             │   └── 1.2.2 OAuth Flow
             └── 1.3 Endpoints (18 leaves)
         """
    -    raise NotImplementedError
    +    workspace = get_workspace_path()
    +
    +    try:
    +        session = _create_session(workspace)
    +    except Exception as e:
    +        raise click.ClickException(f"Failed to create session: {e}") from e
    +
    +    async def _run():
    +        graph = await session.get_graph()
    +        documents = await session.list_documents()
    +        return graph, documents
    +
    +    try:
    +        graph, documents = asyncio.run(_run())
    +    except Exception as e:
    +        raise click.ClickException(f"Failed to retrieve document data: {e}") from e
    +
    +    # Find the matching document from the list
    +    doc_info = None
    +    for d in documents:
    +        d_id = getattr(d, "doc_id", None) or getattr(d, "id", None)
    +        if d_id == doc_id:
    +            doc_info = d
    +            break
    +
    +    if doc_info is None:
    +        raise click.ClickException(f"Document not found: {doc_id}")
    +
    +    name = getattr(doc_info, "name", "Unknown")
    +    fmt = getattr(doc_info, "format", "unknown")
    +    metrics = getattr(doc_info, "metrics", None)
    +
    +    # Display header
    +    node_count = metrics.nodes_processed if metrics else "?"
    +    click.echo(f"{name} ({doc_id})")
    +    click.echo(f"  Format: {fmt}")
    +
    +    if metrics:
    +        click.echo(
    +            f"  Nodes: {metrics.nodes_processed}, "
    +            f"Summaries: {metrics.summaries_generated}, "
    +            f"Keywords: {metrics.keywords_indexed}"
    +        )
    +
    +    click.echo("")
    +
    +    # Since Rust tree is not directly exposed, show graph-based structure
    +    if graph and not graph.is_empty():
    +        node = graph.get_node(doc_id)
    +        if node:
    +            click.echo(f"  Graph node: {node.title}")
    +            click.echo(f"  Format: {node.format}, Node count: {node.node_count}")
    +
    +            if show_keywords and node.top_keywords:
    +                kw_str = ", ".join(
    +                    f"{kw.keyword} ({kw.weight:.2f})" for kw in node.top_keywords[:10]
    +                )
    +                click.echo(f"  Top keywords: {kw_str}")
    +
    +            neighbors = graph.get_neighbors(doc_id)
    +            if neighbors:
    +                click.echo("")
    +                click.echo(f"  Related documents ({len(neighbors)} connections):")
    +                for edge in neighbors:
    +                    weight_str = f"weight={edge.weight:.2f}"
    +                    evidence_str = ""
    +                    if edge.evidence:
    +                        evidence_str = (
    +                            f", shared_keywords={edge.evidence.shared_keyword_count}"
    +                            f", jaccard={edge.evidence.keyword_jaccard:.3f}"
    +                        )
    +                    click.echo(f"    -> {edge.target_doc_id} ({weight_str}{evidence_str})")
    +
    +                    if show_keywords and edge.evidence:
    +                        shared = ", ".join(
    +                            kw for kw, _ in edge.evidence.shared_keywords[:5]
    +                        )
    +                        if shared:
    +                            click.echo(f"       shared: {shared}")
    +        else:
    +            click.echo(f"  (Document {doc_id} not found in graph)")
    +    else:
    +        click.echo("  (No graph data available)")
    +        click.echo("  The document tree is not directly accessible from the CLI.")
    +        click.echo("  Graph data will be populated as more documents are indexed.")
    +
    +    if show_summary and metrics:
    +        click.echo("")
    +        click.echo(f"  Index summary:")
    +        click.echo(f"    Total time: {metrics.total_time_ms}ms")
    +        click.echo(f"    LLM calls: {metrics.llm_calls}")
    +        click.echo(f"    Tokens generated: {metrics.total_tokens_generated}")
    diff --git a/python/vectorless/cli/main.py b/python/vectorless/cli/main.py
    index 844d380..f41d0fe 100644
    --- a/python/vectorless/cli/main.py
    +++ b/python/vectorless/cli/main.py
    @@ -5,6 +5,19 @@
     
     import click
     
    +from vectorless.cli.commands import (
    +    init_cmd,
    +    add_cmd,
    +    list_cmd,
    +    info_cmd,
    +    remove_cmd,
    +    query_cmd,
    +    ask_cmd,
    +    tree_cmd,
    +    stats_cmd,
    +    config_cmd,
    +)
    +
     
     @click.group()
     @click.version_option(package_name="vectorless")
    @@ -22,7 +35,7 @@ def app(ctx: click.Context, workspace: str) -> None:
     @click.option("--workspace", "-w", default=".", help="Directory to initialize.")
     def init(workspace: str) -> None:
         """Initialize a .vectorless/ workspace."""
    -    raise NotImplementedError
    +    init_cmd(workspace)
     
     
     @app.command()
    @@ -44,21 +57,28 @@ def add(
     
         PATH can be a file (.md, .pdf) or a directory.
         """
    -    raise NotImplementedError
    +    add_cmd(
    +        path,
    +        recursive=recursive,
    +        fmt=fmt,
    +        force=force,
    +        jobs=jobs,
    +        verbose=verbose,
    +    )
     
     
     @app.command("list")
     @click.option("--format", "fmt", type=click.Choice(["table", "json"]), default="table")
    -def list_cmd(fmt: str) -> None:
    +def list_documents(fmt: str) -> None:
         """List all indexed documents."""
    -    raise NotImplementedError
    +    list_cmd(fmt=fmt)
     
     
     @app.command()
     @click.argument("doc_id")
     def info(doc_id: str) -> None:
         """Show details of an indexed document."""
    -    raise NotImplementedError
    +    info_cmd(doc_id)
     
     
     @app.command()
    @@ -66,7 +86,7 @@ def info(doc_id: str) -> None:
     @click.confirmation_option(prompt="Remove this document index?")
     def remove(doc_id: str) -> None:
         """Remove a document from the index."""
    -    raise NotImplementedError
    +    remove_cmd(doc_id)
     
     
     # ── Query commands ──────────────────────────────────────────
    @@ -90,7 +110,14 @@ def query(
     
         QUESTION is the natural-language question to ask.
         """
    -    raise NotImplementedError
    +    query_cmd(
    +        question,
    +        doc_ids=doc,
    +        workspace_scope=workspace_scope,
    +        fmt=fmt,
    +        verbose=verbose,
    +        timeout_secs=max_tokens,
    +    )
     
     
     @app.command()
    @@ -101,7 +128,7 @@ def ask(doc: Optional[str], verbose: bool) -> None:
     
         Start a multi-turn conversation with your documents.
         """
    -    raise NotImplementedError
    +    ask_cmd(doc_id=doc, verbose=verbose)
     
     
     # ── Debug / tool commands ───────────────────────────────────
    @@ -113,20 +140,25 @@ def ask(doc: Optional[str], verbose: bool) -> None:
     @click.option("--show-keywords", is_flag=True, help="Show routing keywords.")
     def tree(doc_id: str, depth: Optional[int], show_summary: bool, show_keywords: bool) -> None:
         """Visualize document tree structure."""
    -    raise NotImplementedError
    +    tree_cmd(
    +        doc_id,
    +        depth=depth,
    +        show_summary=show_summary,
    +        show_keywords=show_keywords,
    +    )
     
     
     @app.command()
     def stats() -> None:
         """Show workspace statistics."""
    -    raise NotImplementedError
    +    stats_cmd()
     
     
     @app.command("config")
     @click.argument("key", required=False)
     @click.argument("value", required=False)
     @click.option("--init", "init_config", is_flag=True, help="Re-initialize default config.")
    -def config_cmd(key: Optional[str], value: Optional[str], init_config: bool) -> None:
    +def config_cli(key: Optional[str], value: Optional[str], init_config: bool) -> None:
         """View or modify configuration.
     
         \b
    @@ -135,4 +167,4 @@ def config_cmd(key: Optional[str], value: Optional[str], init_config: bool) -> N
         vectorless-cli config llm.model gpt-4o   Set a value
         vectorless-cli config --init             Reset to defaults
         """
    -    raise NotImplementedError
    +    config_cmd(key=key, value=value, init_config=init_config)
    diff --git a/python/vectorless/cli/output.py b/python/vectorless/cli/output.py
    index 531058a..4c131f1 100644
    --- a/python/vectorless/cli/output.py
    +++ b/python/vectorless/cli/output.py
    @@ -1,7 +1,10 @@
     """Output formatting — text, json, table."""
     
    -from typing import Any, Optional
    +from __future__ import annotations
    +
    +import json
     from enum import Enum
    +from typing import Any, Dict, List, Optional
     
     
     class OutputFormat(Enum):
    @@ -11,34 +14,144 @@ class OutputFormat(Enum):
     
     
     def format_result(data: Any, fmt: OutputFormat) -> str:
    -    """Format a result dict for terminal output.
    +    """Format a result dict for terminal output."""
    +    if fmt == OutputFormat.JSON:
    +        return format_json(data)
    +    return format_text(data)
     
    -    Args:
    -        data: Structured data to format.
    -        fmt: Target output format.
     
    -    Returns:
    -        Formatted string ready to print.
    -    """
    -    raise NotImplementedError
    +def format_text(data: Any) -> str:
    +    """Format data as readable text."""
    +    if isinstance(data, dict):
    +        lines = []
    +        for key, value in data.items():
    +            lines.append(f"  {key}: {value}")
    +        return "\n".join(lines)
    +    return str(data)
     
     
    -def format_documents_table(documents: list[dict]) -> str:
    +def format_documents_table(documents: List[Any]) -> str:
         """Format a list of documents as a table.
     
    -    Columns: Doc ID | Title | Format | Nodes | Pages | Indexed At
    +    Columns: Doc ID | Title | Format | Pages | Source
     
    -    Args:
    -        documents: List of document info dicts.
    +    Uses rich if available, plain text otherwise.
    +    """
    +    if not documents:
    +        return "No documents indexed."
    +
    +    try:
    +        from rich.console import Console
    +        from rich.table import Table
    +
    +        table = Table(title="Indexed Documents")
    +        table.add_column("Doc ID", style="cyan", no_wrap=True, max_width=12)
    +        table.add_column("Title", style="white")
    +        table.add_column("Format", style="green")
    +        table.add_column("Pages", style="yellow", justify="right")
    +        table.add_column("Source", style="dim")
    +
    +        for doc in documents:
    +            doc_id = doc.id if hasattr(doc, "id") else str(doc.get("id", ""))
    +            name = doc.name if hasattr(doc, "name") else str(doc.get("name", ""))
    +            fmt = doc.format if hasattr(doc, "format") else str(doc.get("format", ""))
    +            pages = doc.page_count if hasattr(doc, "page_count") else doc.get("page_count")
    +            source = (
    +                doc.source_path if hasattr(doc, "source_path") else doc.get("source_path")
    +            )
    +            table.add_row(
    +                doc_id[:12],
    +                name,
    +                fmt,
    +                str(pages) if pages else "-",
    +                str(source) if source else "-",
    +            )
    +
    +        from io import StringIO
    +
    +        buf = StringIO()
    +        console = Console(file=buf, force_terminal=True)
    +        console.print(table)
    +        return buf.getvalue()
    +
    +    except ImportError:
    +        # Plain text fallback
    +        lines = []
    +        header = f"{'Doc ID':<14} {'Title':<30} {'Format':<10} {'Pages':>6} {'Source'}"
    +        lines.append(header)
    +        lines.append("-" * len(header))
    +
    +        for doc in documents:
    +            doc_id = (doc.id if hasattr(doc, "id") else str(doc.get("id", "")))[:12]
    +            name = (doc.name if hasattr(doc, "name") else str(doc.get("name", "")))[:28]
    +            fmt = doc.format if hasattr(doc, "format") else str(doc.get("format", ""))
    +            pages = doc.page_count if hasattr(doc, "page_count") else doc.get("page_count")
    +            source = (
    +                doc.source_path if hasattr(doc, "source_path") else doc.get("source_path")
    +            )
    +            lines.append(
    +                f"{doc_id:<14} {name:<30} {fmt:<10} {str(pages or '-'):>6} {source or '-'}"
    +            )
    +
    +        return "\n".join(lines)
    +
    +
    +def format_query_result(
    +    result: Any,
    +    fmt: OutputFormat = OutputFormat.TEXT,
    +    verbose: bool = False,
    +) -> str:
    +    """Format query results for output.
     
    -    Returns:
    -        Formatted table string (uses comfy-table or rich).
    +    Args:
    +        result: QueryResponse or similar with items and failed.
    +        fmt: Output format.
    +        verbose: Show evidence details.
         """
    -    raise NotImplementedError
    +    if fmt == OutputFormat.JSON:
    +        if hasattr(result, "to_dict"):
    +            return format_json(result.to_dict())
    +        return format_json(result)
    +
    +    lines = []
    +    items = result.items if hasattr(result, "items") else result.get("items", [])
    +
    +    for item in items:
    +        content = item.content if hasattr(item, "content") else item.get("content", "")
    +        doc_id = item.doc_id if hasattr(item, "doc_id") else item.get("doc_id", "")
    +        confidence = (
    +            item.confidence if hasattr(item, "confidence") else item.get("confidence", 0)
    +        )
    +
    +        lines.append(f"[{doc_id}] (confidence: {confidence:.2f})")
    +        lines.append(f"  {content}")
    +
    +        if verbose:
    +            evidence = (
    +                item.evidence if hasattr(item, "evidence") else item.get("evidence", [])
    +            )
    +            if evidence:
    +                lines.append("  Evidence:")
    +                for ev in evidence:
    +                    title = ev.title if hasattr(ev, "title") else ev.get("title", "")
    +                    path = ev.path if hasattr(ev, "path") else ev.get("path", "")
    +                    lines.append(f"    - {title} ({path})")
    +
    +        lines.append("")
    +
    +    failed = result.failed if hasattr(result, "failed") else result.get("failed", [])
    +    if failed:
    +        lines.append("Failures:")
    +        for f in failed:
    +            source = f.source if hasattr(f, "source") else f.get("source", "")
    +            error = f.error if hasattr(f, "error") else f.get("error", "")
    +            lines.append(f"  {source}: {error}")
    +
    +    return "\n".join(lines)
     
     
     def format_tree(
    -    nodes: list[dict],
    +    nodes: List[Dict],
         *,
         max_depth: Optional[int] = None,
         show_summary: bool = False,
    @@ -46,30 +159,45 @@ def format_tree(
     ) -> str:
         """Format document tree as indented tree view.
     
    -    Args:
    -        nodes: Flat list of tree nodes with parent references.
    -        max_depth: Max depth to display.
    -        show_summary: Include summaries.
    -        show_keywords: Include routing keywords.
    -
    -    Returns:
    -        Indented tree string.
    +    Limited implementation without Rust tree exposure.
    +    Displays graph structure instead.
         """
    -    raise NotImplementedError
    +    if not nodes:
    +        return "No tree data available."
     
    +    lines = []
    +    for node in nodes:
    +        indent = "  " * node.get("depth", 0)
    +        title = node.get("title", "untitled")
    +        lines.append(f"{indent}├── {title}")
    +        if show_summary and node.get("summary"):
    +            lines.append(f"{indent}│   summary: {node['summary'][:80]}...")
    +        if show_keywords and node.get("keywords"):
    +            kw = ", ".join(node["keywords"][:5])
    +            lines.append(f"{indent}│   keywords: {kw}")
     
    -def format_navigation_steps(steps: list[dict]) -> str:
    -    """Format Agent navigation steps for verbose mode.
    +    return "\n".join(lines)
     
    -    Args:
    -        steps: List of navigation step dicts with action, target, reasoning.
     
    -    Returns:
    -        Step-by-step navigation log string.
    -    """
    -    raise NotImplementedError
    +def format_navigation_steps(steps: List[Dict]) -> str:
    +    """Format Agent navigation steps for verbose mode."""
    +    if not steps:
    +        return ""
    +
    +    lines = []
    +    for i, step in enumerate(steps, 1):
    +        action = step.get("action", "?")
    +        target = step.get("target", "")
    +        reasoning = step.get("reasoning", "")
    +        lines.append(f"  Step {i}: {action} {target}")
    +        if reasoning:
    +            lines.append(f"    Reason: {reasoning}")
    +
    +    return "\n".join(lines)
     
     
     def format_json(data: Any) -> str:
         """Format data as indented JSON."""
    -    raise NotImplementedError
    +    if hasattr(data, "to_dict"):
    +        data = data.to_dict()
    +    return json.dumps(data, indent=2, default=str, ensure_ascii=False)
    diff --git a/python/vectorless/config/__init__.py b/python/vectorless/config/__init__.py
    new file mode 100644
    index 0000000..7a925a6
    --- /dev/null
    +++ b/python/vectorless/config/__init__.py
    @@ -0,0 +1,25 @@
    +"""Configuration models and loading utilities."""
    +
    +from vectorless.config.loading import load_config, load_config_from_env, load_config_from_file
    +from vectorless.config.models import (
    +    EngineConfig,
    +    LlmConfig,
    +    MetricsConfig,
    +    RetrievalConfig,
    +    RetryConfig,
    +    StorageConfig,
    +    ThrottleConfig,
    +)
    +
    +__all__ = [
    +    "EngineConfig",
    +    "LlmConfig",
    +    "MetricsConfig",
    +    "RetrievalConfig",
    +    "RetryConfig",
    +    "StorageConfig",
    +    "ThrottleConfig",
    +    "load_config",
    +    "load_config_from_env",
    +    "load_config_from_file",
    +]
    diff --git a/python/vectorless/config/loading.py b/python/vectorless/config/loading.py
    new file mode 100644
    index 0000000..445ffbe
    --- /dev/null
    +++ b/python/vectorless/config/loading.py
    @@ -0,0 +1,172 @@
    +"""Configuration loading from environment variables and TOML files."""
    +
    +from __future__ import annotations
    +
    +import os
    +import sys
    +from pathlib import Path
    +from typing import Any, Optional
    +
    +from vectorless.config.models import EngineConfig, LlmConfig, RetrievalConfig, StorageConfig
    +
    +if sys.version_info >= (3, 11):
    +    import tomllib
    +else:
    +    try:
    +        import tomli as tomllib  # type: ignore[no-redef]
    +    except ImportError:
    +        tomllib = None  # type: ignore[assignment]
    +
    +
    +def load_config_from_env(prefix: str = "VECTORLESS_") -> EngineConfig:
    +    """Load configuration from environment variables.
    +
    +    Recognized variables::
    +
    +        VECTORLESS_API_KEY         -> llm.api_key
    +        VECTORLESS_MODEL           -> llm.model
    +        VECTORLESS_ENDPOINT        -> llm.endpoint
    +        VECTORLESS_WORKSPACE_DIR   -> storage.workspace_dir
    +        VECTORLESS_TOP_K           -> retrieval.top_k
    +        VECTORLESS_MAX_ITERATIONS  -> retrieval.max_iterations
    +        VECTORLESS_METRICS_ENABLED -> metrics.enabled
    +    """
    +    llm = LlmConfig()
    +    storage = StorageConfig()
    +    retrieval = RetrievalConfig()
    +    metrics_enabled: Optional[bool] = None
    +
    +    env_map = {
    +        f"{prefix}API_KEY": ("llm.api_key", str),
    +        f"{prefix}MODEL": ("llm.model", str),
    +        f"{prefix}ENDPOINT": ("llm.endpoint", str),
    +        f"{prefix}WORKSPACE_DIR": ("storage.workspace_dir", str),
    +        f"{prefix}TOP_K": ("retrieval.top_k", int),
    +        f"{prefix}MAX_ITERATIONS": ("retrieval.max_iterations", int),
    +        f"{prefix}METRICS_ENABLED": ("metrics.enabled", bool),
    +    }
    +
    +    kwargs: dict[str, Any] = {}
    +
    +    for env_key, (path, type_fn) in env_map.items():
    +        value = os.environ.get(env_key)
    +        if value is not None:
    +            if type_fn is bool:
    +                kwargs[path] = value.lower() in ("1", "true", "yes")
    +            else:
    +                kwargs[path] = type_fn(value)
    +
    +    # Apply to sub-models
    +    if f"llm.api_key" in kwargs:
    +        llm = LlmConfig(
    +            api_key=kwargs["llm.api_key"],
    +            model=kwargs.get("llm.model", llm.model),
    +            endpoint=kwargs.get("llm.endpoint", llm.endpoint),
    +        )
    +    elif "llm.model" in kwargs or "llm.endpoint" in kwargs:
    +        llm = LlmConfig(
    +            model=kwargs.get("llm.model", llm.model),
    +            endpoint=kwargs.get("llm.endpoint", llm.endpoint),
    +        )
    +
    +    if "storage.workspace_dir" in kwargs:
    +        storage = StorageConfig(workspace_dir=kwargs["storage.workspace_dir"])
    +
    +    if "retrieval.top_k" in kwargs or "retrieval.max_iterations" in kwargs:
    +        retrieval = RetrievalConfig(
    +            top_k=kwargs.get("retrieval.top_k", retrieval.top_k),
    +            max_iterations=kwargs.get("retrieval.max_iterations", retrieval.max_iterations),
    +        )
    +
    +    if "metrics.enabled" in kwargs:
    +        from vectorless.config.models import MetricsConfig
    +
    +        metrics = MetricsConfig(enabled=kwargs["metrics.enabled"])
    +    else:
    +        from vectorless.config.models import MetricsConfig
    +
    +        metrics = MetricsConfig()
    +
    +    return EngineConfig(llm=llm, storage=storage, retrieval=retrieval, metrics=metrics)
    +
    +
    +def load_config_from_file(path: Path) -> EngineConfig:
    +    """Load configuration from a TOML file.
    +
    +    Expected format::
    +
    +        [llm]
    +        model = "gpt-4o"
    +        api_key = "sk-..."
    +        endpoint = "https://api.openai.com/v1"
    +
    +        [llm.throttle]
    +        max_concurrent_requests = 10
    +        requests_per_minute = 500
    +
    +        [retrieval]
    +        top_k = 5
    +        max_iterations = 10
    +
    +        [storage]
    +        workspace_dir = "~/.vectorless"
    +
    +        [metrics]
    +        enabled = true
    +    """
    +    if tomllib is None:
    +        raise ImportError(
    +            "TOML parsing requires 'tomli' on Python < 3.11. "
    +            "Install with: pip install vectorless[cli]"
    +        )
    +
    +    with open(path, "rb") as f:
    +        data = tomllib.load(f)
    +
    +    return EngineConfig(**data)
    +
    +
    +def load_config(
    +    config_file: Optional[Path] = None,
    +    env_prefix: str = "VECTORLESS_",
    +    overrides: Optional[dict[str, Any]] = None,
    +) -> EngineConfig:
    +    """Load configuration with layered precedence.
    +
    +    Merge order (later overrides earlier):
    +        defaults -> config file -> environment variables -> overrides dict
    +    """
    +    # Start with defaults
    +    config_data: dict[str, Any] = {}
    +
    +    # Layer 1: config file
    +    if config_file is not None and config_file.exists():
    +        if tomllib is None:
    +            raise ImportError(
    +                "TOML parsing requires 'tomli' on Python < 3.11. "
    +                "Install with: pip install vectorless[cli]"
    +            )
    +        with open(config_file, "rb") as f:
    +            file_data = tomllib.load(f)
    +        config_data.update(file_data)
    +
    +    # Layer 2: environment variables
    +    env_config = load_config_from_env(prefix=env_prefix)
    +    # Merge: only override if env var was actually set
    +    env_defaults = load_config_from_env.__wrapped__ if hasattr(load_config_from_env, "__wrapped__") else None  # noqa: E501
    +    base = EngineConfig()
    +    env_data: dict[str, Any] = {}
    +    if env_config.llm.api_key != base.llm.api_key or env_config.llm.model != base.llm.model:
    +        env_data["llm"] = env_config.llm.model_dump()
    +    if env_config.storage.workspace_dir != base.storage.workspace_dir:
    +        env_data["storage"] = env_config.storage.model_dump()
    +    if env_config.retrieval != base.retrieval:
    +        env_data["retrieval"] = env_config.retrieval.model_dump()
    +
    +    config_data.update(env_data)
    +
    +    # Layer 3: explicit overrides
    +    if overrides:
    +        config_data.update(overrides)
    +
    +    return EngineConfig(**config_data)
    diff --git a/python/vectorless/config/models.py b/python/vectorless/config/models.py
    new file mode 100644
    index 0000000..57da548
    --- /dev/null
    +++ b/python/vectorless/config/models.py
    @@ -0,0 +1,89 @@
    +"""Pydantic configuration models for Vectorless Engine."""
    +
    +from __future__ import annotations
    +
    +from typing import Optional
    +
    +from pydantic import BaseModel, Field
    +
    +from vectorless._core import Config as RustConfig
    +
    +
    +class ThrottleConfig(BaseModel):
    +    """LLM request throttling."""
    +
    +    max_concurrent_requests: int = 10
    +    requests_per_minute: int = 500
    +
    +
    +class RetryConfig(BaseModel):
    +    """LLM request retry policy."""
    +
    +    max_attempts: int = 3
    +    initial_delay_secs: float = 1.0
    +    max_delay_secs: float = 30.0
    +
    +
    +class LlmConfig(BaseModel):
    +    """LLM connection configuration."""
    +
    +    model: str = ""
    +    api_key: Optional[str] = None
    +    endpoint: Optional[str] = None
    +    throttle: ThrottleConfig = ThrottleConfig()
    +    retry: RetryConfig = RetryConfig()
    +
    +
    +class MetricsConfig(BaseModel):
    +    """Metrics collection configuration."""
    +
    +    enabled: bool = True
    +
    +
    +class RetrievalConfig(BaseModel):
    +    """Retrieval behavior configuration."""
    +
    +    top_k: int = Field(default=3, ge=1)
    +    max_iterations: int = Field(default=10, ge=1)
    +
    +
    +class StorageConfig(BaseModel):
    +    """Storage and workspace configuration."""
    +
    +    workspace_dir: str = "~/.vectorless"
    +
    +
    +class EngineConfig(BaseModel):
    +    """Full engine configuration.
    +
    +    Usage::
    +
    +        from vectorless import EngineConfig
    +
    +        config = EngineConfig(
    +            llm=LlmConfig(model="gpt-4o", api_key="sk-..."),
    +            retrieval=RetrievalConfig(top_k=5),
    +        )
    +
    +        # Convert to Rust Config for Engine construction
    +        rust_config = config.to_rust_config()
    +    """
    +
    +    llm: LlmConfig = LlmConfig()
    +    metrics: MetricsConfig = MetricsConfig()
    +    retrieval: RetrievalConfig = RetrievalConfig()
    +    storage: StorageConfig = StorageConfig()
    +
    +    def to_rust_config(self) -> RustConfig:
    +        """Convert to the Rust-backed Config object.
    +
    +        Calls the setter methods defined in python/src/config.rs.
    +        """
    +        cfg = RustConfig()
    +        cfg.set_workspace_dir(self.storage.workspace_dir)
    +        cfg.set_top_k(self.retrieval.top_k)
    +        cfg.set_max_iterations(self.retrieval.max_iterations)
    +        cfg.set_max_concurrent_requests(self.llm.throttle.max_concurrent_requests)
    +        cfg.set_requests_per_minute(self.llm.throttle.requests_per_minute)
    +        cfg.set_metrics_enabled(self.metrics.enabled)
    +        return cfg
    diff --git a/python/vectorless/events.py b/python/vectorless/events.py
    new file mode 100644
    index 0000000..6d76499
    --- /dev/null
    +++ b/python/vectorless/events.py
    @@ -0,0 +1,118 @@
    +"""Python-side event callback system for progress monitoring.
    +
    +This is a pure-Python callback registry. It fires events based on
    +result data after operations complete. Real-time streaming events
    +require the Rust ``query_stream()`` to be exposed via PyO3 (future work).
    +"""
    +
    +from __future__ import annotations
    +
    +from dataclasses import dataclass, field
    +from enum import Enum
    +from typing import Any, Callable, List, Optional
    +
    +
    +class IndexEventType(str, Enum):
    +    STARTED = "started"
    +    FORMAT_DETECTED = "format_detected"
    +    PARSING_PROGRESS = "parsing_progress"
    +    TREE_BUILT = "tree_built"
    +    SUMMARY_PROGRESS = "summary_progress"
    +    COMPLETE = "complete"
    +    ERROR = "error"
    +
    +
    +class QueryEventType(str, Enum):
    +    STARTED = "started"
    +    NODE_VISITED = "node_visited"
    +    CANDIDATE_FOUND = "candidate_found"
    +    SUFFICIENCY_CHECK = "sufficiency_check"
    +    COMPLETE = "complete"
    +    ERROR = "error"
    +
    +
    +@dataclass
    +class IndexEventData:
    +    """Data payload for index events."""
    +
    +    event_type: IndexEventType
    +    path: Optional[str] = None
    +    format: Optional[str] = None
    +    percent: Optional[int] = None
    +    node_count: Optional[int] = None
    +    completed: Optional[int] = None
    +    total: Optional[int] = None
    +    doc_id: Optional[str] = None
    +    message: Optional[str] = None
    +
    +
    +@dataclass
    +class QueryEventData:
    +    """Data payload for query events."""
    +
    +    event_type: QueryEventType
    +    query: Optional[str] = None
    +    node_id: Optional[str] = None
    +    title: Optional[str] = None
    +    score: Optional[float] = None
    +    tokens: Optional[int] = None
    +    total_results: Optional[int] = None
    +    confidence: Optional[float] = None
    +    message: Optional[str] = None
    +
    +
    +IndexEventHandler = Callable[[IndexEventData], None]
    +QueryEventHandler = Callable[[QueryEventData], None]
    +WorkspaceEventHandler = Callable[[dict], None]
    +
    +
    +class EventEmitter:
    +    """Python-side event emitter for progress monitoring.
    +
    +    Usage::
    +
    +        from vectorless import Session, EventEmitter
    +
    +        events = EventEmitter()
    +
    +        @events.on_query
    +        def on_query(event):
    +            print(f"Query: {event.query}")
    +
    +        session = Session(api_key="sk-...", model="gpt-4o", events=events)
    +    """
    +
    +    def __init__(self) -> None:
    +        self._index_handlers: List[IndexEventHandler] = []
    +        self._query_handlers: List[QueryEventHandler] = []
    +        self._workspace_handlers: List[WorkspaceEventHandler] = []
    +
    +    def on_index(self, handler: IndexEventHandler) -> "EventEmitter":
    +        """Register an index event handler. Can be used as decorator."""
    +        self._index_handlers.append(handler)
    +        return self
    +
    +    def on_query(self, handler: QueryEventHandler) -> "EventEmitter":
    +        """Register a query event handler. Can be used as decorator."""
    +        self._query_handlers.append(handler)
    +        return self
    +
    +    def on_workspace(self, handler: WorkspaceEventHandler) -> "EventEmitter":
    +        """Register a workspace event handler. Can be used as decorator."""
    +        self._workspace_handlers.append(handler)
    +        return self
    +
    +    def emit_index(self, event: IndexEventData) -> None:
    +        """Emit an index event to all registered handlers."""
    +        for handler in self._index_handlers:
    +            handler(event)
    +
    +    def emit_query(self, event: QueryEventData) -> None:
    +        """Emit a query event to all registered handlers."""
    +        for handler in self._query_handlers:
    +            handler(event)
    +
    +    def emit_workspace(self, event: dict) -> None:
    +        """Emit a workspace event to all registered handlers."""
    +        for handler in self._workspace_handlers:
    +            handler(event)
    diff --git a/python/vectorless/jupyter.py b/python/vectorless/jupyter.py
    new file mode 100644
    index 0000000..2d63a92
    --- /dev/null
    +++ b/python/vectorless/jupyter.py
    @@ -0,0 +1,140 @@
    +"""Jupyter rich display integration for Vectorless results."""
    +
    +from __future__ import annotations
    +
    +import html as html_module
    +from typing import Any, List, Optional
    +
    +from vectorless.types.results import QueryResponse, QueryResult, Evidence
    +
    +
    +class QueryResultDisplay:
    +    """Rich display for query results in Jupyter notebooks.
    +
    +    Implements _repr_html_(), _repr_markdown_(), and _repr_json_()
    +    for automatic rendering.
    +    """
    +
    +    def __init__(self, result: QueryResponse) -> None:
    +        self._result = result
    +
    +    def _repr_html_(self) -> str:
    +        rows = []
    +        for item in self._result.items:
    +            escaped_content = html_module.escape(item.content[:500])
    +            confidence_bar = _confidence_bar(item.confidence)
    +            evidence_html = _evidence_list_html(item.evidence)
    +            rows.append(
    +                f"
    " + f"
    " + f"{html_module.escape(item.doc_id)}" + f"{confidence_bar}" + f"
    " + f"

    {escaped_content}

    " + f"{evidence_html}" + f"
    " + ) + + failed_html = "" + if self._result.has_failures(): + failed_items = [] + for f in self._result.failed: + failed_items.append( + f"
  • {html_module.escape(f.source)}: " + f"{html_module.escape(f.error)}
  • " + ) + failed_html = ( + f"
    " + f"Failures:
      {''.join(failed_items)}
    " + ) + + return ( + f"
    " + f"

    Results ({len(self._result.items)})

    " + f"{''.join(rows)}" + f"{failed_html}" + f"
    " + ) + + def _repr_markdown_(self) -> str: + lines = [f"## Results ({len(self._result.items)})\n"] + for item in self._result.items: + lines.append(f"### {item.doc_id} (confidence: {item.confidence:.2f})\n") + lines.append(f"{item.content}\n") + if item.evidence: + lines.append("**Evidence:**\n") + for ev in item.evidence: + lines.append(f"- **{ev.title}** ({ev.path})") + lines.append("") + return "\n".join(lines) + + def _repr_json_(self) -> dict: + return self._result.to_dict() + + +class DocumentGraphDisplay: + """Rich display for document relationship graphs.""" + + def __init__(self, graph: Any) -> None: + self._graph = graph + + def _repr_html_(self) -> str: + node_count = self._graph.node_count() if self._graph else 0 + edge_count = self._graph.edge_count() if self._graph else 0 + doc_ids = self._graph.doc_ids() if self._graph else [] + + rows = [] + for doc_id in doc_ids: + node = self._graph.get_node(doc_id) + if node: + rows.append( + f"
    " + f"" + f"" + ) + + return ( + f"
    " + f"

    Document Graph

    " + f"

    {node_count} nodes, {edge_count} edges

    " + f"
    {html_module.escape(node.doc_id)}{html_module.escape(node.title)}{node.node_count}
    " + f"" + f"" + f"" + f"" + f"{''.join(rows)}
    Doc IDTitleNodes
    " + ) + + +def _confidence_bar(confidence: float) -> str: + """Generate an HTML confidence indicator bar.""" + pct = int(confidence * 100) + if confidence >= 0.8: + color = "#4caf50" + elif confidence >= 0.5: + color = "#ff9800" + else: + color = "#f44336" + return ( + f"
    " + f"{pct}%" + f"
    " + f"
    " + f"
    " + ) + + +def _evidence_list_html(evidence: List[Evidence]) -> str: + """Generate HTML for evidence items.""" + if not evidence: + return "" + items = [] + for ev in evidence[:5]: + items.append( + f"
  • {html_module.escape(ev.title)} " + f"{html_module.escape(ev.path)}
  • " + ) + extra = f" (+{len(evidence) - 5} more)" if len(evidence) > 5 else "" + return f"" diff --git a/python/vectorless/py.typed b/python/vectorless/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/python/vectorless/session.py b/python/vectorless/session.py new file mode 100644 index 0000000..5a31c5b --- /dev/null +++ b/python/vectorless/session.py @@ -0,0 +1,336 @@ +"""High-level Vectorless Session API. + +``Session`` is the single recommended entry point for all operations. +It wraps the Rust Engine with Pythonic ergonomics: typed configuration, +event callbacks, flexible input methods, and batch operations. +""" + +from __future__ import annotations + +import asyncio +from pathlib import Path +from typing import Any, List, Optional, Union + +from vectorless._core import Engine, IndexContext, IndexOptions, QueryContext +from vectorless.config import EngineConfig, load_config_from_env +from vectorless.events import ( + EventEmitter, + IndexEventData, + IndexEventType, + QueryEventData, + QueryEventType, +) +from vectorless.streaming import StreamingQueryResult +from vectorless.types.graph import DocumentGraphWrapper +from vectorless.types.results import ( + IndexResultWrapper, + QueryResponse, +) + + +class Session: + """High-level Vectorless session. + + Configuration precedence: constructor args > env vars > config file > defaults. 
+ + Usage:: + + from vectorless import Session + + session = Session(api_key="sk-...", model="gpt-4o") + result = await session.index(path="./report.pdf") + answer = await session.ask("What is the Q4 revenue?", doc_ids=[result.doc_id]) + print(answer.single().content) + + Or from environment variables:: + + # VECTORLESS_API_KEY, VECTORLESS_MODEL set in env + session = Session.from_env() + """ + + def __init__( + self, + api_key: Optional[str] = None, + model: Optional[str] = None, + endpoint: Optional[str] = None, + config: Optional[EngineConfig] = None, + config_file: Optional[Union[str, Path]] = None, + events: Optional[EventEmitter] = None, + ) -> None: + self._events = events or EventEmitter() + + # Resolve config: constructor > env > file > defaults + if config is not None: + self._config = config + else: + self._config = self._resolve_config(api_key, model, endpoint, config_file) + + # Build Rust engine + rust_config = self._config.to_rust_config() + self._engine = Engine( + api_key=self._config.llm.api_key, + model=self._config.llm.model or None, + endpoint=self._config.llm.endpoint or None, + config=rust_config, + ) + + @classmethod + def from_env(cls, events: Optional[EventEmitter] = None) -> "Session": + """Create a Session from environment variables only.""" + config = load_config_from_env() + return cls(config=config, events=events) + + @classmethod + def from_config_file( + cls, + path: Union[str, Path], + events: Optional[EventEmitter] = None, + ) -> "Session": + """Create a Session from a TOML config file.""" + from vectorless.config import load_config_from_file + + config = load_config_from_file(Path(path)) + return cls(config=config, events=events) + + def _resolve_config( + self, + api_key: Optional[str], + model: Optional[str], + endpoint: Optional[str], + config_file: Optional[Union[str, Path]], + ) -> EngineConfig: + from vectorless.config import load_config + + overrides: dict[str, Any] = {} + llm_overrides: dict[str, Any] = {} + if api_key is 
not None: + llm_overrides["api_key"] = api_key + if model is not None: + llm_overrides["model"] = model + if endpoint is not None: + llm_overrides["endpoint"] = endpoint + if llm_overrides: + overrides["llm"] = llm_overrides + + return load_config( + config_file=Path(config_file) if config_file else None, + overrides=overrides if overrides else None, + ) + + # ── Indexing ────────────────────────────────────────────── + + async def index( + self, + path: Optional[Union[str, Path]] = None, + paths: Optional[List[Union[str, Path]]] = None, + directory: Optional[Union[str, Path]] = None, + content: Optional[str] = None, + bytes_data: Optional[bytes] = None, + format: str = "markdown", + name: Optional[str] = None, + mode: str = "default", + force: bool = False, + ) -> IndexResultWrapper: + """Index a document from various sources. + + Exactly one source must be provided: path, paths, directory, + content, or bytes_data. + """ + sources_provided = sum( + x is not None for x in [path, paths, directory, content, bytes_data] + ) + if sources_provided != 1: + raise ValueError( + "Provide exactly one source: path, paths, directory, content, or bytes_data" + ) + + if force: + mode = "force" + + # Build IndexContext + if path is not None: + ctx = IndexContext.from_path(str(path)) + elif paths is not None: + ctx = IndexContext.from_paths([str(p) for p in paths]) + elif directory is not None: + ctx = IndexContext.from_dir(str(directory), recursive=True) + elif content is not None: + ctx = IndexContext.from_content(content, format) + elif bytes_data is not None: + ctx = IndexContext.from_bytes(list(bytes_data), format) + else: + raise ValueError("No source provided") + + if name is not None: + ctx = ctx.with_name(name) + if mode != "default": + ctx = ctx.with_mode(mode) + + # Emit start event + source_desc = str(path or paths or directory or "" or "") + self._events.emit_index( + IndexEventData(event_type=IndexEventType.STARTED, path=source_desc) + ) + + result = await 
self._engine.index(ctx) + + # Emit complete event + self._events.emit_index( + IndexEventData( + event_type=IndexEventType.COMPLETE, + doc_id=result.doc_id, + message=f"Indexed {result.doc_id or 'documents'}", + ) + ) + + return IndexResultWrapper.from_rust(result) + + async def index_batch( + self, + paths: List[Union[str, Path]], + *, + mode: str = "default", + jobs: int = 1, + force: bool = False, + progress: bool = True, + ) -> List[IndexResultWrapper]: + """Index multiple files with optional concurrency. + + Args: + paths: List of file paths to index. + mode: Indexing mode ("default", "force", "incremental"). + jobs: Max concurrent indexing jobs. + force: Force re-index existing documents. + progress: Emit progress events. + """ + semaphore = asyncio.Semaphore(jobs) + results: List[IndexResultWrapper] = [] + + async def _index_one(p: Union[str, Path]) -> IndexResultWrapper: + async with semaphore: + self._events.emit_index( + IndexEventData( + event_type=IndexEventType.STARTED, + path=str(p), + ) + ) + result = await self.index(path=p, mode=mode, force=force) + if progress: + self._events.emit_index( + IndexEventData( + event_type=IndexEventType.COMPLETE, + path=str(p), + doc_id=result.doc_id, + ) + ) + return result + + tasks = [_index_one(p) for p in paths] + results = await asyncio.gather(*tasks) + return list(results) + + # ── Querying ────────────────────────────────────────────── + + async def ask( + self, + question: str, + *, + doc_ids: Optional[List[str]] = None, + workspace_scope: bool = False, + timeout_secs: Optional[int] = None, + ) -> QueryResponse: + """Ask a question and get results with source attribution. + + Args: + question: Natural language query. + doc_ids: Limit query to specific document IDs. + workspace_scope: Query across all indexed documents. + timeout_secs: Per-operation timeout. 
+ """ + # Emit start event + self._events.emit_query( + QueryEventData( + event_type=QueryEventType.STARTED, + query=question, + ) + ) + + ctx = QueryContext(question) + if doc_ids is not None: + ctx = ctx.with_doc_ids(doc_ids) + elif workspace_scope: + ctx = ctx.with_workspace() + if timeout_secs is not None: + ctx = ctx.with_timeout_secs(timeout_secs) + + result = await self._engine.query(ctx) + response = QueryResponse.from_rust(result) + + # Emit complete event + self._events.emit_query( + QueryEventData( + event_type=QueryEventType.COMPLETE, + query=question, + total_results=len(response.items), + ) + ) + + return response + + async def query_stream( + self, + question: str, + **kwargs: Any, + ) -> StreamingQueryResult: + """Stream query progress as an async iterator. + + Note: Currently wraps ``ask()`` and yields synthetic events. + Real-time streaming requires Rust-side ``query_stream()`` exposure. + """ + response = await self.ask(question, **kwargs) + return StreamingQueryResult(response) + + # ── Document Management ─────────────────────────────────── + + async def list_documents(self) -> list: + """List all indexed documents.""" + return await self._engine.list() + + async def remove_document(self, doc_id: str) -> bool: + """Remove a document by ID.""" + return await self._engine.remove(doc_id) + + async def document_exists(self, doc_id: str) -> bool: + """Check if a document exists.""" + return await self._engine.exists(doc_id) + + async def clear_all(self) -> int: + """Remove all indexed documents. 
Returns count removed.""" + return await self._engine.clear() + + # ── Graph ───────────────────────────────────────────────── + + async def get_graph(self) -> Optional[DocumentGraphWrapper]: + """Get the cross-document relationship graph.""" + graph = await self._engine.get_graph() + if graph is None: + return None + return DocumentGraphWrapper.from_rust(graph) + + # ── Metrics ─────────────────────────────────────────────── + + def metrics_report(self) -> Any: + """Get a comprehensive metrics report.""" + return self._engine.metrics_report() + + # ── Context Manager ─────────────────────────────────────── + + async def __aenter__(self) -> "Session": + return self + + async def __aexit__(self, *args: Any) -> None: + pass + + def __repr__(self) -> str: + model = self._config.llm.model or "unknown" + return f"Session(model={model!r})" diff --git a/python/vectorless/streaming.py b/python/vectorless/streaming.py new file mode 100644 index 0000000..d3109ef --- /dev/null +++ b/python/vectorless/streaming.py @@ -0,0 +1,77 @@ +"""Streaming query compatibility layer. + +Provides an async iterator interface for queries. Currently wraps the +synchronous ``query()`` and yields synthetic progress events. Real-time +streaming requires exposing ``query_stream()`` from Rust via PyO3. +""" + +from __future__ import annotations + +from typing import AsyncIterator, Dict, List, Optional + +from vectorless.types.results import QueryResponse + + +class StreamingQueryResult: + """Async iterator for query progress events. 
+ + Usage:: + + stream = await session.query_stream("What is the revenue?") + async for event in stream: + print(event) + result = stream.result + """ + + def __init__(self, response: QueryResponse) -> None: + self._response = response + self._consumed = False + + def __aiter__(self) -> AsyncIterator[Dict]: + return self._iterate() + + async def _iterate(self) -> AsyncIterator[Dict]: + if self._consumed: + return + self._consumed = True + + # Synthetic events from the final result + yield {"type": "started", "message": "Query started"} + + for i, item in enumerate(self._response.items): + yield { + "type": "candidate_found", + "doc_id": item.doc_id, + "score": item.score, + "confidence": item.confidence, + "index": i, + } + + for j, evidence in enumerate(item.evidence): + yield { + "type": "evidence", + "doc_id": item.doc_id, + "evidence_title": evidence.title, + "evidence_path": evidence.path, + "content_length": len(evidence.content), + "index": j, + } + + if self._response.has_failures(): + for failed in self._response.failed: + yield { + "type": "error", + "source": failed.source, + "error": failed.error, + } + + yield { + "type": "completed", + "total_results": len(self._response.items), + "total_failures": len(self._response.failed), + } + + @property + def result(self) -> Optional[QueryResponse]: + """Final result, available after iteration completes.""" + return self._response if self._consumed else None diff --git a/python/vectorless/types/__init__.py b/python/vectorless/types/__init__.py new file mode 100644 index 0000000..f8d8947 --- /dev/null +++ b/python/vectorless/types/__init__.py @@ -0,0 +1,37 @@ +"""Typed Python wrappers for Vectorless result and graph types.""" + +from vectorless.types.graph import ( + DocumentGraphWrapper, + EdgeEvidence, + GraphEdge, + GraphNode, + WeightedKeyword, +) +from vectorless.types.results import ( + Evidence, + FailedItem, + IndexItemWrapper, + IndexMetrics, + IndexResultWrapper, + QueryMetrics, + QueryResponse, + 
QueryResult, +) + +__all__ = [ + # Results + "Evidence", + "FailedItem", + "IndexItemWrapper", + "IndexMetrics", + "IndexResultWrapper", + "QueryMetrics", + "QueryResponse", + "QueryResult", + # Graph + "DocumentGraphWrapper", + "EdgeEvidence", + "GraphEdge", + "GraphNode", + "WeightedKeyword", +] diff --git a/python/vectorless/types/graph.py b/python/vectorless/types/graph.py new file mode 100644 index 0000000..4b327e9 --- /dev/null +++ b/python/vectorless/types/graph.py @@ -0,0 +1,107 @@ +"""Typed Python wrappers for PyO3 graph types.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional, Tuple + + +@dataclass(frozen=True) +class WeightedKeyword: + """A keyword with importance weight.""" + + keyword: str + weight: float + + @classmethod + def from_rust(cls, item: object) -> WeightedKeyword: + return cls(keyword=item.keyword, weight=item.weight) + + +@dataclass(frozen=True) +class EdgeEvidence: + """Evidence for a graph edge connecting two documents.""" + + shared_keyword_count: int = 0 + keyword_jaccard: float = 0.0 + shared_keywords: Tuple[Tuple[str, float], ...] 
= () + + @classmethod + def from_rust(cls, item: object) -> EdgeEvidence: + keywords = tuple((kw.keyword, kw.weight) for kw in item.shared_keywords) + return cls( + shared_keyword_count=item.shared_keyword_count, + keyword_jaccard=item.keyword_jaccard, + shared_keywords=keywords, + ) + + +@dataclass(frozen=True) +class GraphEdge: + """An edge in the document relationship graph.""" + + target_doc_id: str + weight: float + evidence: Optional[EdgeEvidence] = None + + @classmethod + def from_rust(cls, item: object) -> GraphEdge: + evidence = EdgeEvidence.from_rust(item.evidence) if item.evidence else None + return cls( + target_doc_id=item.target_doc_id, + weight=item.weight, + evidence=evidence, + ) + + +@dataclass(frozen=True) +class GraphNode: + """A document node in the relationship graph.""" + + doc_id: str + title: str + format: str + node_count: int + top_keywords: List[WeightedKeyword] = field(default_factory=list) + + @classmethod + def from_rust(cls, item: object) -> GraphNode: + keywords = [WeightedKeyword.from_rust(kw) for kw in item.top_keywords] + return cls( + doc_id=item.doc_id, + title=item.title, + format=item.format, + node_count=item.node_count, + top_keywords=keywords, + ) + + +@dataclass +class DocumentGraphWrapper: + """Typed wrapper around the cross-document relationship graph.""" + + _inner: Any + + @classmethod + def from_rust(cls, graph: object) -> DocumentGraphWrapper: + return cls(_inner=graph) + + def node_count(self) -> int: + return self._inner.node_count() + + def edge_count(self) -> int: + return self._inner.edge_count() + + def get_node(self, doc_id: str) -> Optional[GraphNode]: + node = self._inner.get_node(doc_id) + return GraphNode.from_rust(node) if node is not None else None + + def get_neighbors(self, doc_id: str) -> List[GraphEdge]: + neighbors = self._inner.get_neighbors(doc_id) + return [GraphEdge.from_rust(e) for e in neighbors] + + def doc_ids(self) -> List[str]: + return list(self._inner.doc_ids()) + + def is_empty(self) 
-> bool: + return self._inner.is_empty() diff --git a/python/vectorless/types/results.py b/python/vectorless/types/results.py new file mode 100644 index 0000000..5914d72 --- /dev/null +++ b/python/vectorless/types/results.py @@ -0,0 +1,237 @@ +"""Typed Python wrappers for PyO3 result types.""" + +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from typing import Iterator, List, Optional + + +@dataclass(frozen=True) +class Evidence: + """A single piece of evidence with source attribution.""" + + title: str + path: str + content: str + doc_name: Optional[str] = None + + @classmethod + def from_rust(cls, item: object) -> Evidence: + return cls( + title=item.title, + path=item.path, + content=item.content, + doc_name=item.doc_name, + ) + + def to_dict(self) -> dict: + d = {"title": self.title, "path": self.path, "content": self.content} + if self.doc_name is not None: + d["doc_name"] = self.doc_name + return d + + def to_json(self) -> str: + return json.dumps(self.to_dict(), ensure_ascii=False) + + +@dataclass(frozen=True) +class QueryMetrics: + """Metrics from a single query execution.""" + + llm_calls: int = 0 + rounds_used: int = 0 + nodes_visited: int = 0 + evidence_count: int = 0 + evidence_chars: int = 0 + + @classmethod + def from_rust(cls, item: object) -> QueryMetrics: + return cls( + llm_calls=item.llm_calls, + rounds_used=item.rounds_used, + nodes_visited=item.nodes_visited, + evidence_count=item.evidence_count, + evidence_chars=item.evidence_chars, + ) + + def to_dict(self) -> dict: + return { + "llm_calls": self.llm_calls, + "rounds_used": self.rounds_used, + "nodes_visited": self.nodes_visited, + "evidence_count": self.evidence_count, + "evidence_chars": self.evidence_chars, + } + + +@dataclass(frozen=True) +class QueryResult: + """A single document's query result.""" + + doc_id: str + content: str + score: float + confidence: float + node_ids: List[str] = field(default_factory=list) + evidence: List[Evidence] 
= field(default_factory=list) + metrics: Optional[QueryMetrics] = None + + @classmethod + def from_rust(cls, item: object) -> QueryResult: + evidence = [Evidence.from_rust(e) for e in item.evidence] + metrics = QueryMetrics.from_rust(item.metrics) if item.metrics else None + return cls( + doc_id=item.doc_id, + content=item.content, + score=item.score, + confidence=item.confidence, + node_ids=list(item.node_ids), + evidence=evidence, + metrics=metrics, + ) + + def to_dict(self) -> dict: + d = { + "doc_id": self.doc_id, + "content": self.content, + "score": self.score, + "confidence": self.confidence, + "node_ids": self.node_ids, + "evidence": [e.to_dict() for e in self.evidence], + } + if self.metrics: + d["metrics"] = self.metrics.to_dict() + return d + + +@dataclass(frozen=True) +class FailedItem: + """A failed item in a batch operation.""" + + source: str + error: str + + @classmethod + def from_rust(cls, item: object) -> FailedItem: + return cls(source=item.source, error=item.error) + + +@dataclass(frozen=True) +class QueryResponse: + """Wraps a complete query result (potentially multi-document).""" + + items: List[QueryResult] = field(default_factory=list) + failed: List[FailedItem] = field(default_factory=list) + + @classmethod + def from_rust(cls, result: object) -> QueryResponse: + items = [QueryResult.from_rust(i) for i in result.items] + failed = [FailedItem.from_rust(f) for f in result.failed] + return cls(items=items, failed=failed) + + def single(self) -> Optional[QueryResult]: + """Get the first (single-doc) result item.""" + return self.items[0] if self.items else None + + def has_failures(self) -> bool: + return len(self.failed) > 0 + + def __len__(self) -> int: + return len(self.items) + + def __iter__(self) -> Iterator[QueryResult]: # type: ignore[override] + return iter(self.items) + + def to_dict(self) -> dict: + return { + "items": [i.to_dict() for i in self.items], + "failed": [{"source": f.source, "error": f.error} for f in self.failed], + } + 
+ +@dataclass(frozen=True) +class IndexMetrics: + """Metrics from the indexing pipeline.""" + + total_time_ms: int = 0 + parse_time_ms: int = 0 + build_time_ms: int = 0 + enhance_time_ms: int = 0 + nodes_processed: int = 0 + summaries_generated: int = 0 + summaries_failed: int = 0 + llm_calls: int = 0 + total_tokens_generated: int = 0 + topics_indexed: int = 0 + keywords_indexed: int = 0 + + @classmethod + def from_rust(cls, item: object) -> IndexMetrics: + return cls( + total_time_ms=item.total_time_ms, + parse_time_ms=item.parse_time_ms, + build_time_ms=item.build_time_ms, + enhance_time_ms=item.enhance_time_ms, + nodes_processed=item.nodes_processed, + summaries_generated=item.summaries_generated, + summaries_failed=item.summaries_failed, + llm_calls=item.llm_calls, + total_tokens_generated=item.total_tokens_generated, + topics_indexed=item.topics_indexed, + keywords_indexed=item.keywords_indexed, + ) + + +@dataclass(frozen=True) +class IndexItemWrapper: + """A single indexed document item.""" + + doc_id: str + name: str + format: str + description: Optional[str] = None + source_path: Optional[str] = None + page_count: Optional[int] = None + metrics: Optional[IndexMetrics] = None + + @classmethod + def from_rust(cls, item: object) -> IndexItemWrapper: + metrics = IndexMetrics.from_rust(item.metrics) if item.metrics else None + return cls( + doc_id=item.doc_id, + name=item.name, + format=item.format, + description=item.description, + source_path=item.source_path, + page_count=item.page_count, + metrics=metrics, + ) + + +@dataclass(frozen=True) +class IndexResultWrapper: + """Result of a document indexing operation.""" + + doc_id: Optional[str] = None + items: List[IndexItemWrapper] = field(default_factory=list) + failed: List[FailedItem] = field(default_factory=list) + + @classmethod + def from_rust(cls, result: object) -> IndexResultWrapper: + items = [IndexItemWrapper.from_rust(i) for i in result.items] + failed = [FailedItem.from_rust(f) for f in 
result.failed] + return cls( + doc_id=result.doc_id, + items=items, + failed=failed, + ) + + def has_failures(self) -> bool: + return len(self.failed) > 0 + + def total(self) -> int: + return len(self.items) + len(self.failed) + + def __len__(self) -> int: + return len(self.items) From 3a760afc40f9e726278c597cccf8ed1a4e2cbb36 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Tue, 21 Apr 2026 20:31:12 +0800 Subject: [PATCH 3/3] style(docs): update documentation site styling and navigation - add Font Awesome stylesheet for icon support - add Python and Rust SDK links to navigation bar - remove examples link from navigation bar - update color scheme from amber to rose with new primary colors (#AF788B for light mode, #C9A0AE for dark mode) - adjust navbar padding and layout for better alignment - remove footer configuration and related CSS - implement new hero banner design with rose color scheme - create responsive layout with flex-based positioning - add feature pills and principles card to homepage - update syntax highlighting colors for better contrast - add prevent-scroll functionality for homepage hero section --- docs/docusaurus.config.ts | 69 +- docs/src/css/custom.css | 162 ++-- docs/src/pages/index.module.css | 999 ++++-------------------- docs/src/pages/index.tsx | 643 ++------------- docs/src/theme/Navbar/index.tsx | 6 +- docs/src/theme/Navbar/styles.module.css | 25 +- 6 files changed, 321 insertions(+), 1583 deletions(-) diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index 759c457..dc8ee2b 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -16,6 +16,10 @@ const config: Config = { url: 'https://vectorless.dev', baseUrl: '/', + stylesheets: [ + 'https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.1/css/all.min.css', + ], + organizationName: 'vectorlessflow', projectName: 'vectorless', @@ -69,73 +73,12 @@ const config: Config = { target: '_self' // This makes the logo click follow the link in the same 
window }, items: [ + {to: '/docs/sdk/python', label: 'Python', position: 'left'}, + {to: '/docs/sdk/rust', label: 'Rust', position: 'left'}, {to: '/docs/intro', label: 'Documentation', position: 'left'}, - // { - // href: 'https://github.com/vectorlessflow/vectorless/tree/main/examples', - // label: 'Examples', - // position: 'left', - // target: '_self', - // }, {to: '/blog', label: 'Blog', position: 'left'}, ], }, - footer: { - style: 'light', - links: [ - { - title: 'Product', - items: [ - { - label: 'Getting Started', - to: '/docs/intro', - }, - { - label: 'Documentation', - to: '/docs/intro', - }, - { - label: 'Blog', - to: '/blog', - }, - ], - }, - { - title: 'Integrations', - items: [ - { - label: 'Python SDK', - href: 'https://pypi.org/project/vectorless/', - }, - { - label: 'Rust Crate', - href: 'https://crates.io/crates/vectorless', - }, - { - label: 'API Reference', - to: '/docs/api-reference', - }, - ], - }, - { - title: 'Community', - items: [ - { - label: 'GitHub', - href: 'https://github.com/vectorlessflow/vectorless', - }, - { - label: 'Report a Bug', - href: 'https://github.com/vectorlessflow/vectorless/issues', - }, - { - label: 'Apache 2.0 License', - href: 'https://github.com/vectorlessflow/vectorless/blob/main/LICENSE', - }, - ], - }, - ], - copyright: `Copyright \u00A9 ${new Date().getFullYear()} Vectorless`, - }, prism: { theme: prismThemes.github, darkTheme: prismThemes.dracula, diff --git a/docs/src/css/custom.css b/docs/src/css/custom.css index b1b96b0..179c95c 100644 --- a/docs/src/css/custom.css +++ b/docs/src/css/custom.css @@ -1,66 +1,66 @@ /** * Any CSS included here will be global. The classic template - * bundles Infima by default. Infima is a CSS framework designed to + * bundles Infira by default. Infima is a CSS framework designed to * work well for content-centric websites. * - * Color scheme: dark-first with amber accent. + * Color scheme: rose accent. 
*/ :root { - --primary: #F59E0B; - --primary-dark: #D97706; - --primary-deeper: #B45309; - --primary-light: #FBBF24; - --primary-soft: rgba(245, 158, 11, 0.12); + --primary: #AF788B; + --primary-dark: #8B5E6F; + --primary-deeper: #6D4A58; + --primary-light: #C9A0AE; + --primary-soft: rgba(175, 120, 139, 0.12); --accent-tech: #5E6AD2; --accent-green: #10B981; - --text: #1F2937; - --text-light: #6B7280; + --text: #1E293B; + --text-light: #5B6E8C; --bg: #FFFFFF; - --bg-secondary: #F9FAFB; - --bg-offset: #F3F4F6; + --bg-secondary: #F6F8FA; + --bg-offset: #F6F8FA; --card-bg: #FFFFFF; - --border: #E5E7EB; + --border: #E2E8F0; --code-bg: #0F172A; --code-text: #E2E8F0; - --code-comment: #6B7280; - --code-keyword: #5E6AD2; - - --ifm-color-primary: #F59E0B; - --ifm-color-primary-dark: #D97706; - --ifm-color-primary-darker: #B45309; - --ifm-color-primary-darkest: #92400E; - --ifm-color-primary-light: #FBBF24; - --ifm-color-primary-lighter: #FCD34D; - --ifm-color-primary-lightest: #FDE68A; + --code-comment: #6272A4; + --code-keyword: #FF79C6; + + --ifm-color-primary: #AF788B; + --ifm-color-primary-dark: #8B5E6F; + --ifm-color-primary-darker: #6D4A58; + --ifm-color-primary-darkest: #573D4A; + --ifm-color-primary-light: #C9A0AE; + --ifm-color-primary-lighter: #D4B3BF; + --ifm-color-primary-lightest: #E6CDD6; --ifm-code-font-size: 95%; --ifm-background-color: #FFFFFF; - --docusaurus-highlighted-code-line-bg: rgba(245, 158, 11, 0.08); + --docusaurus-highlighted-code-line-bg: rgba(175, 120, 139, 0.08); } [data-theme='dark'] { - --text: #EDF2F8; - --text-light: #9AA4BF; - --bg: #141720; - --bg-secondary: #1A1F27; - --bg-offset: #1A1F27; - --card-bg: #1E2430; - --border: #2A3040; - --primary-soft: rgba(245, 158, 11, 0.12); - --code-bg: #0E1117; - --code-text: #CBD5E1; - --code-comment: #6B7280; - --code-keyword: #5E6AD2; - - --ifm-color-primary: #FBBF24; - --ifm-color-primary-dark: #F59E0B; - --ifm-color-primary-darker: #D97706; - --ifm-color-primary-darkest: #B45309; - 
--ifm-color-primary-light: #FCD34D; - --ifm-color-primary-lighter: #FDE68A; - --ifm-color-primary-lightest: #FEF3C7; - --docusaurus-highlighted-code-line-bg: rgba(245, 158, 11, 0.15); - --ifm-background-color: #141720; + --text: #EEF2FF; + --text-light: #8B9AB0; + --bg: #0A0C10; + --bg-secondary: #11151A; + --bg-offset: #11151A; + --card-bg: #131720; + --border: #1E293B; + --primary-soft: rgba(201, 160, 174, 0.15); + --code-bg: #010409; + --code-text: #E2E8F0; + --code-comment: #6272A4; + --code-keyword: #FF79C6; + + --ifm-color-primary: #C9A0AE; + --ifm-color-primary-dark: #AF788B; + --ifm-color-primary-darker: #8B5E6F; + --ifm-color-primary-darkest: #6D4A58; + --ifm-color-primary-light: #D4B3BF; + --ifm-color-primary-lighter: #E0C5CE; + --ifm-color-primary-lightest: #E6CDD6; + --docusaurus-highlighted-code-line-bg: rgba(175, 120, 139, 0.15); + --ifm-background-color: #0A0C10; } /* ===== Navbar ===== */ @@ -69,13 +69,26 @@ border-bottom: none !important; box-shadow: none !important; height: 68px !important; - padding: 0 1.5rem !important; + padding: 0 !important; } .navbar__inner { height: 68px !important; max-width: 1280px; margin: 0 auto; + display: flex; + align-items: center; + width: 100%; +} + +.navbar__items { + flex: 1; + display: flex; + justify-content: center; +} + +.navbar__items--right { + flex: 0; } .navbar__brand { @@ -109,58 +122,11 @@ color: var(--primary) !important; } -[data-theme='dark'] .navbar { - background-color: var(--bg) !important; - border-bottom-color: var(--border) !important; -} - -[data-theme='dark'] .navbar__title { - color: var(--text) !important; -} - -[data-theme='dark'] .navbar__link { - color: var(--text-light) !important; -} - -[data-theme='dark'] .navbar__link:hover { - color: var(--primary) !important; -} - -[data-theme='dark'] .navbar__link--active { - color: var(--primary) !important; -} - -/* ===== Footer ===== */ -.footer { - background-color: transparent !important; - padding: 5rem 1.5rem 1.5rem; -} - -.footer__title { 
- font-size: 0.85rem; - font-weight: 700; - text-transform: uppercase; - letter-spacing: 0.08em; - color: var(--text); - margin-bottom: 1rem; -} - -.footer__link-item { - font-size: 0.92rem; - color: var(--text-light); - line-height: 2; - transition: color 0.15s; -} - -.footer__link-item:hover { - color: var(--primary); -} - -.footer__link-item svg { - display: none; +/* ===== Prevent scroll on homepage ===== */ +html:has(.heroBanner) { + overflow: hidden; } -.footer__copyright { - font-size: 0.88rem; - color: var(--text-light); +html:has(.heroBanner) body { + overflow: hidden; } diff --git a/docs/src/pages/index.module.css b/docs/src/pages/index.module.css index c3025fa..acbe8e1 100644 --- a/docs/src/pages/index.module.css +++ b/docs/src/pages/index.module.css @@ -1,947 +1,260 @@ /** * Homepage styles for Vectorless. - * Color tokens come from src/css/custom.css (--primary, --text, etc.) - * - * NOTE: Do NOT use bare class ".container" — it collides with Docusaurus' - * built-in container. Use scoped names like .heroInner / .sectionInner instead. + * Uses rose color scheme tokens from custom.css. 
*/ -/* ===== Hero ===== */ +/* ===== Hero Banner ===== */ .heroBanner { - padding: 10rem 1.5rem 6rem; - position: relative; + margin: 0; + padding: 0; + height: calc(100vh - 68px); overflow: hidden; + display: flex; + align-items: center; + justify-content: center; background-color: var(--bg); + background-image: + linear-gradient(rgba(175, 120, 139, 0.06) 1px, transparent 1px), + linear-gradient(90deg, rgba(175, 120, 139, 0.06) 1px, transparent 1px); + background-size: 48px 48px; + font-family: 'Space Grotesk', sans-serif; + color: var(--text); + line-height: 1.5; } -.heroInner { - max-width: 1100px; +/* ===== Hero Container ===== */ +.hero { + max-width: 1280px; + width: 90%; margin: 0 auto; - text-align: center; + display: flex; + flex-wrap: wrap; + align-items: center; + gap: 36px; + padding: 32px 28px; + background: radial-gradient(circle at 20% 30%, var(--primary-soft), transparent 70%); + border-radius: 48px; } -.heroTitle { - font-size: 5.5rem; +/* ===== Left: Brand + Features ===== */ +.heroContent { + flex: 1.2; + min-width: 280px; +} + +.mainTitle { + font-size: 3.8rem; font-weight: 800; - font-family: 'Inter', 'Libre Franklin', -apple-system, BlinkMacSystemFont, sans-serif; - color: var(--text); - margin: 0 0 1.75rem; letter-spacing: -0.03em; + background: linear-gradient(135deg, var(--text) 0%, var(--text-light) 80%); + -webkit-background-clip: text; + background-clip: text; + color: transparent; + margin-bottom: 8px; line-height: 1.1; - white-space: nowrap; } -.heroTitleEmphasis { +.subTitle { + font-size: 1.4rem; + font-weight: 500; color: var(--primary); + margin-bottom: 24px; + letter-spacing: -0.2px; + border-left: 3px solid var(--primary); + padding-left: 16px; } -.heroTitleLight { - color: #9AA4BF; - font-weight: 300; -} - -.heroTagline { - color: var(--text-light); - font-size: 1.3rem; - line-height: 1.7; - margin: 0 0 3rem; -} - -.heroTaglineHighlight { - color: var(--text); - font-weight: 600; -} - -.heroTaglineLine1 { - /* first line is 
naturally longer */ -} - -.heroTaglineLine2 { - /* second line shorter */ +/* ===== Feature Pills ===== */ +.featureList { + display: flex; + flex-direction: column; + gap: 14px; + margin-bottom: 24px; } -.heroTagline strong { - color: var(--text); - font-weight: 600; +.featureItem { + display: flex; + align-items: center; + gap: 14px; + font-size: 1rem; + font-weight: 500; + background: var(--primary-soft); + padding: 10px 16px; + border-radius: 60px; + width: fit-content; + backdrop-filter: blur(4px); } -.heroTagline em { - color: var(--primary-dark); - font-style: italic; +.featureIcon { + width: 32px; + height: 32px; + background: var(--primary-soft); + border-radius: 50%; + display: inline-flex; + align-items: center; + justify-content: center; + color: var(--primary); + font-size: 1rem; } +/* ===== GitHub Star Button ===== */ .heroActions { display: flex; gap: 1rem; - justify-content: center; + justify-content: flex-start; align-items: center; flex-wrap: wrap; } -/* ===== Three Rules ===== */ -.rulesRow { - display: flex; - gap: 1.5rem; - justify-content: center; - flex-wrap: wrap; - max-width: 1000px; - margin: 0 auto; -} - -.ruleCard { - flex: 1; - min-width: 240px; - max-width: 320px; - background: var(--card-bg); - border: 1px solid var(--border); - border-radius: 16px; - padding: 2.25rem 2rem; - text-align: center; -} - -.ruleTitle { - font-size: 1.1rem; - font-weight: 700; - color: var(--primary-dark); - margin-bottom: 0.75rem; - font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; -} - -[data-theme='dark'] .ruleTitle { - color: var(--primary); -} - -.ruleDesc { - font-size: 0.92rem; - line-height: 1.65; - color: var(--text-light); -} - -/* GitHub Star button */ .githubStarButton { display: inline-flex; align-items: center; gap: 0.5rem; - padding: 0.8rem 1.8rem; + padding: 0.9rem 2rem; font-size: 1rem; font-weight: 600; border-radius: 999px; background-color: var(--text); - color: #fff; + color: var(--bg); text-decoration: none; + 
transition: transform 0.15s, box-shadow 0.15s; + font-family: 'Space Grotesk', sans-serif; } -[data-theme='dark'] .githubStarButton { - background-color: #f3f3f3; - color: #333; +.githubStarButton:hover { + color: var(--bg); + text-decoration: none; + transform: translateY(-2px); + box-shadow: 0 8px 24px rgba(0, 0, 0, 0.25); } .starIcon { margin-left: 0.4rem; } -/* ===== Buttons ===== */ -.buttonPrimary { - display: inline-flex; - align-items: center; - padding: 0.75rem 2rem; - font-size: 0.95rem; - font-weight: 600; - border-radius: 8px; - background-color: var(--primary-dark); - color: #fff; - text-decoration: none; - transition: background-color 0.2s, transform 0.1s; -} - -.buttonPrimary:hover { - background-color: var(--primary-deeper); - color: #fff; - text-decoration: none; - transform: translateY(-1px); -} - -.buttonSecondary { - display: inline-flex; - align-items: center; - padding: 0.75rem 2rem; - font-size: 0.95rem; - font-weight: 600; - border-radius: 8px; - background-color: transparent; - color: var(--text); - border: 1.5px solid var(--border); - text-decoration: none; - transition: border-color 0.2s, transform 0.1s; +/* ===== Right: Principles Card ===== */ +.heroPrinciples { + flex: 0.9; + min-width: 280px; + background: var(--bg-offset); + backdrop-filter: blur(12px); + border: 1px solid var(--border); + border-radius: 40px; + padding: 24px 22px; + box-shadow: 0 20px 35px -12px rgba(0,0,0,0.2); + transition: transform 0.2s ease, border-color 0.2s ease; } -.buttonSecondary:hover { +.heroPrinciples:hover { border-color: var(--primary); - color: var(--primary-dark); - text-decoration: none; - transform: translateY(-1px); -} - -/* ===== Sections ===== */ -.section { - padding: 5rem 1.5rem; -} - -.sectionAlt { - background-color: var(--bg-offset); -} - -.sectionInner { - max-width: 1200px; - margin: 0 auto; -} - -.sectionTitle { - font-size: 2rem; - font-weight: 700; - text-align: center; - margin: 0 0 0.5rem; - color: var(--text); -} - 
-.sectionSubtitle { - text-align: center; - font-size: 1.1rem; - color: var(--text-light); - margin: 0 0 3rem; + transform: translateY(-4px); } -/* ===== Get Started — Linear Dark Demo Card ===== */ -.demoCard { - max-width: 1200px; - margin: 0 auto; - background: #161A1F; - border-radius: 16px; - border: 1px solid #252A30; - overflow: hidden; - box-shadow: 0 12px 40px rgba(0, 0, 0, 0.4); -} - -.demoTabs { - display: flex; - align-items: center; - gap: 1.5rem; - padding: 0 1.25rem; - border-bottom: 1px solid #252A30; - background: #161A1F; -} - -.demoTab { - padding: 0.875rem 0 0.75rem; - font-size: 0.8rem; - font-weight: 500; - color: #9AA4BF; - border: none; - border-bottom: 2px solid transparent; - background: transparent; - cursor: pointer; - transition: all 0.15s ease; - letter-spacing: -0.2px; - font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; -} - -.demoTabActive { - color: #F59E0B; - border-bottom-color: #F59E0B; -} - -.demoTab:not(.demoTabActive):hover { - color: #EDF2F8; -} - -.demoPanel { - background: #111317; -} - -.demoCodeHeader { - padding: 0.75rem 1.25rem; - background: #111317; - border-bottom: 1px solid #252A30; - display: flex; - align-items: center; - gap: 0.75rem; -} - -.windowDots { - display: flex; - gap: 0.5rem; -} - -.windowDot { - width: 11px; - height: 11px; - border-radius: 50%; - display: inline-block; -} - -.dotRed { - background: #ED6A5E; -} - -.dotYellow { - background: #F5BD4F; -} - -.dotGreen { - background: #61C454; -} - -.copyBtn { - margin-left: auto; - background: transparent; - border: none; - color: #9AA4BF; - font-size: 0.7rem; - font-family: 'JetBrains Mono', 'Fira Code', monospace; - cursor: pointer; - padding: 0.25rem 0.75rem; - border-radius: 6px; - transition: all 0.15s; - letter-spacing: -0.2px; -} - -.copyBtn:hover { - background: rgba(175, 120, 139, 0.1); - color: #F59E0B; -} - -.demoPre { - margin: 0; - padding: 1.75rem 2rem; - overflow-x: auto; - font-family: 'JetBrains Mono', 'Fira Code', 'SF 
Mono', Menlo, monospace; - font-size: 0.85rem; - line-height: 1.75; - color: #EDF2F8; - background: #111317; -} - -.demoPre code { - font-family: inherit; - background: transparent; - border: none; - padding: 0; - color: inherit; -} - -/* Syntax highlight tokens */ -.hlKeyword { - color: #F59E0B; - font-weight: 500; -} - -.hlFunction { - color: #5E6AD2; -} - -.hlString { - color: #10B981; -} - -.hlComment { - color: #6B7280; - font-style: italic; -} - -.hlType { - color: #5E6AD2; -} - -.hlAttribute { - color: #D9A7E8; -} - -.terminalOutput { - background: #111317; - border-top: 1px solid #252A30; - padding: 1rem 2rem; - font-family: 'JetBrains Mono', 'Fira Code', monospace; - font-size: 0.78rem; - color: #9AA4BF; - line-height: 1.7; -} - -.terminalPrompt { - color: #10B981; -} - -.terminalAnswer { - color: #EDF2F8; -} - -.terminalCursor { - display: inline-block; - width: 7px; - height: 13px; - background-color: #AF788B; - vertical-align: middle; - margin-left: 4px; - animation: cursorBlink 1s step-end infinite; -} - -@keyframes cursorBlink { - 0%, 100% { opacity: 1; } - 50% { opacity: 0; } -} - -.installBar { - padding: 1rem 2rem; - background: #161A1F; - border-top: 1px solid #252A30; +.principlesTitle { + font-size: 1rem; + text-transform: uppercase; + letter-spacing: 2px; + color: var(--primary); + margin-bottom: 18px; display: flex; align-items: center; - justify-content: space-between; - flex-wrap: wrap; - gap: 0.75rem; -} - -.installCommand { - font-family: 'JetBrains Mono', 'Fira Code', monospace; - font-size: 0.75rem; - color: #9AA4BF; - background: #111317; - padding: 0.4rem 1rem; - border-radius: 8px; - border: 1px solid #252A30; + gap: 8px; } -.installCommand span { - color: #F59E0B; +.principle { + margin-bottom: 18px; + padding-bottom: 14px; + border-bottom: 1px solid var(--border); } -.installBtn { - background: #F59E0B; - border: none; - color: white; - font-size: 0.75rem; - font-weight: 500; - padding: 0.4rem 1.2rem; - border-radius: 20px; - 
cursor: pointer; - transition: background 0.15s; - font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; -} - -.installBtn:hover { - background: #D97706; +.principle:last-of-type { + border-bottom: none; + margin-bottom: 0; + padding-bottom: 0; } -/* ===== Format Pills ===== */ -.formatPills { +.principleHead { + font-weight: 700; + font-size: 1rem; + margin-bottom: 6px; display: flex; - justify-content: center; - gap: 0.75rem; - margin-bottom: 2rem; -} - -.formatPill { - display: inline-flex; align-items: center; - padding: 0.35rem 1rem; - border-radius: 20px; - font-size: 0.8rem; - font-weight: 600; - font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; - letter-spacing: -0.2px; - background: var(--primary-soft); - color: var(--primary-dark); - border: 1px solid var(--primary); -} - -[data-theme='dark'] .formatPill { - color: var(--primary); -} - -/* ===== Key Features Grid ===== */ -.featureGrid { - display: grid; - grid-template-columns: repeat(3, 1fr); - gap: 1.5rem; - max-width: 1100px; - margin: 0 auto; -} - -.featureCard { - background: var(--card-bg); - border: 1px solid var(--border); - border-radius: 16px; - padding: 2rem 1.75rem; - transition: border-color 0.2s, box-shadow 0.2s; -} - -.featureCard:hover { - border-color: var(--primary); - box-shadow: 0 4px 20px rgba(245, 158, 11, 0.08); -} - -.featureTitle { - font-size: 1.1rem; - font-weight: 700; + gap: 8px; color: var(--text); - margin: 0 0 0.75rem; } -.featureDesc { - font-size: 0.92rem; - line-height: 1.65; +.principleDesc { + font-size: 0.8rem; color: var(--text-light); - margin: 0; -} - -/* ===== Navigation Theater ===== */ -.narrativeDemo { - background: #161A1F; - border: 1px solid #252A30; - border-radius: 16px; - padding: 2rem 2.5rem; - max-width: 780px; - margin: 0 auto; -} - -.narrativeHeader { - display: flex; - gap: 12px; - align-items: center; - margin-bottom: 28px; -} - -.narrativeTitle { - font-size: 1.25rem; - font-weight: 600; - color: #E2E8F0; - 
margin: 0; + line-height: 1.4; } -.narrativeBadge { - background: rgba(245, 158, 11, 0.12); +.badgeRust { + background: var(--primary-soft); color: var(--primary); - padding: 2px 12px; border-radius: 30px; + padding: 2px 8px; font-size: 0.7rem; - margin-left: auto; -} - -.navTrack { - display: flex; - flex-direction: column; - gap: 0; - position: relative; -} - -/* vertical timeline line */ -.navTrack::before { - content: ''; - position: absolute; - left: 18px; - top: 24px; - bottom: 24px; - width: 2px; - background: #252A30; - border-radius: 1px; + font-weight: 500; + margin-left: 8px; } -.trackStep { +.principlesFooter { + margin-top: 14px; + font-size: 0.7rem; + text-align: right; + opacity: 0.6; + border-top: 1px solid var(--border); + padding-top: 10px; display: flex; - flex-direction: column; - padding: 0.75rem 0 0.75rem 48px; - position: relative; -} - -.trackStep .stepBadge { - display: inline-flex; align-items: center; + justify-content: flex-end; gap: 6px; - font-size: 0.78rem; - font-weight: 600; - color: var(--primary); - margin-bottom: 4px; - position: relative; -} - -/* dot on the timeline */ -.trackStep .stepBadge::before { - content: ''; - position: absolute; - left: -36px; - top: 50%; - transform: translateY(-50%); - width: 10px; - height: 10px; - border-radius: 50%; - background: var(--primary); - border: 2px solid #161A1F; - z-index: 1; -} - -.stepBadgeGreen { - color: var(--accent-green) !important; -} - -.stepBadgeGreen::before { - background: var(--accent-green) !important; -} - -.stepContent { - font-size: 0.9rem; - line-height: 1.6; - color: #C8D0DE; -} - -/* inline inside narrative demo — dark-friendly */ -.stepContent code { - background: #1A1F27; - color: #E2E8F0; - padding: 2px 6px; - border-radius: 4px; - font-size: 0.85em; - border: 1px solid #2A3040; -} - -.hamsterVoice { - background: rgba(245, 158, 11, 0.08); - border-left: 3px solid var(--primary); - border-radius: 8px; - padding: 1rem 1.25rem; - margin: 0.75rem 0 0.75rem 48px; - 
font-size: 0.85rem; - line-height: 1.6; - color: #B0B8C8; -} - -/* ===== Use Cases Slider ===== */ -.sliderOuter { - overflow: hidden; - width: 100%; - padding: 1rem 0; -} - -.sliderTrack { - display: flex; - gap: 1.5rem; - transition: transform 0.45s cubic-bezier(0.2, 0.9, 0.4, 1.1); - will-change: transform; -} - -.caseCard { - flex: 0 0 calc(65% - 0.75rem); - background: #161A1F; - border: 1px solid #252A30; - border-radius: 16px; - padding: 3rem 3rem 2.5rem; - opacity: 0.4; - transform: scale(0.94); - transition: all 0.35s ease; - filter: brightness(0.75); - min-height: 460px; - display: flex; - flex-direction: column; - justify-content: center; - gap: 0.5rem; -} - -.caseCardActive { - opacity: 1; - transform: scale(1); - filter: brightness(1); - border-color: #F59E0B; - box-shadow: 0 12px 40px rgba(245, 158, 11, 0.18); -} - -.caseTitle { - font-size: 1.55rem; - font-weight: 600; - margin: 0 0 0.75rem; - color: #EDF2F8; -} - -.caseDesc { - color: #9AA4BF; - font-size: 1.05rem; - line-height: 1.7; - margin: 0 0 1.75rem; -} - -.caseQuery { - background: #111317; - border-radius: 12px; - padding: 1.5rem 1.75rem; - font-family: 'JetBrains Mono', 'Fira Code', monospace; - font-size: 0.88rem; - color: #EDF2F8; - border: 1px solid #252A30; - line-height: 1.7; -} - -.caseQueryLabel { - color: #F59E0B; - font-weight: 600; - margin-bottom: 0.5rem; -} - -.caseQueryText { - color: #EDF2F8; -} - -.caseAnswer { - color: #9AA4BF; - margin-top: 0.75rem; - padding-top: 0.75rem; - border-top: 1px solid #252A30; - font-size: 0.75rem; -} - -.sliderNav { - display: flex; - justify-content: center; - align-items: center; - gap: 1rem; - margin-top: 2.5rem; -} - -.sliderBtn { - background: var(--card-bg); - border: 1px solid var(--border); color: var(--text-light); - font-size: 1.2rem; - width: 44px; - height: 44px; - border-radius: 44px; - display: inline-flex; - align-items: center; - justify-content: center; - cursor: pointer; - transition: all 0.2s; -} - -.sliderBtn:hover { - 
border-color: var(--primary); - color: var(--primary-dark); - background: var(--primary-soft); -} - -.sliderDots { - display: flex; - gap: 0.5rem; -} - -.sliderDot { - width: 8px; - height: 8px; - border-radius: 8px; - background: var(--text-light); - border: none; - padding: 0; - cursor: pointer; - transition: all 0.25s; - opacity: 0.4; -} - -.sliderDotActive { - width: 28px; - background: var(--primary); - opacity: 1; -} - -/* ===== CTA ===== */ -.sectionCtaDark { - background: #111317; - padding: 3rem 1.5rem; -} - -.ctaBox { - max-width: 880px; - margin: 0 auto; - text-align: center; - padding: 4rem 2rem; -} - -.ctaTitle { - font-size: 2.5rem; - font-weight: 700; - letter-spacing: -0.02em; - margin: 0 0 1rem; - color: var(--primary); -} - -.ctaDesc { - font-size: 1.05rem; - color: #9AA4BF; - max-width: 520px; - margin: 0 auto 2rem; - line-height: 1.6; -} - -.ctaButtons { - display: flex; - justify-content: center; - gap: 1rem; - margin-bottom: 1.5rem; - flex-wrap: wrap; -} - -.ctaBtnSecondary { - padding: 0.7rem 1.8rem; - border-radius: 40px; - font-weight: 600; - font-size: 0.88rem; - background: transparent; - border: 1px solid #252A30; - color: #EDF2F8; - text-decoration: none; - cursor: pointer; - transition: all 0.2s; - font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; -} - -.ctaBtnSecondary:hover { - border-color: #F59E0B; - background: rgba(245, 158, 11, 0.12); - color: #F59E0B; - text-decoration: none; -} - -.ctaInstallCards { - display: flex; - justify-content: center; - gap: 1rem; - flex-wrap: wrap; - margin-bottom: 2rem; -} - -.ctaInstallItem { - background: #161A1F; - border: 1px solid #252A30; - border-radius: 12px; - padding: 0.65rem 1.25rem; - display: flex; - align-items: center; - gap: 1rem; -} - -.ctaInstallCommand { - font-family: 'JetBrains Mono', 'Fira Code', monospace; - font-size: 0.82rem; - color: #EDF2F8; -} - -.ctaInstallCommand span { - color: #F59E0B; -} - -.ctaCopyIcon { - background: transparent; - border: none; - 
color: #9AA4BF; - cursor: pointer; - padding: 0.25rem 0.65rem; - border-radius: 6px; - font-size: 0.7rem; - font-family: 'Inter', sans-serif; - transition: all 0.15s; -} - -.ctaCopyIcon:hover { - background: rgba(245, 158, 11, 0.12); - color: #F59E0B; -} - -/* ===== Dark theme overrides ===== */ -[data-theme='dark'] .heroTitleEmphasis { - color: var(--primary); -} - -[data-theme='dark'] .heroTitleLight { - color: #6B7A8A; -} - -[data-theme='dark'] .heroTaglineHighlight { - color: var(--text); -} - -[data-theme='dark'] .card:hover { - border-color: var(--primary); } /* ===== Responsive ===== */ -@media screen and (max-width: 996px) { - .heroBanner { - padding: 6rem 1.5rem 4rem; - } - - .heroTitle { - font-size: 3.2rem; - } - - .heroTagline { - font-size: 1.15rem; +@media (max-width: 880px) { + .hero { + flex-direction: column; + padding: 40px 20px; } - .caseCard { - flex: 0 0 calc(70% - 0.75rem); + .mainTitle { + font-size: 2.8rem; } - .section { - padding: 3.5rem 1.5rem; + .subTitle { + font-size: 1.2rem; } - .featureGrid { - grid-template-columns: repeat(2, 1fr); + .featureItem { + width: 100%; } } -@media screen and (max-width: 600px) { +@media (max-width: 480px) { .heroBanner { - padding: 4.5rem 1rem 3rem; - } - - .heroTitle { - font-size: 2rem; - } - - .heroActions { - flex-direction: column; - align-items: center; - } - - .buttonPrimary, - .buttonSecondary { - width: 100%; - justify-content: center; - max-width: 280px; - } - - .caseCard { - flex: 0 0 calc(90% - 0.75rem); - } - - .demoPre { - font-size: 0.65rem; - } - - .installBar { - flex-direction: column; align-items: flex-start; + padding-top: 80px; } - .sectionTitle { - font-size: 1.5rem; + .mainTitle { + font-size: 2.2rem; } - .featureGrid { - grid-template-columns: 1fr; + .subTitle { + font-size: 1.05rem; + padding-left: 12px; } - .rulesRow { - flex-direction: column; - align-items: center; + .featureItem { + font-size: 0.9rem; + padding: 8px 12px; } - .ruleCard { - max-width: 100%; + .heroPrinciples { 
+ border-radius: 24px; + padding: 20px 16px; } } diff --git a/docs/src/pages/index.tsx b/docs/src/pages/index.tsx index ca30948..4626d46 100644 --- a/docs/src/pages/index.tsx +++ b/docs/src/pages/index.tsx @@ -1,595 +1,115 @@ import type {ReactNode} from 'react'; -import {useState, useMemo, useRef, useEffect, useCallback} from 'react'; import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; import Layout from '@theme/Layout'; -import Heading from '@theme/Heading'; import Link from '@docusaurus/Link'; import styles from './index.module.css'; -function HomepageHeader() { +/* ===== Hamster SVG Icon ===== */ +function HamsterIcon({size = 14}: {size?: number}) { return ( -
    -
    -

    - Reason, - don't vector -

    -

    - - Vectorless will reason through any of your structured documents — PDFs, Markdown, reports, contracts, - -
    - and retrieve only what's relevant. Every retrieval is a reasoning act. -

    -
    - - - Star on GitHub - - -
    -
    -
    + + + + + + + + + + + + + + + + + + + ); } -/* ---- Regex-based syntax highlighter ---- */ -function highlight(code: string, lang: 'python' | 'rust'): ReactNode[] { - // Each rule has exactly ONE capture group in its regex - const rules: {re: RegExp; cls: string}[] = lang === 'python' - ? [ - {re: /(#.*)/g, cls: styles.hlComment}, - {re: /("(?:[^"\\]|\\.)*")/g, cls: styles.hlString}, - {re: /\b(import|from|async|def|await|return|as|with|for|in|if|else|None|True|False)\b/g, cls: styles.hlKeyword}, - {re: /\b([A-Z][A-Za-z0-9_]*)\b/g, cls: styles.hlType}, - {re: /\b([a-z_]\w*)\s*(?=\()/g, cls: styles.hlFunction}, - ] - : [ - {re: /(\/\/.*)/g, cls: styles.hlComment}, - {re: /("(?:[^"\\]|\\.)*")/g, cls: styles.hlString}, - {re: /\b(use|let|mut|fn|async|await|return|if|else|match|struct|impl|pub|mod|crate|self|super|where|for|in|loop|while|break|continue|move|ref|type|enum|trait|const|static|unsafe|extern)\b/g, cls: styles.hlKeyword}, - {re: /\b([A-Z][A-Za-z0-9_]*)\b/g, cls: styles.hlType}, - {re: /\b(\w+!)/g, cls: styles.hlFunction}, - {re: /\b([a-z_]\w*)\s*(?=\()/g, cls: styles.hlFunction}, - {re: /(#\[.*?\])/g, cls: styles.hlAttribute}, - ]; - - // Build combined regex — join the single capture-group sources directly - const combined = rules.map(r => r.re.source).join('|'); - const re = new RegExp(combined, 'gm'); - - const nodes: ReactNode[] = []; - let lastIdx = 0; - let m: RegExpExecArray | null; - re.lastIndex = 0; - - while ((m = re.exec(code)) !== null) { - if (m.index > lastIdx) { - nodes.push(code.slice(lastIdx, m.index)); - } - // match[1..rules.length] corresponds to each rule's capture group - for (let i = 0; i < rules.length; i++) { - const captured = m[i + 1]; - if (captured !== undefined) { - nodes.push({captured}); - break; - } - } - lastIdx = re.lastIndex; - } - if (lastIdx < code.length) { - nodes.push(code.slice(lastIdx)); - } - return nodes; -} - -const PYTHON_CODE = `import asyncio -from vectorless import Engine, IndexContext, QueryContext - 
-async def main(): - engine = Engine(api_key="sk-...", model="gpt-4o", endpoint="https://api.openai.com/v1") - - # Index a document - result = await engine.index(IndexContext.from_path("./report.pdf")) - doc_id = result.doc_id - - # Query - result = await engine.query( - QueryContext("What is the total revenue?") - .with_doc_ids([doc_id]) - ) - print(result.single().content) - -asyncio.run(main())`; - -const RUST_CODE = `use vectorless::{EngineBuilder, IndexContext, QueryContext}; - -#[tokio::main] -async fn main() -> vectorless::Result<()> { - let engine = EngineBuilder::new() - .with_key("sk-...") - .with_model("gpt-4o") - .with_endpoint("https://api.openai.com/v1") - .build() - .await?; - - // Index a document - let result = engine.index(IndexContext::from_path("./report.pdf")).await?; - let doc_id = result.doc_id().unwrap(); - - // Query - let result = engine.query( - QueryContext::new("What is the total revenue?") - .with_doc_ids(vec![doc_id.to_string()]) - ).await?; - println!("{}", result.content); - Ok(()) -}`; - -function PythonCode() { - const nodes = useMemo(() => highlight(PYTHON_CODE, 'python'), []); - return
    {nodes}
    ; -} - -function RustCode() { - const nodes = useMemo(() => highlight(RUST_CODE, 'rust'), []); - return
    {nodes}
    ; -} - -function SectionThreeRules() { +/* ===== Hero ===== */ +function HomepageHeader() { return ( -
    -
    - - Three rules. No exceptions. - -

    - Every decision in this system follows these principles. -

    -
    -
    -
    Reason, don't vector
    -
    Every retrieval is a reasoning act, not a similarity computation.
    -
    -
    -
    Model fails, we fail
    -
    No heuristic fallbacks. No silent degradation.
    +
    +
    + {/* Left: Brand + Features */} +
    +

    Vectorless

    +

    Reasoning-native Document Engine

    + +
    +
    + Open source by design +
    +
    + Rust-powered · Python ecosystem +
    +
    + Rules of Three — no exceptions +
    -
    -
    No thought, no answer
    -
    Only reasoned output counts as an answer.
    + +
    + + + Star on GitHub + +
    -
    -
    - ); -} - -function SectionGetStarted() { - const [activeTab, setActiveTab] = useState<'python' | 'rust'>('python'); - const [copyLabel, setCopyLabel] = useState('Copy'); - const [installLabel, setInstallLabel] = useState('Copy & install'); - - const installCmd = activeTab === 'python' ? 'pip install vectorless' : 'cargo add vectorless'; - const handleCopy = () => { - const code = activeTab === 'python' ? PYTHON_CODE : RUST_CODE; - navigator.clipboard.writeText(code); - setCopyLabel('\u2713 Copied!'); - setTimeout(() => setCopyLabel('Copy'), 1500); - }; - - const handleInstallCopy = () => { - navigator.clipboard.writeText(installCmd); - setInstallLabel('\u2713 Copied!'); - setTimeout(() => setInstallLabel('Copy & install'), 1500); - }; - - return ( -
    -
    - - Get Started - -

    - Three lines to index. One line to query. -

    -
    - PDF - Markdown -
    -
    - {/* Tabs */} -
    - - + {/* Right: Principles Card */} +
    +
    + Three rules · No exceptions
    - {/* Python panel */} - {activeTab === 'python' && ( -
    -
    -
    - - - -
    - -
    - -
    - $ python demo.py
    - The total revenue for fiscal year 2024 was $2.3 billion, a 15% increase YoY. - -
    -
    - )} - - {/* Rust panel */} - {activeTab === 'rust' && ( -
    -
    -
    - - - -
    - -
    - -
    - $ cargo run
    - The total revenue for fiscal year 2024 was $2.3 billion, a 15% increase YoY. - -
    +
    +
    + 1. Reason, don't vector + core
    - )} - - {/* Install bar */} -
    -
    - $ {installCmd} +
    + Every retrieval is a reasoning act, not a similarity computation. No embeddings, no approximate matches.
    -
    -
    -
    -
    - ); -} -function SectionHowItWorks() { - return ( -
    -
    - - How does Vectorless work? - -

    - Documents are compiled into navigable trees. Multiple agents reason through them. -

    -
    -
    - - - -

    Vectorless Reasoning Flow

    - Live Reasoning -
    -
    - {/* Step 1: Index */} -
    -
    - Index -
    -
    - 3 documents indexed → hierarchical trees + NavigationIndex + ReasoningIndex + DocCards built -
    -
    - {/* Step 2: Query */} -
    -
    - Query -
    -
    - “How much delta-V remains after the Day 17 thruster failure, and is it enough?” -
    -
    - {/* Step 3: Orchestrator analyzes DocCards */} -
    -
    - Orchestrator · Analyze -
    -
    - LLM understands query intent (complex, analytical) → reads DocCards → dispatches Worker to doc #1 -
    -
    - {/* Step 4: Bird's-eye view */} -
    -
    - Worker · Bird’s-Eye -
    -
    - ls root → sees 4 top-level sections → LLM generates navigation plan targeting Orbital Mechanics + Mission Anomalies -
    +
    +
    + 2. Model fails, we fail
    - {/* Step 5: Navigate */} -
    -
    - Navigate -
    -
    - cd "Orbital Mechanics"cd "Transfer Orbit Analysis"cat "Delta-V Budget" → evidence #1 collected -
    -
    - {/* Step 6: Cross-reference */} -
    -
    - Cross-Reference -
    -
    - find "misfire" → hit in Mission Anomalies → cd + cat "Day 17 Thruster Misfire" → evidence #2 collected -
    -
    - {/* Step 7: Sufficiency check */} -
    -
    - Check -
    -
    - check → LLM evaluates: both delta-V budget and anomaly impact found → SUFFICIENT → done -
    -
    - {/* Step 8: Rerank + Synthesize */} -
    - - Rerank pipeline: dedup → LLM-scored relevance (score: 0.87, confidence: high) → return original passages with source attribution. -
    - {/* Step 9: Final Answer */} -
    -
    - Result -
    -
    - After the B3 thruster failure, remaining reserve is 218 m/s vs. 150 m/s requirement — sufficient to complete the mission. Sources: Delta-V Budget, Day 17 Thruster Misfire. -
    +
    + No heuristic fallbacks. No silent degradation. If the reasoning model cannot find an answer, we return nothing — not a guess.
    -
    -
    -
    - ); -} -const KEY_FEATURES = [ - { - title: 'Rust Core', - desc: 'The entire engine is implemented in Rust for performance and reliability. Python SDK and CLI also provided.', - }, - { - title: 'Multi-Agent Retrieval', - desc: 'An Orchestrator plans and evaluates. Workers navigate documents. Each retrieval is a reasoning act.', - }, - { - title: 'Zero Vectors', - desc: 'No embedding model, no vector store, no similarity search. Eliminates wrong chunk boundaries and stale embeddings.', - }, - { - title: 'Tree Navigation', - desc: 'Documents compiled into hierarchical trees. Workers navigate like a human: scan TOC, jump to section, read passage.', - }, - { - title: 'Document-Exact Output', - desc: 'Returns original text passages. No synthesis, no rewriting, no hallucinated content.', - }, - { - title: 'Incremental Updates', - desc: 'Content fingerprinting detects changes. Only recompiles modified sections. Checkpointable 8-stage pipeline.', - }, -]; - -function SectionKeyFeatures() { - return ( -
    -
    - - Key Features - -

    - Reasoning-native, from the ground up. -

    -
    - {KEY_FEATURES.map((f, i) => ( -
    - {f.title} -

    {f.desc}

    +
    +
    + 3. No thought, no answer +
    +
    + Only reasoned output counts as an answer. Every response must be traceable through a semantic tree path — no hallucinated filler.
    - ))} -
    -
    -
    - ); -} - -const USE_CASES = [ - { - title: 'Financial reports', - desc: 'Extract specific KPIs from 10\u2011K, annual reports, or earnings transcripts \u2014 even across fiscal years.', - query: '\u201cWhat was the net profit margin for Q3 2024?\u201d', - answer: '18.4%, up from 16.2% in Q3 2023. Source: Section 6.2, page 34.', - }, - { - title: 'Legal & contracts', - desc: 'Locate clauses, definitions, or obligations across complex agreements without missing cross\u2011references.', - query: '\u201cWhich sections define \u2018force majeure\u2019 and what are the notice requirements?\u201d', - answer: 'Section 12.3(a) + 12.3(b) \u2014 30\u2011day written notice required.', - }, - { - title: 'Technical docs', - desc: 'Navigate large API references, internal wikis, or on\u2011prem manuals with step\u2011by\u2011step reasoning.', - query: '\u201cHow to configure authentication for the WebSocket gateway?\u201d', - answer: 'See \u201cWebSocket Auth\u201d \u2192 section 4.2.1: use Authorization: Bearer .', - }, - { - title: 'Research papers', - desc: 'Cross\u2011reference findings, tables, or citations across arXiv preprints or internal literature.', - query: '\u201cWhat datasets were used for evaluation in Section 4?\u201d', - answer: 'Table 2: SQuAD, Natural Questions, and TriviaQA.', - }, - { - title: 'Cross\u2011document analysis', - desc: 'Compare metrics, definitions, or timelines across multiple reports in one query.', - query: '\u201cCompare R&D spending from 2023 vs 2024 annual reports.\u201d', - answer: '2023: $12.4M (page 9) \u00b7 2024: $15.1M (page 11) \u2192 +21.8% YoY.', - }, - { - title: 'Compliance & audit', - desc: 'Trace every retrieved statement back to its source \u2014 full explainability for regulated industries.', - query: '\u201cShow me all references to data retention policy.\u201d', - answer: 'Section 3.2 (page 8), Section 5.1 (page 14), and Appendix B.', - }, -]; - -function SectionUseCases() { - const [current, setCurrent] = useState(0); 
- const outerRef = useRef(null); - const trackRef = useRef(null); - const [offset, setOffset] = useState(0); - - const total = USE_CASES.length; - - const measure = useCallback(() => { - if (!outerRef.current || !trackRef.current) return; - const outerW = outerRef.current.offsetWidth; - const firstCard = trackRef.current.children[0] as HTMLElement; - if (!firstCard) return; - const cardW = firstCard.offsetWidth; - const gap = 24; // 1.5rem - const step = cardW + gap; - const newOffset = outerW / 2 - current * step - cardW / 2; - setOffset(newOffset); - }, [current]); - - useEffect(() => { - measure(); - window.addEventListener('resize', measure); - return () => window.removeEventListener('resize', measure); - }, [measure]); - - const prev = () => setCurrent(i => Math.max(0, i - 1)); - const next = () => setCurrent(i => Math.min(total - 1, i + 1)); - - return ( -
    -
    - - Use cases · precision reasoning - -

    - Vectorless navigates through the structure of any document to retrieve exact context. -

    -
    -
    - {USE_CASES.map((c, i) => ( -
    - {c.title} -

    {c.desc}

    -
    -
    Query:
    -
    {c.query}
    -
    {c.answer}
    -
    -
    - ))} -
    -
    -
    - -
    - {USE_CASES.map((_, i) => ( -
    - -
    -
    -
    - ); -} - -function SectionCTA() { - const [pipLabel, setPipLabel] = useState('Copy'); - const [cargoLabel, setCargoLabel] = useState('Copy'); - - const handlePipCopy = () => { - navigator.clipboard.writeText('pip install vectorless'); - setPipLabel('\u2713'); - setTimeout(() => setPipLabel('Copy'), 1500); - }; - - const handleCargoCopy = () => { - navigator.clipboard.writeText('cargo add vectorless'); - setCargoLabel('\u2713'); - setTimeout(() => setCargoLabel('Copy'), 1500); - }; - return ( -
    -
    -
    - - Start reasoning, not vectoring - -
    - - - Star on GitHub - - -
    -
    -
    -
    $ pip install vectorless
    - -
    -
    -
    $ cargo add vectorless
    - -
    +
    + + reason, don't vector
    -
    + ); } +/* ===== Main Page ===== */ export default function Home(): ReactNode { const {siteConfig} = useDocusaurusContext(); return ( @@ -597,14 +117,7 @@ export default function Home(): ReactNode { title={`${siteConfig.title}`} description="Reasoning-based document engine. No embeddings, no chunking. Multiple agents navigate your documents to find exactly what's relevant."> -
    - - - - - - -
    +
    ); } diff --git a/docs/src/theme/Navbar/index.tsx b/docs/src/theme/Navbar/index.tsx index a647445..3acdbca 100644 --- a/docs/src/theme/Navbar/index.tsx +++ b/docs/src/theme/Navbar/index.tsx @@ -41,8 +41,7 @@ function ColorModeToggle(): React.ReactElement { export default function Navbar(): React.ReactElement { const {navbar: {items, logo, title}} = useThemeConfig(); - const leftItems = items.filter(item => item.position === 'left'); - const rightItems = items.filter(item => item.position === 'right'); + const centerItems = items; return ( @@ -59,10 +58,9 @@ export default function Navbar(): React.ReactElement {
    Vectorless
    - {leftItems.map((item, i) => )} + {centerItems.map((item, i) => )}
    - {rightItems.map((item, i) => )}
    diff --git a/docs/src/theme/Navbar/styles.module.css b/docs/src/theme/Navbar/styles.module.css index a61a760..a2265d3 100644 --- a/docs/src/theme/Navbar/styles.module.css +++ b/docs/src/theme/Navbar/styles.module.css @@ -1,10 +1,9 @@ .navbarContainer { - max-width: 1200px; - margin: 0 auto; - padding: 0 24px; + position: relative; + width: 100%; + height: 100%; display: flex; align-items: center; - height: 100%; } .navbarBrand { @@ -12,7 +11,7 @@ align-items: center; gap: 10px; flex-shrink: 0; - margin-right: 24rem; + padding-left: 24px; } .navbarLogoLink { @@ -47,10 +46,11 @@ /* Center: navigation links */ .navbarCenter { - flex: 1; + position: absolute; + left: 50%; + transform: translateX(-50%); display: flex; align-items: center; - justify-content: center; gap: 2rem; } @@ -86,7 +86,8 @@ align-items: center; gap: 1rem; flex-shrink: 0; - margin-left: 24rem; + margin-left: auto; + padding-right: 24px; } .githubStarWrapper { @@ -124,8 +125,12 @@ } @media (max-width: 996px) { - .navbarContainer { - padding: 0 16px; + .navbarBrand { + padding-left: 16px; + } + + .navbarRight { + padding-right: 16px; } .navbarCenter {