From c51cd07b521817b8545d040f2c54ce32234cd3e3 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Wed, 22 Apr 2026 16:12:09 +0800
Subject: [PATCH 1/2] docs(README): update project description and simplify
 documentation

- Change title from "Reasoning-based Document Engine" to
  "Document Understanding Engine for AI"
- Replace the last subtitle phrase "Agents, not embeddings" with
  "Think, then answer"
- Rewrite the intro paragraph around document understanding and remove
  the "Why Vectorless" section with its detailed comparison table
- Rename "Agent-Based Retrieval" to "Agent-Based Understanding"
- Remove the redundant "Key Features" and "Supported Documents"
  sections to streamline documentation
---
 README.md | 41 +++++------------------------------------
 1 file changed, 5 insertions(+), 36 deletions(-)
diff --git a/README.md b/README.md
index c8391d1..613c70e 100644
--- a/README.md
+++ b/README.md
@@ -2,8 +2,8 @@
 
 <img src="https://vectorless.dev/img/with-title.png" alt="Vectorless" width="400">
 
-<h1>Reasoning-based Document Engine</h1>
-<h5>Reason, don't vector · Structure, not chunks · Agents, not embeddings</h5>
+<h1>Document Understanding Engine for AI</h1>
+<h5>Reason, don't vector · Structure, not chunks · Think, then answer</h5>
 
 [![PyPI](https://img.shields.io/pypi/v/vectorless.svg)](https://pypi.org/project/vectorless/)
 [![PyPI Downloads](https://static.pepy.tech/badge/vectorless/month)](https://pepy.tech/projects/vectorless)
@@ -14,29 +14,15 @@
 
 </div>
 
-**Vectorless** is a reasoning-native document engine written in Rust. It compiles documents into navigable trees, then dispatches **multiple agents** to find exactly what's relevant across your **PDFs, Markdown, reports, contracts**. No embeddings, no chunking, no approximate nearest neighbors. Every retrieval is a **reasoning** act.
+**Vectorless** is a document understanding engine for AI. It reads documents as structured trees of meaning, then dispatches multiple agents to reason through headings, sections, and paragraphs — understanding how each part relates to the whole. The problem it solves is not "where to look", but "what does this mean in context". Every understanding is a **reasoning** act.
 
 Light up a star and shine with us! ⭐
 
 ## Three Rules
-- **Reason, don't vector.** Retrieval is a reasoning act, not a similarity computation.
+- **Reason, don't vector.** Understanding is reasoning, not similarity.
 - **Model fails, we fail.** No heuristic fallbacks, no silent degradation.
 - **No thought, no answer.** Only reasoned output counts as an answer.
 
-## Why Vectorless
-
-Traditional RAG systems split documents into chunks, embed them into vectors, and retrieve by similarity. Vectorless takes a different approach: it preserves document structure as a navigable tree and lets agents reason through it.
-
-| | Embedding-Based RAG | Vectorless |
-|---|---|---|
-| **Indexing** | Chunk → embed → vector store | Parse → compile → document tree |
-| **Retrieval** | Cosine similarity (approximate) | Multi-agent navigation (exact) |
-| **Structure** | Destroyed by chunking | Preserved as first-class tree |
-| **Query handling** | Keyword/similarity match | Intent classification + decomposition |
-| **Multi-hop reasoning** | Not supported | Orchestrator replans dynamically |
-| **Output** | Retrieved chunks | Original text passages, exact |
-| **Failure mode** | Silent degradation | Explicit — no reasoning, no answer |
-
 ## How It Works
 
 ### Four-Artifact Index Architecture
@@ -60,7 +46,7 @@ DocumentTree          NavigationIndex               ReasoningIndex            Do
 
 This separation means the agent makes routing decisions from lightweight metadata, not by scanning full content.
 
-### Agent-Based Retrieval
+### Agent-Based Understanding
 
 ```
 Engine.query("What drove the revenue decline?")
@@ -114,23 +100,6 @@ async def main():
 asyncio.run(main())
 ```
 
-## Key Features
-
-- **Rust Core** — The entire engine (indexing, retrieval, agent, storage) is implemented in Rust for performance and reliability. Python SDK via PyO3 bindings and a CLI are also provided.
-- **Multi-Agent Retrieval** — Every query is handled by multiple cooperating agents: an Orchestrator plans and evaluates, Workers navigate documents. Each retrieval is a reasoning act — not a similarity score, but a sequence of LLM decisions about where to look, what to read, and when to stop.
-- **Zero Vectors** — No embedding model, no vector store, no similarity search. This eliminates a class of failure modes: wrong chunk boundaries, stale embeddings, and similarity-score false positives.
-- **Tree Navigation** — Documents are compiled into hierarchical trees that preserve the original structure — headings, sections, paragraphs, lists. Workers navigate this tree the way a human would: scan the table of contents, jump to the relevant section, read the passage.
-- **Document-Exact Output** — Returns original text passages from the source document. No synthesis, no rewriting, no hallucinated content. What you get is what was written.
-- **Multi-Document Orchestration** — Query across multiple documents with a single call. The Orchestrator dispatches Workers, evaluates evidence, and fuses results. When one document is insufficient, it replans and expands the search scope.
-- **Query Understanding** — Every query passes through LLM-based intent classification, concept extraction, and strategy selection. Complex queries are decomposed into sub-queries. The system adapts its navigation strategy based on whether the query is factual, analytical, comparative, or navigational.
-- **Checkpointable Pipeline** — The 8-stage compile pipeline writes checkpoints at each stage. If indexing is interrupted (LLM rate limit, network failure), it resumes from the last completed stage — no wasted work.
-- **Incremental Updates** — Content fingerprinting detects changes at the node level. Re-indexing a modified document only recompiles the changed sections and their dependents.
-
-## Supported Documents
-
-- **PDF** — Full text extraction with page metadata
-- **Markdown** — Structure-aware parsing (headings, lists, code blocks)
-
 ## Resources
 
 - [Documentation](https://vectorless.dev) — Guides, architecture, API reference

From 9ca2fc35d322c59174402c6b403e2b406703e097 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Wed, 22 Apr 2026 16:34:45 +0800
Subject: [PATCH 2/2] docs: update project descriptions and terminology

- Replace "reasoning-native" and "reasoning-based" with "document
  understanding" throughout documentation
- Update README.md to clarify that Vectorless compiles documents
  into structured trees and provides reasoning acts, not retrieval
  results
- Change "Fusion" to "Synthesis" in architecture diagram
- Update blog post tags from [vectorless, rag, llm, announcement]
  to [vectorless, document-understanding, llm, ai, announcement]
- Modify docusaurus config and homepage header/description to
  reflect new positioning as "Document Understanding Engine for AI"
- Streamline feature descriptions and remove redundant phrases
---
 README.md                              |  6 +++---
 docs/blog/2026-04-12-welcome/index.mdx | 10 +++++-----
 docs/docs/intro.mdx                    |  8 +++-----
 docs/docusaurus.config.ts              |  2 +-
 docs/src/pages/index.tsx               |  4 ++--
 5 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 613c70e..b42dc24 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 <img src="https://vectorless.dev/img/with-title.png" alt="Vectorless" width="400">
 
 <h1>Document Understanding Engine for AI</h1>
-<h5>Reason, don't vector · Structure, not chunks · Think, then answer</h5>
+<h3>Reason, don't vector · Structure, not chunks · Think, then answer</h3>
 
 [![PyPI](https://img.shields.io/pypi/v/vectorless.svg)](https://pypi.org/project/vectorless/)
 [![PyPI Downloads](https://static.pepy.tech/badge/vectorless/month)](https://pepy.tech/projects/vectorless)
@@ -14,7 +14,7 @@
 
 </div>
 
-**Vectorless** is a document understanding engine for AI. It reads documents as structured trees of meaning, then dispatches multiple agents to reason through headings, sections, and paragraphs — understanding how each part relates to the whole. The problem it solves is not "where to look", but "what does this mean in context". Every understanding is a **reasoning** act.
+**Vectorless** is a document understanding engine for AI. It compiles documents into structured trees of meaning, then dispatches multiple agents to reason through headings, sections, and paragraphs — evaluating how each part relates to the whole. The problem it solves is not "where to look", but "what does this mean in context". Every answer is a reasoning act, not a retrieval result.
 
 Light up a star and shine with us! ⭐
 
@@ -60,7 +60,7 @@ Engine.query("What drove the revenue decline?")
   │   │
   │   └─ evaluate ── insufficient? → replan → dispatch new paths → loop
   │
-  └─ Fusion ── dedup, LLM-scored relevance, return with source attribution
+  └─ Synthesis ── dedup, evidence scoring, reasoned answer with source chain
 ```
 
 Worker navigation commands:
diff --git a/docs/blog/2026-04-12-welcome/index.mdx b/docs/blog/2026-04-12-welcome/index.mdx
index fea3e94..bd30147 100644
--- a/docs/blog/2026-04-12-welcome/index.mdx
+++ b/docs/blog/2026-04-12-welcome/index.mdx
@@ -2,22 +2,22 @@
 slug: welcome
 title: Welcome to Vectorless
 authors: [zTgx]
-tags: [vectorless, rag, llm, announcement]
+tags: [vectorless, document-understanding, llm, ai, announcement]
 ---
 
-Vectorless is a reasoning-native document intelligence engine written in Rust — **no vector database, no embeddings, no similarity search**.
+Vectorless is a document understanding engine for AI. It compiles documents into structured trees of meaning, then dispatches multiple agents to reason through headings, sections, and paragraphs — evaluating how each part relates to the whole. The problem it solves is not "where to look", but "what does this mean in context". Every answer is a reasoning act, not a retrieval result.
 
 {/* truncate */}
 
 ## Why Vectorless?
 
-Traditional RAG systems rely on vector embeddings and similarity search. This approach loses document structure, requires a vector database, and often returns chunks that lack context.
+Understanding a document requires more than finding keywords — it requires navigating structure, cross-referencing sections, and evaluating whether the evidence is sufficient. Vectorless agents do exactly this: they reason through documents the way a human expert would.
 
-Vectorless takes a different path:
+Key capabilities:
 
 - **Hierarchical Semantic Trees** — Documents are parsed into a tree of sections, preserving structure and relationships.
 - **LLM Agent Navigation** — Queries are resolved by agents that navigate the tree using commands (ls, cd, cat, find, grep), making every decision through LLM reasoning.
-- **Zero Infrastructure** — No vector DB, no embedding models, no similarity search. Just an LLM API key.
+- **Zero Infrastructure** — Just an LLM API key, nothing else to deploy.
 
 ## Quick Start
 
diff --git a/docs/docs/intro.mdx b/docs/docs/intro.mdx
index beb3c30..eb13c61 100644
--- a/docs/docs/intro.mdx
+++ b/docs/docs/intro.mdx
@@ -4,9 +4,7 @@ sidebar_position: 1
 
 # Introduction
 
-**Vectorless** is a reasoning-native document intelligence engine written in Rust — **no vector database, no embeddings, no similarity search**.
-
-It transforms documents into hierarchical semantic trees and uses LLMs to navigate the structure, retrieving the most relevant content through deep contextual understanding instead of vector math.
+**Vectorless** is a document understanding engine for AI. It compiles documents into structured trees of meaning, then dispatches multiple agents to reason through headings, sections, and paragraphs — evaluating how each part relates to the whole. The problem it solves is not "where to look", but "what does this mean in context". Every answer is a reasoning act, not a retrieval result.
 
 ## How It Works
 
@@ -76,7 +74,7 @@ async fn main() -> vectorless::Result<()> {
 ## Features
 
 - **Hierarchical Semantic Trees** — Preserves document structure, not flat chunks
-- **LLM-Powered Agent Navigation** — Worker agents navigate the tree using commands (ls, cd, cat, find, grep), making every retrieval decision through LLM reasoning
+- **LLM-Powered Agent Navigation** — Worker agents navigate the tree using commands (ls, cd, cat, find, grep), making every decision through LLM reasoning
 - **Cross-Reference Resolution** — Automatically resolves "see Section 2.1", "Appendix G" references during indexing
 - **Synonym Expansion** — LLM-generated synonyms for indexed keywords improve recall for differently-worded queries
 - **Orchestrator Supervisor Loop** — Multi-document queries are coordinated by an LLM supervisor that dispatches Workers, evaluates evidence, and replans when needed
@@ -84,4 +82,4 @@ async fn main() -> vectorless::Result<()> {
 - **Incremental Indexing** — Content fingerprinting skips unchanged files
 - **DocCard Catalog** — Lightweight document metadata index enables fast multi-document analysis without loading full documents
 - **Multi-Format** — Markdown and PDF support
-- **Zero Infrastructure** — No vector DB, no embedding models, just an LLM API key
+- **Zero Infrastructure** — Just an LLM API key, nothing else to deploy
diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts
index 105e09c..76f4f87 100644
--- a/docs/docusaurus.config.ts
+++ b/docs/docusaurus.config.ts
@@ -6,7 +6,7 @@ import type * as Preset from '@docusaurus/preset-classic';
 
 const config: Config = {
   title: 'Vectorless',
-  tagline: 'Reasoning-based Document Engine',
+  tagline: 'Document Understanding Engine for AI',
   favicon: 'img/favicon.ico',
 
   future: {
diff --git a/docs/src/pages/index.tsx b/docs/src/pages/index.tsx
index e9863ec..d506d45 100644
--- a/docs/src/pages/index.tsx
+++ b/docs/src/pages/index.tsx
@@ -42,7 +42,7 @@ function HomepageHeader() {
         {/* Left: Brand + Features */}
         <div className={styles.heroContent}>
           <h1 className={styles.mainTitle}>Vectorless</h1>
-          <p className={styles.subTitle}>Reasoning-native Document Engine</p>
+          <p className={styles.subTitle}>Document Understanding Engine for AI</p>
 
           <div className={styles.featureList}>
             <div className={styles.featureItem}>
@@ -119,7 +119,7 @@ export default function Home(): ReactNode {
   return (
     <Layout
       title={`${siteConfig.title}`}
-      description="Reasoning-based document engine. No embeddings, no chunking. Multiple agents navigate your documents to find exactly what's relevant.">
+      description="Document understanding engine for AI. Agents reason through your documents — navigating structure, reading passages, cross-referencing across sections.">
       <HomepageHeader />
       <main />
     </Layout>