Dev #96 · Merged

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -3,7 +3,7 @@ members = ["rust", "python"]
resolver = "2"

[workspace.package]
version = "0.1.30"
version = "0.1.31"
edition = "2024"
authors = ["zTgx <beautifularea@gmail.com>"]
license = "Apache-2.0"
8 changes: 3 additions & 5 deletions README.md
@@ -2,7 +2,7 @@

<img src="https://vectorless.dev/img/with-title.png" alt="Vectorless" width="400">

<h1>Document Engine for AI</h1>
<h1>Agentic-based Document Engine</h1>

[![PyPI](https://img.shields.io/pypi/v/vectorless.svg)](https://pypi.org/project/vectorless/)
[![PyPI Downloads](https://static.pepy.tech/badge/vectorless/month)](https://pepy.tech/projects/vectorless)
@@ -13,11 +13,9 @@

</div>

**Vectorless** is a reasoning-native document engine with the core written in Rust. It will reason through any of your structured documents — **PDFs, Markdown, reports, contracts** — and retrieve only what's relevant. Nothing more, nothing less.
**Reason, don't vector.**

- **Reason, don't vector.** — Retrieval is guided by reasoning over document structure.
- **Model fails, we fail.** — No silent degradation. No heuristic fallbacks.
- **No thought, no answer.** — Only LLM-reasoned output counts as an answer.
**Vectorless** is an agentic-based document engine with the core written in Rust. It will reason through any of your structured documents — **PDFs, Markdown, reports, contracts** — and retrieve only what's relevant. Nothing more, nothing less.


## Quick Start
5 changes: 2 additions & 3 deletions docs/docs/sdk/python.mdx
@@ -111,9 +111,8 @@ answer = await engine.query(
answer = await engine.query(
QueryContext("Explain the architecture")
.with_doc_ids([doc_id])
.with_max_tokens(4000) # Max tokens in result
.with_include_reasoning(True) # Include reasoning chain
.with_depth_limit(10) # Max traversal depth
.with_timeout_secs(60) # Per-operation timeout
.with_force_analysis(True) # Force Orchestrator analysis
)
```

5 changes: 1 addition & 4 deletions docs/docs/sdk/rust.mdx
@@ -52,14 +52,11 @@ let result = engine.index(

```rust
use vectorless::QueryContext;
use vectorless::StrategyPreference;

let result = engine.query(
QueryContext::new("What is the total revenue?")
.with_doc_ids(vec![doc_id.to_string()])
.with_strategy(StrategyPreference::ForceHybrid)
.with_max_tokens(4000)
.with_include_reasoning(true)
.with_timeout_secs(60)
).await?;

if let Some(item) = result.single() {
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"

[project]
name = "vectorless"
version = "0.1.9"
version = "0.1.10"
description = "Reasoning-native document intelligence engine for AI"
readme = "README.md"
requires-python = ">=3.9"
5 changes: 2 additions & 3 deletions python/README.md
@@ -116,9 +116,8 @@ class QueryContext:

def with_doc_ids(self, doc_ids: list[str]) -> QueryContext: ...
def with_workspace(self) -> QueryContext: ...
def with_max_tokens(self, tokens: int) -> QueryContext: ...
def with_include_reasoning(self, include: bool) -> QueryContext: ...
def with_depth_limit(self, depth: int) -> QueryContext: ...
def with_timeout_secs(self, secs: int) -> QueryContext: ...
def with_force_analysis(self, force: bool) -> QueryContext: ...
```

### IndexResult
18 changes: 14 additions & 4 deletions python/src/context.rs
@@ -44,13 +44,14 @@ pub struct PyIndexOptions {
#[pymethods]
impl PyIndexOptions {
#[new]
#[pyo3(signature = (mode="default", generate_summaries=true, generate_description=false, generate_ids=true, enable_synonym_expansion=false))]
#[pyo3(signature = (mode="default", generate_summaries=true, generate_description=false, generate_ids=true, enable_synonym_expansion=true, timeout_secs=None))]
fn new(
mode: &str,
generate_summaries: bool,
generate_description: bool,
generate_ids: bool,
enable_synonym_expansion: bool,
timeout_secs: Option<u64>,
) -> PyResult<Self> {
let mut opts = IndexOptions::new();
match mode {
@@ -71,6 +72,9 @@ impl PyIndexOptions {
opts.generate_description = generate_description;
opts.generate_ids = generate_ids;
opts.enable_synonym_expansion = enable_synonym_expansion;
if let Some(secs) = timeout_secs {
opts = opts.with_timeout_secs(secs);
}
Ok(Self { inner: opts })
}

@@ -260,9 +264,15 @@ impl PyQueryContext {
Self { inner: ctx }
}

/// Set the maximum tokens for the result content.
fn with_max_tokens(&self, tokens: usize) -> Self {
let ctx = self.inner.clone().with_max_tokens(tokens);
/// Set per-operation timeout in seconds.
fn with_timeout_secs(&self, secs: u64) -> Self {
let ctx = self.inner.clone().with_timeout_secs(secs);
Self { inner: ctx }
}

/// Force the Orchestrator to analyze documents before dispatching Workers.
fn with_force_analysis(&self, force: bool) -> Self {
let ctx = self.inner.clone().with_force_analysis(force);
Self { inner: ctx }
}

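The `PyQueryContext` methods above follow an immutable-builder pattern: each `with_*` call clones the inner Rust context and returns a fresh wrapper, so the original is never mutated. A plain-Python sketch of that pattern (this standalone class only illustrates the semantics; it is not the real binding):

```python
from dataclasses import dataclass, replace
from typing import Optional


@dataclass(frozen=True)
class QueryContext:
    """Illustrative stand-in for PyQueryContext: every builder
    method returns a new context, leaving the original untouched."""
    question: str
    timeout_secs: Optional[int] = None
    force_analysis: bool = False

    def with_timeout_secs(self, secs: int) -> "QueryContext":
        # Mirrors the Rust side: clone the inner context, set the field.
        return replace(self, timeout_secs=secs)

    def with_force_analysis(self, force: bool) -> "QueryContext":
        return replace(self, force_analysis=force)


base = QueryContext("Explain the architecture")
tuned = base.with_timeout_secs(60).with_force_analysis(True)
print(base.timeout_secs, tuned.timeout_secs, tuned.force_analysis)
```

Because each call returns a new value, a partially configured context can be reused as a template for several queries without the calls interfering with one another.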
5 changes: 1 addition & 4 deletions python/src/lib.rs
Original file line number Diff line number Diff line change
@@ -20,9 +20,7 @@ use document::PyDocumentInfo;
use engine::PyEngine;
use error::VectorlessError;
use graph::{PyDocumentGraph, PyDocumentGraphNode, PyEdgeEvidence, PyGraphEdge, PyWeightedKeyword};
use metrics::{
PyLlmMetricsReport, PyMetricsReport, PyPilotMetricsReport, PyRetrievalMetricsReport,
};
use metrics::{PyLlmMetricsReport, PyMetricsReport, PyRetrievalMetricsReport};
use results::{
PyEvidenceItem, PyFailedItem, PyIndexItem, PyIndexMetrics, PyIndexResult, PyQueryMetrics,
PyQueryResult, PyQueryResultItem,
@@ -59,7 +57,6 @@ fn _vectorless(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyEdgeEvidence>()?;
m.add_class::<PyWeightedKeyword>()?;
m.add_class::<PyLlmMetricsReport>()?;
m.add_class::<PyPilotMetricsReport>()?;
m.add_class::<PyRetrievalMetricsReport>()?;
m.add_class::<PyMetricsReport>()?;
m.add_class::<PyConfig>()?;
110 changes: 1 addition & 109 deletions python/src/metrics.rs
@@ -5,7 +5,7 @@

use pyo3::prelude::*;

use ::vectorless::{LlmMetricsReport, MetricsReport, PilotMetricsReport, RetrievalMetricsReport};
use ::vectorless::{LlmMetricsReport, MetricsReport, RetrievalMetricsReport};

/// LLM usage metrics report.
#[pyclass(name = "LlmMetricsReport")]
@@ -101,106 +101,6 @@ impl PyLlmMetricsReport {
}
}

/// Pilot decision metrics report.
#[pyclass(name = "PilotMetricsReport")]
pub struct PyPilotMetricsReport {
pub(crate) inner: PilotMetricsReport,
}

#[pymethods]
impl PyPilotMetricsReport {
/// Total number of Pilot decisions.
#[getter]
fn total_decisions(&self) -> u64 {
self.inner.total_decisions
}

/// Number of start guidance calls.
#[getter]
fn start_guidance_calls(&self) -> u64 {
self.inner.start_guidance_calls
}

/// Number of fork decisions.
#[getter]
fn fork_decisions(&self) -> u64 {
self.inner.fork_decisions
}

/// Number of backtrack calls.
#[getter]
fn backtrack_calls(&self) -> u64 {
self.inner.backtrack_calls
}

/// Number of evaluate calls.
#[getter]
fn evaluate_calls(&self) -> u64 {
self.inner.evaluate_calls
}

/// Decision accuracy based on feedback (0.0 - 1.0).
#[getter]
fn accuracy(&self) -> f64 {
self.inner.accuracy
}

/// Number of correct decisions.
#[getter]
fn correct_decisions(&self) -> u64 {
self.inner.correct_decisions
}

/// Number of incorrect decisions.
#[getter]
fn incorrect_decisions(&self) -> u64 {
self.inner.incorrect_decisions
}

/// Average confidence across all decisions.
#[getter]
fn avg_confidence(&self) -> f64 {
self.inner.avg_confidence
}

/// Number of LLM calls made by Pilot.
#[getter]
fn llm_calls(&self) -> u64 {
self.inner.llm_calls
}

/// Number of interventions.
#[getter]
fn interventions(&self) -> u64 {
self.inner.interventions
}

/// Number of skipped interventions.
#[getter]
fn skipped_interventions(&self) -> u64 {
self.inner.skipped_interventions
}

/// Number of budget exhausted events.
#[getter]
fn budget_exhausted(&self) -> u64 {
self.inner.budget_exhausted
}

/// Number of algorithm fallbacks.
#[getter]
fn algorithm_fallbacks(&self) -> u64 {
self.inner.algorithm_fallbacks
}

fn __repr__(&self) -> String {
format!(
"PilotMetricsReport(decisions={}, accuracy={:.2}, avg_confidence={:.2})",
self.inner.total_decisions, self.inner.accuracy, self.inner.avg_confidence,
)
}
}

/// Retrieval operation metrics report.
#[pyclass(name = "RetrievalMetricsReport")]
pub struct PyRetrievalMetricsReport {
@@ -337,14 +237,6 @@ impl PyMetricsReport {
}
}

/// Pilot metrics.
#[getter]
fn pilot(&self) -> PyPilotMetricsReport {
PyPilotMetricsReport {
inner: self.inner.pilot.clone(),
}
}

/// Retrieval metrics.
#[getter]
fn retrieval(&self) -> PyRetrievalMetricsReport {
13 changes: 3 additions & 10 deletions python/vectorless/cli/commands/query.py
@@ -12,7 +12,7 @@ def query_cmd(
workspace_scope: bool = False,
fmt: str = "text",
verbose: bool = False,
max_tokens: Optional[int] = None,
timeout_secs: Optional[int] = None,
) -> None:
"""Execute a single query against indexed documents.

@@ -22,19 +22,12 @@
workspace_scope: Query across all documents.
fmt: Output format — "text" or "json".
verbose: Show Agent navigation steps.
max_tokens: Max result tokens.
timeout_secs: Per-operation timeout in seconds.

Uses:
Engine.query(QueryContext(question)
.with_doc_ids([...]) or .with_workspace()
.with_max_tokens(n))
.with_timeout_secs(n))
-> QueryResult

Verbose mode prints Agent navigation:
[1/8] Bird's-eye: 3 top-level branches
[2/8] Descend → payment-configuration
[3/8] GetContent → doc 29139b
[4/8] Evaluate → sufficient
→ Answer: ...
"""
raise NotImplementedError
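Since `query_cmd` is still a stub, here is a rough sketch of the control flow its docstring describes. Both `FakeQueryContext` and `FakeEngine` are stand-ins invented for this sketch; the real `QueryContext`/`Engine` API is only assumed from the docstring above:

```python
import asyncio
import json
from typing import Optional


class FakeQueryContext:
    """Stand-in for vectorless.QueryContext (simplified: mutates in place)."""

    def __init__(self, question: str):
        self.question = question
        self.doc_ids: list[str] = []
        self.workspace = False
        self.timeout_secs: Optional[int] = None

    def with_doc_ids(self, ids):
        self.doc_ids = list(ids)
        return self

    def with_workspace(self):
        self.workspace = True
        return self

    def with_timeout_secs(self, secs):
        self.timeout_secs = secs
        return self


class FakeEngine:
    """Returns a canned answer so the control flow is runnable end to end."""

    async def query(self, ctx):
        return {"answer": f"echo: {ctx.question}"}


def query_cmd(question, doc_ids=None, workspace_scope=False, fmt="text",
              timeout_secs=None, engine=None):
    engine = engine or FakeEngine()
    ctx = FakeQueryContext(question)
    # Scope the query to the whole workspace or to specific documents.
    ctx = ctx.with_workspace() if workspace_scope else ctx.with_doc_ids(doc_ids or [])
    if timeout_secs is not None:
        ctx = ctx.with_timeout_secs(timeout_secs)
    result = asyncio.run(engine.query(ctx))
    return json.dumps(result) if fmt == "json" else result["answer"]


print(query_cmd("What changed?", workspace_scope=True, timeout_secs=60))
```

Swapping `FakeEngine` for the real engine would leave the scoping and formatting logic unchanged, which is the point of taking `engine` as a parameter.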
2 changes: 1 addition & 1 deletion rust/examples/flow.rs
@@ -42,7 +42,7 @@ The indexing pipeline processes documents through multiple stages: parsing, tree

### Retrieval Engine

The retrieval engine supports multiple search strategies including greedy depth-first search, beam search, and MCTS. A Pilot component provides LLM-guided navigation at key decision points during tree traversal. The engine is budget-aware, tracking token usage and making cost-conscious decisions about when to invoke the LLM versus using cheaper heuristic scoring.
The retrieval engine uses an agent-based architecture where an Orchestrator coordinates Workers that navigate the document tree using LLM-guided decisions (ls, cd, cat, find, grep). The Orchestrator evaluates progress after each step and can replan when results are insufficient. The engine is budget-aware, tracking token usage and making cost-conscious decisions about when to invoke the LLM versus using cheaper heuristic scoring.
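As a rough illustration of that loop, a Worker can be modeled as a cursor over a document tree that descends one node per step, with the Orchestrator re-evaluating after each move. The tree, helper names, and greedy keyword scoring below are assumptions made for this sketch, not the engine's real API; a real Worker would let an LLM pick among `ls`, `cd`, `cat`, `find`, `grep` at each decision point:

```python
# Toy document tree: dicts are folders, strings are leaf contents.
TREE = {
    "architecture": {
        "overview.md": "The Orchestrator coordinates Workers over the tree.",
        "retrieval.md": "Workers navigate with ls, cd, cat, find, grep.",
    },
    "billing": {"pricing.md": "Plans start at $0."},
}


def ls(node):
    """List entries at the current node, like a shell `ls`."""
    return sorted(node) if isinstance(node, dict) else []


def cd(node, name):
    """Descend into a child subtree."""
    return node[name]


def cat(node):
    """Read a leaf's content."""
    return node if isinstance(node, str) else None


def worker(tree, keyword):
    """Greedy navigation sketch: at each level, descend toward the first
    entry whose name or contents mention the keyword, and return the
    matching leaf. Returning None is where an Orchestrator would replan."""
    node = tree
    while isinstance(node, dict):
        hits = [n for n in ls(node) if keyword in n or keyword in str(node[n])]
        if not hits:
            return None
        node = cd(node, hits[0])
    return cat(node)


print(worker(TREE, "grep"))  # descends architecture -> retrieval.md
```

The budget-awareness described above would sit inside this loop: cheap substring scoring is the default, and the LLM is consulted only when the heuristic is ambiguous or the remaining token budget allows it.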

## Performance
