From a98e3134e4aeee0daef51f549245af9d055147b7 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Tue, 21 Apr 2026 00:31:31 +0800
Subject: [PATCH 1/5] feat(python): add timeout_secs to PyIndexOptions and
 remove pilot metrics

- Add timeout_secs parameter to PyIndexOptions constructor with optional u64 value
- Implement timeout setting logic using with_timeout_secs method
- Remove Pilot metrics report functionality from Python bindings
- Remove PyPilotMetricsReport class registration from module
- Replace PyQueryContext.with_max_tokens with with_timeout_secs and add
  with_force_analysis; update SDK docs and README accordingly

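BREAKING CHANGE: Removed pilot metrics reporting (PilotMetricsReport,
  MetricsReport.pilot), the LlmConfig pilot slot, and the QueryContext builders
  with_max_tokens, with_include_reasoning and with_depth_limit.
  PyIndexOptions now defaults enable_synonym_expansion to true (was false).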
---
 docs/docs/sdk/python.mdx                |   5 +-
 docs/docs/sdk/rust.mdx                  |   5 +-
 python/README.md                        |   5 +-
 python/src/context.rs                   |  18 +-
 python/src/lib.rs                       |   5 +-
 python/src/metrics.rs                   | 110 +----------
 python/vectorless/cli/commands/query.py |  13 +-
 rust/src/client/query_context.rs        |  33 ----
 rust/src/config/mod.rs                  |   3 +-
 rust/src/config/types/llm_pool.rs       |  26 +--
 rust/src/config/types/metrics.rs        |  31 ---
 rust/src/config/types/mod.rs            |   4 +-
 rust/src/config/validator.rs            |   7 -
 rust/src/lib.rs                         |   4 +-
 rust/src/llm/pool.rs                    |  12 --
 rust/src/metrics/hub.rs                 |  84 +-------
 rust/src/metrics/mod.rs                 |  22 +--
 rust/src/metrics/pilot.rs               | 246 ------------------------
 18 files changed, 38 insertions(+), 595 deletions(-)
 delete mode 100644 rust/src/metrics/pilot.rs

diff --git a/docs/docs/sdk/python.mdx b/docs/docs/sdk/python.mdx
index bc3ae0ba..d26b6f4e 100644
--- a/docs/docs/sdk/python.mdx
+++ b/docs/docs/sdk/python.mdx
@@ -111,9 +111,8 @@ answer = await engine.query(
 answer = await engine.query(
     QueryContext("Explain the architecture")
     .with_doc_ids([doc_id])
-    .with_max_tokens(4000)              # Max tokens in result
-    .with_include_reasoning(True)       # Include reasoning chain
-    .with_depth_limit(10)               # Max traversal depth
+    .with_timeout_secs(60)              # Per-operation timeout
+    .with_force_analysis(True)          # Force Orchestrator analysis
 )
 ```
 
diff --git a/docs/docs/sdk/rust.mdx b/docs/docs/sdk/rust.mdx
index 1117302d..c9e3efc1 100644
--- a/docs/docs/sdk/rust.mdx
+++ b/docs/docs/sdk/rust.mdx
@@ -52,14 +52,11 @@ let result = engine.index(
 
 ```rust
 use vectorless::QueryContext;
-use vectorless::StrategyPreference;
 
 let result = engine.query(
     QueryContext::new("What is the total revenue?")
         .with_doc_ids(vec![doc_id.to_string()])
-        .with_strategy(StrategyPreference::ForceHybrid)
-        .with_max_tokens(4000)
-        .with_include_reasoning(true)
+        .with_timeout_secs(60)
 ).await?;
 
 if let Some(item) = result.single() {
diff --git a/python/README.md b/python/README.md
index 4ca5fa40..be4761d0 100644
--- a/python/README.md
+++ b/python/README.md
@@ -116,9 +116,8 @@ class QueryContext:
 
     def with_doc_ids(self, doc_ids: list[str]) -> QueryContext: ...
     def with_workspace(self) -> QueryContext: ...
-    def with_max_tokens(self, tokens: int) -> QueryContext: ...
-    def with_include_reasoning(self, include: bool) -> QueryContext: ...
-    def with_depth_limit(self, depth: int) -> QueryContext: ...
+    def with_timeout_secs(self, secs: int) -> QueryContext: ...
+    def with_force_analysis(self, force: bool) -> QueryContext: ...
 ```
 
 ### IndexResult
diff --git a/python/src/context.rs b/python/src/context.rs
index bdf37c6c..2bf0ae94 100644
--- a/python/src/context.rs
+++ b/python/src/context.rs
@@ -44,13 +44,14 @@ pub struct PyIndexOptions {
 #[pymethods]
 impl PyIndexOptions {
     #[new]
-    #[pyo3(signature = (mode="default", generate_summaries=true, generate_description=false, generate_ids=true, enable_synonym_expansion=false))]
+    #[pyo3(signature = (mode="default", generate_summaries=true, generate_description=false, generate_ids=true, enable_synonym_expansion=true, timeout_secs=None))]
     fn new(
         mode: &str,
         generate_summaries: bool,
         generate_description: bool,
         generate_ids: bool,
         enable_synonym_expansion: bool,
+        timeout_secs: Option<u64>,
     ) -> PyResult<Self> {
         let mut opts = IndexOptions::new();
         match mode {
@@ -71,6 +72,9 @@ impl PyIndexOptions {
         opts.generate_description = generate_description;
         opts.generate_ids = generate_ids;
         opts.enable_synonym_expansion = enable_synonym_expansion;
+        if let Some(secs) = timeout_secs {
+            opts = opts.with_timeout_secs(secs);
+        }
         Ok(Self { inner: opts })
     }
 
@@ -260,9 +264,15 @@ impl PyQueryContext {
         Self { inner: ctx }
     }
 
-    /// Set the maximum tokens for the result content.
-    fn with_max_tokens(&self, tokens: usize) -> Self {
-        let ctx = self.inner.clone().with_max_tokens(tokens);
+    /// Set per-operation timeout in seconds.
+    fn with_timeout_secs(&self, secs: u64) -> Self {
+        let ctx = self.inner.clone().with_timeout_secs(secs);
+        Self { inner: ctx }
+    }
+
+    /// Force the Orchestrator to analyze documents before dispatching Workers.
+    fn with_force_analysis(&self, force: bool) -> Self {
+        let ctx = self.inner.clone().with_force_analysis(force);
         Self { inner: ctx }
     }
 
diff --git a/python/src/lib.rs b/python/src/lib.rs
index 6a7eb913..a3951cd5 100644
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -20,9 +20,7 @@ use document::PyDocumentInfo;
 use engine::PyEngine;
 use error::VectorlessError;
 use graph::{PyDocumentGraph, PyDocumentGraphNode, PyEdgeEvidence, PyGraphEdge, PyWeightedKeyword};
-use metrics::{
-    PyLlmMetricsReport, PyMetricsReport, PyPilotMetricsReport, PyRetrievalMetricsReport,
-};
+use metrics::{PyLlmMetricsReport, PyMetricsReport, PyRetrievalMetricsReport};
 use results::{
     PyEvidenceItem, PyFailedItem, PyIndexItem, PyIndexMetrics, PyIndexResult, PyQueryMetrics,
     PyQueryResult, PyQueryResultItem,
@@ -59,7 +57,6 @@ fn _vectorless(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PyEdgeEvidence>()?;
     m.add_class::<PyWeightedKeyword>()?;
     m.add_class::<PyLlmMetricsReport>()?;
-    m.add_class::<PyPilotMetricsReport>()?;
     m.add_class::<PyRetrievalMetricsReport>()?;
     m.add_class::<PyMetricsReport>()?;
     m.add_class::<PyConfig>()?;
diff --git a/python/src/metrics.rs b/python/src/metrics.rs
index 27a71dcb..f194cd82 100644
--- a/python/src/metrics.rs
+++ b/python/src/metrics.rs
@@ -5,7 +5,7 @@
 
 use pyo3::prelude::*;
 
-use ::vectorless::{LlmMetricsReport, MetricsReport, PilotMetricsReport, RetrievalMetricsReport};
+use ::vectorless::{LlmMetricsReport, MetricsReport, RetrievalMetricsReport};
 
 /// LLM usage metrics report.
 #[pyclass(name = "LlmMetricsReport")]
@@ -101,106 +101,6 @@ impl PyLlmMetricsReport {
     }
 }
 
-/// Pilot decision metrics report.
-#[pyclass(name = "PilotMetricsReport")]
-pub struct PyPilotMetricsReport {
-    pub(crate) inner: PilotMetricsReport,
-}
-
-#[pymethods]
-impl PyPilotMetricsReport {
-    /// Total number of Pilot decisions.
-    #[getter]
-    fn total_decisions(&self) -> u64 {
-        self.inner.total_decisions
-    }
-
-    /// Number of start guidance calls.
-    #[getter]
-    fn start_guidance_calls(&self) -> u64 {
-        self.inner.start_guidance_calls
-    }
-
-    /// Number of fork decisions.
-    #[getter]
-    fn fork_decisions(&self) -> u64 {
-        self.inner.fork_decisions
-    }
-
-    /// Number of backtrack calls.
-    #[getter]
-    fn backtrack_calls(&self) -> u64 {
-        self.inner.backtrack_calls
-    }
-
-    /// Number of evaluate calls.
-    #[getter]
-    fn evaluate_calls(&self) -> u64 {
-        self.inner.evaluate_calls
-    }
-
-    /// Decision accuracy based on feedback (0.0 - 1.0).
-    #[getter]
-    fn accuracy(&self) -> f64 {
-        self.inner.accuracy
-    }
-
-    /// Number of correct decisions.
-    #[getter]
-    fn correct_decisions(&self) -> u64 {
-        self.inner.correct_decisions
-    }
-
-    /// Number of incorrect decisions.
-    #[getter]
-    fn incorrect_decisions(&self) -> u64 {
-        self.inner.incorrect_decisions
-    }
-
-    /// Average confidence across all decisions.
-    #[getter]
-    fn avg_confidence(&self) -> f64 {
-        self.inner.avg_confidence
-    }
-
-    /// Number of LLM calls made by Pilot.
-    #[getter]
-    fn llm_calls(&self) -> u64 {
-        self.inner.llm_calls
-    }
-
-    /// Number of interventions.
-    #[getter]
-    fn interventions(&self) -> u64 {
-        self.inner.interventions
-    }
-
-    /// Number of skipped interventions.
-    #[getter]
-    fn skipped_interventions(&self) -> u64 {
-        self.inner.skipped_interventions
-    }
-
-    /// Number of budget exhausted events.
-    #[getter]
-    fn budget_exhausted(&self) -> u64 {
-        self.inner.budget_exhausted
-    }
-
-    /// Number of algorithm fallbacks.
-    #[getter]
-    fn algorithm_fallbacks(&self) -> u64 {
-        self.inner.algorithm_fallbacks
-    }
-
-    fn __repr__(&self) -> String {
-        format!(
-            "PilotMetricsReport(decisions={}, accuracy={:.2}, avg_confidence={:.2})",
-            self.inner.total_decisions, self.inner.accuracy, self.inner.avg_confidence,
-        )
-    }
-}
-
 /// Retrieval operation metrics report.
 #[pyclass(name = "RetrievalMetricsReport")]
 pub struct PyRetrievalMetricsReport {
@@ -337,14 +237,6 @@ impl PyMetricsReport {
         }
     }
 
-    /// Pilot metrics.
-    #[getter]
-    fn pilot(&self) -> PyPilotMetricsReport {
-        PyPilotMetricsReport {
-            inner: self.inner.pilot.clone(),
-        }
-    }
-
     /// Retrieval metrics.
     #[getter]
     fn retrieval(&self) -> PyRetrievalMetricsReport {
diff --git a/python/vectorless/cli/commands/query.py b/python/vectorless/cli/commands/query.py
index f928ab46..79638666 100644
--- a/python/vectorless/cli/commands/query.py
+++ b/python/vectorless/cli/commands/query.py
@@ -12,7 +12,7 @@ def query_cmd(
     workspace_scope: bool = False,
     fmt: str = "text",
     verbose: bool = False,
-    max_tokens: Optional[int] = None,
+    timeout_secs: Optional[int] = None,
 ) -> None:
     """Execute a single query against indexed documents.
 
@@ -22,19 +22,12 @@ def query_cmd(
         workspace_scope: Query across all documents.
         fmt: Output format — "text" or "json".
         verbose: Show Agent navigation steps.
-        max_tokens: Max result tokens.
+        timeout_secs: Per-operation timeout in seconds.
 
     Uses:
         Engine.query(QueryContext(question)
             .with_doc_ids([...])  or  .with_workspace()
-            .with_max_tokens(n))
+            .with_timeout_secs(n))
         -> QueryResult
-
-    Verbose mode prints Agent navigation:
-        [1/8] Bird's-eye: 3 top-level branches
-        [2/8] Descend → payment-configuration
-        [3/8] GetContent → doc 29139b
-        [4/8] Evaluate → sufficient
-        → Answer: ...
     """
     raise NotImplementedError
diff --git a/rust/src/client/query_context.rs b/rust/src/client/query_context.rs
index e9513315..48d9ad2a 100644
--- a/rust/src/client/query_context.rs
+++ b/rust/src/client/query_context.rs
@@ -49,12 +49,6 @@ pub struct QueryContext {
     pub(crate) query: String,
     /// Target scope.
     pub(crate) scope: QueryScope,
-    /// Maximum tokens for the result content.
-    pub(crate) max_tokens: Option<usize>,
-    /// Whether to include the pilot reasoning chain in the result.
-    pub(crate) include_reasoning: bool,
-    /// Maximum tree traversal depth for the pilot.
-    pub(crate) depth_limit: Option<usize>,
     /// Per-operation timeout (seconds). `None` means no timeout.
     pub(crate) timeout_secs: Option<u64>,
     /// Force Orchestrator analysis even when documents are specified.
@@ -72,9 +66,6 @@ impl QueryContext {
         Self {
             query: query.into(),
             scope: QueryScope::Workspace,
-            max_tokens: None,
-            include_reasoning: true,
-            depth_limit: None,
             timeout_secs: None,
             force_analysis: false,
         }
@@ -95,24 +86,6 @@ impl QueryContext {
         self
     }
 
-    /// Set the maximum tokens for the result content.
-    pub fn with_max_tokens(mut self, tokens: usize) -> Self {
-        self.max_tokens = Some(tokens);
-        self
-    }
-
-    /// Set whether to include the pilot reasoning chain.
-    pub fn with_include_reasoning(mut self, include: bool) -> Self {
-        self.include_reasoning = include;
-        self
-    }
-
-    /// Set the maximum tree traversal depth for the pilot.
-    pub fn with_depth_limit(mut self, depth: usize) -> Self {
-        self.depth_limit = Some(depth);
-        self
-    }
-
     /// Set per-operation timeout in seconds.
     pub fn with_timeout_secs(mut self, secs: u64) -> Self {
         self.timeout_secs = Some(secs);
@@ -193,14 +166,8 @@ mod tests {
     fn test_builder_options() {
         let ctx = QueryContext::new("test")
             .with_doc_ids(vec!["doc-1".to_string()])
-            .with_max_tokens(4000)
-            .with_include_reasoning(false)
-            .with_depth_limit(5)
             .with_timeout_secs(60);
 
-        assert_eq!(ctx.max_tokens, Some(4000));
-        assert!(!ctx.include_reasoning);
-        assert_eq!(ctx.depth_limit, Some(5));
         assert_eq!(ctx.timeout_secs, Some(60));
     }
 
diff --git a/rust/src/config/mod.rs b/rust/src/config/mod.rs
index 26c73ac3..3ece0fe6 100644
--- a/rust/src/config/mod.rs
+++ b/rust/src/config/mod.rs
@@ -12,6 +12,5 @@ mod validator;
 pub use types::Config;
 pub(crate) use types::{
     CompressionAlgorithm, FallbackBehavior, FallbackConfig, IndexerConfig, LlmConfig,
-    LlmMetricsConfig, MetricsConfig, OnAllFailedBehavior, PilotMetricsConfig,
-    RetrievalMetricsConfig, SlotConfig,
+    LlmMetricsConfig, MetricsConfig, OnAllFailedBehavior, RetrievalMetricsConfig, SlotConfig,
 };
diff --git a/rust/src/config/types/llm_pool.rs b/rust/src/config/types/llm_pool.rs
index fc092a12..b38497aa 100644
--- a/rust/src/config/types/llm_pool.rs
+++ b/rust/src/config/types/llm_pool.rs
@@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize};
 ///
 /// Contains:
 /// - Global credentials (`api_key`, `model`, `endpoint`)
-/// - Per-purpose slot overrides (`index`, `retrieval`, `pilot`)
+/// - Per-purpose slot overrides (`index`, `retrieval`)
 /// - Infrastructure settings (`retry`, `throttle`, `fallback`)
 ///
 /// # Simple usage (via EngineBuilder)
@@ -71,11 +71,6 @@ pub struct LlmConfig {
     #[serde(default = "default_retrieval_slot")]
     pub retrieval: SlotConfig,
 
-    /// Pilot slot (navigation guidance).
-    /// Uses a fast model with higher token limit.
-    #[serde(default = "default_pilot_slot")]
-    pub pilot: SlotConfig,
-
     /// Retry configuration for LLM calls.
     #[serde(default)]
     pub retry: RetryConfig,
@@ -96,13 +91,6 @@ fn default_retrieval_slot() -> SlotConfig {
     }
 }
 
-fn default_pilot_slot() -> SlotConfig {
-    SlotConfig {
-        max_tokens: 300,
-        ..SlotConfig::default()
-    }
-}
-
 impl Default for LlmConfig {
     fn default() -> Self {
         Self {
@@ -111,7 +99,6 @@ impl Default for LlmConfig {
             endpoint: None,
             index: SlotConfig::default(),
             retrieval: default_retrieval_slot(),
-            pilot: default_pilot_slot(),
             retry: RetryConfig::default(),
             throttle: ThrottleConfig::default(),
             fallback: FallbackConfig::default(),
@@ -158,12 +145,6 @@ impl LlmConfig {
         self
     }
 
-    /// Set the pilot slot configuration.
-    pub fn with_pilot(mut self, slot: SlotConfig) -> Self {
-        self.pilot = slot;
-        self
-    }
-
     /// Set the retry configuration.
     pub fn with_retry(mut self, retry: RetryConfig) -> Self {
         self.retry = retry;
@@ -199,7 +180,7 @@ impl LlmConfig {
 /// Per-purpose LLM slot override.
 ///
 /// Controls model selection and generation parameters for a specific
-/// LLM usage (index, retrieval, or pilot).
+/// LLM usage (index or retrieval).
 ///
 /// - `model`: Override the default model (optional).
 /// - `max_tokens`: Maximum response tokens.
@@ -591,10 +572,8 @@ mod tests {
         assert!(config.endpoint.is_none());
         assert!(config.index.model.is_none());
         assert!(config.retrieval.model.is_none());
-        assert!(config.pilot.model.is_none());
         assert_eq!(config.index.max_tokens, 200);
         assert_eq!(config.retrieval.max_tokens, 100);
-        assert_eq!(config.pilot.max_tokens, 300);
     }
 
     #[test]
@@ -617,7 +596,6 @@ mod tests {
 
         assert_eq!(config.resolve_model(&config.index), "gpt-4o");
         assert_eq!(config.resolve_model(&config.retrieval), "gpt-4o-mini");
-        assert_eq!(config.resolve_model(&config.pilot), "gpt-4o");
     }
 
     #[test]
diff --git a/rust/src/config/types/metrics.rs b/rust/src/config/types/metrics.rs
index 230686ac..c1f4e766 100644
--- a/rust/src/config/types/metrics.rs
+++ b/rust/src/config/types/metrics.rs
@@ -24,10 +24,6 @@ pub struct MetricsConfig {
     #[serde(default)]
     pub llm: LlmMetricsConfig,
 
-    /// Pilot metrics configuration.
-    #[serde(default)]
-    pub pilot: PilotMetricsConfig,
-
     /// Retrieval metrics configuration.
     #[serde(default)]
     pub retrieval: RetrievalMetricsConfig,
@@ -52,7 +48,6 @@ impl Default for MetricsConfig {
             storage_path: default_storage_path(),
             retention_days: default_retention_days(),
             llm: LlmMetricsConfig::default(),
-            pilot: PilotMetricsConfig::default(),
             retrieval: RetrievalMetricsConfig::default(),
         }
     }
@@ -125,32 +120,6 @@ impl LlmMetricsConfig {
     }
 }
 
-/// Pilot-specific metrics configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct PilotMetricsConfig {
-    /// Track Pilot decisions.
-    #[serde(default = "default_true")]
-    pub track_decisions: bool,
-
-    /// Track decision accuracy (requires feedback).
-    #[serde(default = "default_true")]
-    pub track_accuracy: bool,
-
-    /// Track user feedback.
-    #[serde(default = "default_true")]
-    pub track_feedback: bool,
-}
-
-impl Default for PilotMetricsConfig {
-    fn default() -> Self {
-        Self {
-            track_decisions: default_true(),
-            track_accuracy: default_true(),
-            track_feedback: default_true(),
-        }
-    }
-}
-
 /// Retrieval-specific metrics configuration.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct RetrievalMetricsConfig {
diff --git a/rust/src/config/types/mod.rs b/rust/src/config/types/mod.rs
index b4421110..e6ba3f8b 100644
--- a/rust/src/config/types/mod.rs
+++ b/rust/src/config/types/mod.rs
@@ -15,9 +15,7 @@ pub(crate) use indexer::IndexerConfig;
 pub(crate) use llm_pool::{
     FallbackBehavior, FallbackConfig, LlmConfig, OnAllFailedBehavior, SlotConfig,
 };
-pub(crate) use metrics::{
-    LlmMetricsConfig, MetricsConfig, PilotMetricsConfig, RetrievalMetricsConfig,
-};
+pub(crate) use metrics::{LlmMetricsConfig, MetricsConfig, RetrievalMetricsConfig};
 pub(crate) use retrieval::RetrievalConfig;
 pub(crate) use storage::{CompressionAlgorithm, StorageConfig};
 
diff --git a/rust/src/config/validator.rs b/rust/src/config/validator.rs
index 5d08c89a..c3f55422 100644
--- a/rust/src/config/validator.rs
+++ b/rust/src/config/validator.rs
@@ -97,13 +97,6 @@ impl ValidationRule for RangeValidator {
             ));
         }
 
-        if config.llm.pilot.max_tokens == 0 {
-            errors.push(ValidationError::error(
-                "llm.pilot.max_tokens",
-                "Pilot max tokens must be greater than 0",
-            ));
-        }
-
         // Retrieval ranges
         if config.retrieval.top_k == 0 {
             errors.push(ValidationError::error(
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index 9ddc9b35..e9aa745c 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -83,9 +83,7 @@ pub use graph::{DocumentGraph, DocumentGraphNode, EdgeEvidence, GraphEdge, Weigh
 pub use events::{EventEmitter, IndexEvent, QueryEvent, WorkspaceEvent};
 
 // Metrics
-pub use metrics::{
-    IndexMetrics, LlmMetricsReport, MetricsReport, PilotMetricsReport, RetrievalMetricsReport,
-};
+pub use metrics::{IndexMetrics, LlmMetricsReport, MetricsReport, RetrievalMetricsReport};
 
 // Errors
 pub use error::{Error, Result};
diff --git a/rust/src/llm/pool.rs b/rust/src/llm/pool.rs
index 09bb4acd..9acef8ca 100644
--- a/rust/src/llm/pool.rs
+++ b/rust/src/llm/pool.rs
@@ -39,7 +39,6 @@ use crate::metrics::MetricsHub;
 pub struct LlmPool {
     index: Arc<LlmClient>,
     retrieval: Arc<LlmClient>,
-    pilot: Arc<LlmClient>,
 }
 
 impl LlmPool {
@@ -103,7 +102,6 @@ impl LlmPool {
         Self {
             index: build_client(&config.index),
             retrieval: build_client(&config.retrieval),
-            pilot: build_client(&config.pilot),
         }
     }
 
@@ -121,11 +119,6 @@ impl LlmPool {
     pub fn retrieval(&self) -> &LlmClient {
         &self.retrieval
     }
-
-    /// Get the pilot client.
-    pub fn pilot(&self) -> &LlmClient {
-        &self.pilot
-    }
 }
 
 impl Default for LlmPool {
@@ -149,7 +142,6 @@ mod tests {
 
         assert_eq!(pool.index().config().model, "gpt-4o-mini");
         assert_eq!(pool.retrieval().config().model, "gpt-4o");
-        assert_eq!(pool.pilot().config().model, "gpt-4o");
         assert_eq!(pool.index().config().max_tokens, 100);
     }
 
@@ -162,15 +154,11 @@ mod tests {
         let hub = MetricsHub::shared();
         let pool = LlmPool::from_config(&config, Some(hub.clone()));
 
-        // Verify each client has fallback (which means executor was built correctly)
         assert!(pool.index().fallback().is_some());
         assert!(pool.retrieval().fallback().is_some());
-        assert!(pool.pilot().fallback().is_some());
 
-        // Verify models are resolved correctly
         assert_eq!(pool.index().config().model, "gpt-4o");
         assert_eq!(pool.retrieval().config().model, "gpt-4o");
-        assert_eq!(pool.pilot().config().model, "gpt-4o");
     }
 
     #[test]
diff --git a/rust/src/metrics/hub.rs b/rust/src/metrics/hub.rs
index c00471cc..c4f70fe4 100644
--- a/rust/src/metrics/hub.rs
+++ b/rust/src/metrics/hub.rs
@@ -6,7 +6,6 @@
 use std::sync::Arc;
 
 use super::llm::{LlmMetrics, LlmMetricsReport};
-use super::pilot::{InterventionPoint, PilotMetrics, PilotMetricsReport};
 use super::retrieval::{RetrievalMetrics, RetrievalMetricsReport};
 use crate::config::MetricsConfig;
 
@@ -14,7 +13,6 @@ use crate::config::MetricsConfig;
 ///
 /// Provides a single point for all metrics collection across:
 /// - LLM operations (tokens, latency, cost)
-/// - Pilot decisions (accuracy, confidence, feedback)
 /// - Retrieval operations (paths, scores, cache)
 ///
 /// # Thread Safety
@@ -24,7 +22,7 @@ use crate::config::MetricsConfig;
 /// # Example
 ///
 /// ```rust
-/// use vectorless::metrics::{MetricsHub, MetricsConfig, InterventionPoint};
+/// use vectorless::metrics::{MetricsHub, MetricsConfig};
 ///
 /// let config = MetricsConfig::default();
 /// let hub = MetricsHub::new(config);
@@ -32,9 +30,6 @@ use crate::config::MetricsConfig;
 /// // Record LLM call
 /// hub.record_llm_call(100, 50, 150, true);
 ///
-/// // Record Pilot decision
-/// hub.record_pilot_decision(0.85, InterventionPoint::Fork);
-///
 /// // Get report
 /// let report = hub.generate_report();
 /// ```
@@ -42,7 +37,6 @@ use crate::config::MetricsConfig;
 pub struct MetricsHub {
     config: MetricsConfig,
     llm: LlmMetrics,
-    pilot: PilotMetrics,
     retrieval: RetrievalMetrics,
 }
 
@@ -52,7 +46,6 @@ impl MetricsHub {
         Self {
             config,
             llm: LlmMetrics::new(),
-            pilot: PilotMetrics::new(),
             retrieval: RetrievalMetrics::new(),
         }
     }
@@ -132,67 +125,6 @@ impl MetricsHub {
         self.llm.generate_report()
     }
 
-    // ========================================================================
-    // Pilot Metrics
-    // ========================================================================
-
-    /// Record a Pilot decision.
-    pub fn record_pilot_decision(&self, confidence: f64, point: InterventionPoint) {
-        if !self.config.enabled || !self.config.pilot.track_decisions {
-            return;
-        }
-        self.pilot
-            .record_decision(confidence, point, &self.config.pilot);
-    }
-
-    /// Record feedback on a Pilot decision.
-    pub fn record_pilot_feedback(&self, was_correct: bool) {
-        if !self.config.enabled || !self.config.pilot.track_feedback {
-            return;
-        }
-        self.pilot.record_feedback(was_correct, &self.config.pilot);
-    }
-
-    /// Record a Pilot LLM call.
-    pub fn record_pilot_llm_call(&self) {
-        if self.config.enabled {
-            self.pilot.record_llm_call();
-        }
-    }
-
-    /// Record a Pilot intervention.
-    pub fn record_pilot_intervention(&self) {
-        if self.config.enabled {
-            self.pilot.record_intervention();
-        }
-    }
-
-    /// Record a skipped Pilot intervention.
-    pub fn record_pilot_intervention_skipped(&self) {
-        if self.config.enabled {
-            self.pilot.record_skipped_intervention();
-        }
-    }
-
-    /// Record Pilot budget exhausted.
-    pub fn record_pilot_budget_exhausted(&self) {
-        if self.config.enabled {
-            self.pilot.record_budget_exhausted();
-        }
-    }
-
-    /// Record Pilot fallback to algorithm.
-    pub fn record_pilot_algorithm_fallback(&self) {
-        if self.config.enabled {
-            self.pilot.record_algorithm_fallback();
-        }
-    }
-
-    /// Get Pilot metrics report.
-    pub fn pilot_report(&self) -> PilotMetricsReport {
-        self.pilot.generate_report()
-    }
-
     // ========================================================================
     // Retrieval Metrics
     // ========================================================================
@@ -261,7 +193,6 @@ impl MetricsHub {
     /// Reset all metrics.
     pub fn reset(&self) {
         self.llm.reset();
-        self.pilot.reset();
         self.retrieval.reset();
     }
 
@@ -269,7 +200,6 @@ impl MetricsHub {
     pub fn generate_report(&self) -> MetricsReport {
         MetricsReport {
             llm: self.llm_report(),
-            pilot: self.pilot_report(),
             retrieval: self.retrieval_report(),
         }
     }
@@ -286,8 +216,6 @@ impl Default for MetricsHub {
 pub struct MetricsReport {
     /// LLM metrics.
     pub llm: LlmMetricsReport,
-    /// Pilot metrics.
-    pub pilot: PilotMetricsReport,
     /// Retrieval metrics.
     pub retrieval: RetrievalMetricsReport,
 }
@@ -307,15 +235,12 @@ mod tests {
     fn test_metrics_hub_recording() {
         let hub = MetricsHub::with_defaults();
 
-        // Record various metrics
         hub.record_llm_call(100, 50, 150, true);
-        hub.record_pilot_decision(0.9, InterventionPoint::Fork);
         hub.record_retrieval_query(5, 10, 100);
 
         let report = hub.generate_report();
 
         assert_eq!(report.llm.total_calls, 1);
-        assert_eq!(report.pilot.total_decisions, 1);
         assert_eq!(report.retrieval.total_queries, 1);
     }
 
@@ -325,12 +250,10 @@ mod tests {
         let hub = MetricsHub::new(config);
 
         hub.record_llm_call(100, 50, 150, true);
-        hub.record_pilot_decision(0.9, InterventionPoint::Fork);
 
         let report = hub.generate_report();
 
         assert_eq!(report.llm.total_calls, 0);
-        assert_eq!(report.pilot.total_decisions, 0);
     }
 
     #[test]
@@ -348,11 +271,8 @@ mod tests {
     fn test_llm_metrics_success_and_failure() {
         let hub = MetricsHub::with_defaults();
 
-        // Record successes
         hub.record_llm_call(100, 50, 150, true);
         hub.record_llm_call(200, 100, 300, true);
-
-        // Record failure
         hub.record_llm_call(0, 0, 50, false);
 
         let report = hub.llm_report();
@@ -383,7 +303,6 @@ mod tests {
     fn test_shared_arc_metrics() {
         let hub = MetricsHub::shared();
 
-        // Clone the Arc — both references point to the same hub
         let hub2 = hub.clone();
         hub.record_llm_call(100, 50, 100, true);
         hub2.record_llm_call(200, 100, 200, true);
@@ -400,7 +319,6 @@ mod tests {
         hub.record_llm_call(1000, 500, 200, true);
 
         let report = hub.generate_report();
-        // Cost should be positive (exact value depends on config pricing)
         assert!(report.total_cost_usd() >= 0.0);
     }
 }
diff --git a/rust/src/metrics/mod.rs b/rust/src/metrics/mod.rs
index bc724988..26ab6411 100644
--- a/rust/src/metrics/mod.rs
+++ b/rust/src/metrics/mod.rs
@@ -5,7 +5,6 @@
 //!
 //! This module provides centralized metrics collection across all components:
 //! - **LLM Metrics** — Token usage, latency, cost
-//! - **Pilot Metrics** — Decisions, accuracy, feedback
 //! - **Retrieval Metrics** — Paths, scores, iterations, cache
 //!
 //! # Architecture
@@ -14,13 +13,13 @@
 //! ┌─────────────────────────────────────────────────────────────────┐
 //! │                        MetricsHub                                │
 //! │                                                                  │
-//! │   ┌─────────────┐   ┌─────────────┐   ┌─────────────┐          │
-//! │   │  LlmMetrics │   │PilotMetrics │   │RetrievalMetrics│        │
-//! │   │             │   │             │   │             │          │
-//! │   │ - tokens    │   │ - decisions │   │ - paths     │          │
-//! │   │ - latency   │   │ - accuracy  │   │ - scores    │          │
-//! │   │ - cost      │   │ - feedback  │   │ - cache     │          │
-//! │   └─────────────┘   └─────────────┘   └─────────────┘          │
+//! │   ┌─────────────┐   ┌──────────────────┐                       │
+//! │   │  LlmMetrics │   │RetrievalMetrics  │                       │
+//! │   │             │   │                  │                       │
+//! │   │ - tokens    │   │ - paths          │                       │
+//! │   │ - latency   │   │ - scores         │                       │
+//! │   │ - cost      │   │ - cache          │                       │
+//! │   └─────────────┘   └──────────────────┘                       │
 //! │                                                                  │
 //! │   ┌─────────────────────────────────────────────────────────┐  │
 //! │   │                    MetricsReport                         │  │
@@ -33,7 +32,7 @@
 //! # Example
 //!
 //! ```rust
-//! use vectorless::metrics::{MetricsHub, MetricsConfig, InterventionPoint};
+//! use vectorless::metrics::{MetricsHub, MetricsConfig};
 //!
 //! let config = MetricsConfig::default();
 //! let hub = MetricsHub::new(config);
@@ -41,9 +40,6 @@
 //! // Record LLM call
 //! hub.record_llm_call(100, 50, 150, true);
 //!
-//! // Record Pilot decision
-//! hub.record_pilot_decision(0.85, InterventionPoint::Fork);
-//!
 //! // Generate report
 //! let report = hub.generate_report();
 //! println!("Total cost: ${:.4}", report.llm.estimated_cost_usd);
@@ -52,11 +48,9 @@
 mod hub;
 mod index;
 mod llm;
-mod pilot;
 mod retrieval;
 
 pub use hub::{MetricsHub, MetricsReport};
 pub use index::IndexMetrics;
 pub use llm::LlmMetricsReport;
-pub use pilot::PilotMetricsReport;
 pub use retrieval::RetrievalMetricsReport;
diff --git a/rust/src/metrics/pilot.rs b/rust/src/metrics/pilot.rs
deleted file mode 100644
index f8365e45..00000000
--- a/rust/src/metrics/pilot.rs
+++ /dev/null
@@ -1,246 +0,0 @@
-// Copyright (c) 2026 vectorless developers
-// SPDX-License-Identifier: Apache-2.0
-
-//! Pilot metrics collection.
-
-use std::sync::atomic::{AtomicU64, Ordering};
-
-use crate::config::PilotMetricsConfig;
-
-/// Intervention point type.
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum InterventionPoint {
-    /// At search start.
-    Start,
-    /// At a fork (multiple candidates).
-    Fork,
-    /// During backtracking.
-    Backtrack,
-    /// Evaluating content sufficiency.
-    Evaluate,
-    /// Binary pruning for wide nodes.
-    Prune,
-}
-
-/// Pilot metrics tracker.
-#[derive(Debug, Default)]
-pub struct PilotMetrics {
-    /// Total number of Pilot decisions.
-    pub total_decisions: AtomicU64,
-    /// Number of start guidance calls.
-    pub start_guidance_calls: AtomicU64,
-    /// Number of fork decisions.
-    pub fork_decisions: AtomicU64,
-    /// Number of backtrack guidance calls.
-    pub backtrack_calls: AtomicU64,
-    /// Number of evaluate calls.
-    pub evaluate_calls: AtomicU64,
-    /// Number of correct decisions (based on feedback).
-    pub correct_decisions: AtomicU64,
-    /// Number of incorrect decisions (based on feedback).
-    pub incorrect_decisions: AtomicU64,
-    /// Sum of confidence values stored as u64 bits (for atomic ops).
-    /// We store the sum scaled by 1,000,000 to maintain precision.
-    pub confidence_sum_scaled: AtomicU64,
-    /// Number of confidence samples.
-    pub confidence_count: AtomicU64,
-    /// Number of LLM calls made by Pilot.
-    pub llm_calls: AtomicU64,
-    /// Number of times Pilot intervened.
-    pub interventions: AtomicU64,
-    /// Number of times Pilot skipped intervention (algorithm was confident).
-    pub skipped_interventions: AtomicU64,
-    /// Number of budget exhausted events.
-    pub budget_exhausted: AtomicU64,
-    /// Number of fallback to algorithm.
-    pub algorithm_fallbacks: AtomicU64,
-}
-
-impl PilotMetrics {
-    /// Create new Pilot metrics.
-    pub fn new() -> Self {
-        Self::default()
-    }
-
-    /// Record a Pilot decision.
-    pub fn record_decision(
-        &self,
-        confidence: f64,
-        point: InterventionPoint,
-        config: &PilotMetricsConfig,
-    ) {
-        if !config.track_decisions {
-            return;
-        }
-
-        self.total_decisions.fetch_add(1, Ordering::Relaxed);
-
-        match point {
-            InterventionPoint::Start => {
-                self.start_guidance_calls.fetch_add(1, Ordering::Relaxed);
-            }
-            InterventionPoint::Fork | InterventionPoint::Prune => {
-                self.fork_decisions.fetch_add(1, Ordering::Relaxed);
-            }
-            InterventionPoint::Backtrack => {
-                self.backtrack_calls.fetch_add(1, Ordering::Relaxed);
-            }
-            InterventionPoint::Evaluate => {
-                self.evaluate_calls.fetch_add(1, Ordering::Relaxed);
-            }
-        }
-
-        // Update average confidence (store as scaled integer for atomic operations)
-        let scaled_confidence = (confidence * 1_000_000.0) as u64;
-        self.confidence_sum_scaled
-            .fetch_add(scaled_confidence, Ordering::Relaxed);
-        self.confidence_count.fetch_add(1, Ordering::Relaxed);
-    }
-
-    /// Record feedback on a decision.
-    pub fn record_feedback(&self, was_correct: bool, config: &PilotMetricsConfig) {
-        if !config.track_feedback {
-            return;
-        }
-
-        if was_correct {
-            self.correct_decisions.fetch_add(1, Ordering::Relaxed);
-        } else {
-            self.incorrect_decisions.fetch_add(1, Ordering::Relaxed);
-        }
-    }
-
-    /// Record an LLM call made by Pilot.
-    pub fn record_llm_call(&self) {
-        self.llm_calls.fetch_add(1, Ordering::Relaxed);
-    }
-
-    /// Record an intervention.
-    pub fn record_intervention(&self) {
-        self.interventions.fetch_add(1, Ordering::Relaxed);
-    }
-
-    /// Record a skipped intervention.
-    pub fn record_skipped_intervention(&self) {
-        self.skipped_interventions.fetch_add(1, Ordering::Relaxed);
-    }
-
-    /// Record budget exhausted.
-    pub fn record_budget_exhausted(&self) {
-        self.budget_exhausted.fetch_add(1, Ordering::Relaxed);
-    }
-
-    /// Record algorithm fallback.
-    pub fn record_algorithm_fallback(&self) {
-        self.algorithm_fallbacks.fetch_add(1, Ordering::Relaxed);
-    }
-
-    /// Reset all metrics.
-    pub fn reset(&self) {
-        self.total_decisions.store(0, Ordering::Relaxed);
-        self.start_guidance_calls.store(0, Ordering::Relaxed);
-        self.fork_decisions.store(0, Ordering::Relaxed);
-        self.backtrack_calls.store(0, Ordering::Relaxed);
-        self.evaluate_calls.store(0, Ordering::Relaxed);
-        self.correct_decisions.store(0, Ordering::Relaxed);
-        self.incorrect_decisions.store(0, Ordering::Relaxed);
-        self.confidence_sum_scaled.store(0, Ordering::Relaxed);
-        self.confidence_count.store(0, Ordering::Relaxed);
-        self.llm_calls.store(0, Ordering::Relaxed);
-        self.interventions.store(0, Ordering::Relaxed);
-        self.skipped_interventions.store(0, Ordering::Relaxed);
-        self.budget_exhausted.store(0, Ordering::Relaxed);
-        self.algorithm_fallbacks.store(0, Ordering::Relaxed);
-    }
-
-    /// Generate a report snapshot.
-    pub fn generate_report(&self) -> PilotMetricsReport {
-        let total_decisions = self.total_decisions.load(Ordering::Relaxed);
-        let correct = self.correct_decisions.load(Ordering::Relaxed);
-        let total_feedback = correct + self.incorrect_decisions.load(Ordering::Relaxed);
-        let confidence_count = self.confidence_count.load(Ordering::Relaxed);
-        let confidence_sum_scaled = self.confidence_sum_scaled.load(Ordering::Relaxed);
-
-        PilotMetricsReport {
-            total_decisions,
-            start_guidance_calls: self.start_guidance_calls.load(Ordering::Relaxed),
-            fork_decisions: self.fork_decisions.load(Ordering::Relaxed),
-            backtrack_calls: self.backtrack_calls.load(Ordering::Relaxed),
-            evaluate_calls: self.evaluate_calls.load(Ordering::Relaxed),
-            accuracy: if total_feedback > 0 {
-                correct as f64 / total_feedback as f64
-            } else {
-                0.0
-            },
-            correct_decisions: correct,
-            incorrect_decisions: self.incorrect_decisions.load(Ordering::Relaxed),
-            avg_confidence: if confidence_count > 0 {
-                (confidence_sum_scaled as f64 / 1_000_000.0) / confidence_count as f64
-            } else {
-                0.0
-            },
-            llm_calls: self.llm_calls.load(Ordering::Relaxed),
-            interventions: self.interventions.load(Ordering::Relaxed),
-            skipped_interventions: self.skipped_interventions.load(Ordering::Relaxed),
-            budget_exhausted: self.budget_exhausted.load(Ordering::Relaxed),
-            algorithm_fallbacks: self.algorithm_fallbacks.load(Ordering::Relaxed),
-        }
-    }
-}
-
-/// Pilot metrics report.
-#[derive(Debug, Clone)]
-pub struct PilotMetricsReport {
-    /// Total number of decisions.
-    pub total_decisions: u64,
-    /// Number of start guidance calls.
-    pub start_guidance_calls: u64,
-    /// Number of fork decisions.
-    pub fork_decisions: u64,
-    /// Number of backtrack calls.
-    pub backtrack_calls: u64,
-    /// Number of evaluate calls.
-    pub evaluate_calls: u64,
-    /// Decision accuracy based on feedback.
-    pub accuracy: f64,
-    /// Number of correct decisions.
-    pub correct_decisions: u64,
-    /// Number of incorrect decisions.
-    pub incorrect_decisions: u64,
-    /// Average confidence across all decisions.
-    pub avg_confidence: f64,
-    /// Number of LLM calls made by Pilot.
-    pub llm_calls: u64,
-    /// Number of interventions.
-    pub interventions: u64,
-    /// Number of skipped interventions.
-    pub skipped_interventions: u64,
-    /// Number of budget exhausted events.
-    pub budget_exhausted: u64,
-    /// Number of algorithm fallbacks.
-    pub algorithm_fallbacks: u64,
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_pilot_metrics_recording() {
-        let config = PilotMetricsConfig::default();
-        let metrics = PilotMetrics::new();
-
-        metrics.record_decision(0.9, InterventionPoint::Start, &config);
-        metrics.record_decision(0.8, InterventionPoint::Fork, &config);
-        metrics.record_decision(0.7, InterventionPoint::Fork, &config);
-
-        metrics.record_feedback(true, &config);
-        metrics.record_feedback(false, &config);
-
-        let report = metrics.generate_report();
-        assert_eq!(report.total_decisions, 3);
-        assert_eq!(report.fork_decisions, 2);
-        assert!((report.accuracy - 0.5).abs() < 0.01);
-        assert!((report.avg_confidence - 0.8).abs() < 0.01);
-    }
-}

From 3c44f37832872bb4d1007ba587e4d8fd642e5c9d Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Tue, 21 Apr 2026 00:41:23 +0800
Subject: [PATCH 2/5] feat(client): enable description generation by default

Set generate_description to true in IndexOptions default configuration
to enable automatic description generation for improved search
functionality.
---
 rust/src/client/types.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust/src/client/types.rs b/rust/src/client/types.rs
index 7da62176..8995f6f2 100644
--- a/rust/src/client/types.rs
+++ b/rust/src/client/types.rs
@@ -92,7 +92,7 @@ impl Default for IndexOptions {
             mode: IndexMode::Default,
             generate_summaries: true,
             generate_ids: true,
-            generate_description: false,
+            generate_description: true,
             enable_synonym_expansion: true,
             timeout_secs: None,
         }

From 70fc475b7e9d1a6662731468792e77985ff82bf8 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Tue, 21 Apr 2026 00:52:35 +0800
Subject: [PATCH 3/5] refactor(engine): remove cancellation mechanism and
 active operations tracking

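- Remove the CancelFlag type alias and the cancelled/active_ops fields from the Engine struct
- Delete all cancellation-related methods (cancel, reset_cancel, is_cancelled, check_cancel)
- Remove the ActiveGuard struct and the associated increment/decrement logic (inc_active)
- Eliminate cancellation checks from index, query, query_stream, and related operations
- Update metrics_report documentation to reflect removed pilot decision metrics
- Remove the cancel-flag and active-guard unit tests
- Clean up unused imports

BREAKING CHANGE: Removed the public Engine::cancel, Engine::reset_cancel, and
Engine::is_cancelled methods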
---
 rust/src/client/engine.rs | 130 +-------------------------------------
 1 file changed, 2 insertions(+), 128 deletions(-)

diff --git a/rust/src/client/engine.rs b/rust/src/client/engine.rs
index e23d79d5..d32b4e6b 100644
--- a/rust/src/client/engine.rs
+++ b/rust/src/client/engine.rs
@@ -37,12 +37,7 @@
 //! # }
 //! ```
 
-use std::{
-    collections::HashMap,
-    sync::Arc,
-    sync::Mutex,
-    sync::atomic::{AtomicBool, Ordering},
-};
+use std::{collections::HashMap, sync::Arc};
 
 use futures::StreamExt;
 use tracing::info;
@@ -70,9 +65,6 @@ use super::{
     workspace::WorkspaceClient,
 };
 
-/// Shared cancel state: `true` means cancelled.
-type CancelFlag = Arc<AtomicBool>;
-
 /// The main Engine client.
 ///
 /// Provides high-level operations for document indexing and retrieval.
@@ -101,12 +93,6 @@ pub struct Engine {
 
     /// Central metrics hub for unified collection.
     metrics_hub: Arc<MetricsHub>,
-
-    /// Shared cancel flag — set by `cancel()`, checked by long-running operations.
-    cancelled: CancelFlag,
-
-    /// Active operation count so `cancel()` can wait for drain.
-    active_ops: Arc<Mutex<usize>>,
 }
 
 impl Engine {
@@ -142,8 +128,6 @@ impl Engine {
             retriever,
             workspace: workspace_client,
             metrics_hub,
-            cancelled: Arc::new(AtomicBool::new(false)),
-            active_ops: Arc::new(Mutex::new(0)),
         })
     }
 
@@ -160,12 +144,10 @@ impl Engine {
     /// Returns an [`IndexResult`] containing the indexed document metadata.
     #[tracing::instrument(skip_all, fields(sources = ctx.sources.len()))]
     pub async fn index(&self, ctx: IndexContext) -> Result<IndexResult> {
-        self.check_cancel()?;
         if ctx.is_empty() {
             return Err(Error::Config("No document sources provided".into()));
         }
 
-        let _guard = self.inc_active();
         let timeout_secs = ctx.options.timeout_secs;
 
         self.with_timeout(timeout_secs, async move {
@@ -252,16 +234,6 @@ impl Engine {
         options: &super::types::IndexOptions,
         name: Option<&str>,
     ) -> (Vec<IndexItem>, Vec<FailedItem>) {
-        if self.is_cancelled() {
-            return (
-                Vec::new(),
-                vec![FailedItem::new(
-                    source.to_string(),
-                    "Operation cancelled".to_string(),
-                )],
-            );
-        }
-
         let source_label = source.to_string();
 
         match self.resolve_index_action(source, options).await {
@@ -352,10 +324,6 @@ impl Engine {
         let max_attempts = retry.max_attempts;
 
         for attempt in 0..max_attempts {
-            if self.is_cancelled() {
-                return Err(Error::Config("Operation cancelled".into()));
-            }
-
             let result = if let Some(tree) = existing_tree {
                 self.indexer
                     .index_with_existing(source, name, pipeline_options.clone(), Some(tree))
@@ -445,8 +413,6 @@ impl Engine {
     /// (single document, multiple documents, or entire workspace).
     #[tracing::instrument(skip_all, fields(query = %ctx.query))]
     pub async fn query(&self, ctx: QueryContext) -> Result<QueryResult> {
-        self.check_cancel()?;
-        let _guard = self.inc_active();
         let timeout_secs = ctx.timeout_secs;
 
         self.with_timeout(timeout_secs, async move {
@@ -480,9 +446,6 @@ impl Engine {
     /// Events are translated from the agent's internal event stream
     /// into the public `RetrieveEventReceiver` stream.
     pub async fn query_stream(&self, ctx: QueryContext) -> Result<RetrieveEventReceiver> {
-        self.check_cancel()?;
-        let _guard = self.inc_active();
-
         let doc_ids = self.resolve_scope(&ctx.scope).await?;
         let query = ctx.query.clone();
 
@@ -844,32 +807,11 @@ impl Engine {
     /// Generate a complete metrics report.
     ///
     /// Returns a [`MetricsReport`](crate::metrics::MetricsReport) containing
-    /// LLM usage, pilot decision, and retrieval operation metrics.
+    /// LLM usage and retrieval operation metrics.
     pub fn metrics_report(&self) -> crate::metrics::MetricsReport {
         self.metrics_hub.generate_report()
     }
 
-    /// Cancel all in-flight `index()` and `query()` operations.
-    ///
-    /// After calling this, running operations will return at the next
-    /// convenient point with a cancellation error. New operations will
-    /// also fail until [`reset_cancel`](Self::reset_cancel) is called.
-    pub fn cancel(&self) {
-        self.cancelled.store(true, Ordering::Relaxed);
-        tracing::info!("Cancellation requested");
-    }
-
-    /// Reset the cancel flag so new operations can proceed.
-    pub fn reset_cancel(&self) {
-        self.cancelled.store(false, Ordering::Relaxed);
-        tracing::info!("Cancel flag reset");
-    }
-
-    /// Returns `true` if cancellation has been requested.
-    pub fn is_cancelled(&self) -> bool {
-        self.cancelled.load(Ordering::Relaxed)
-    }
-
     // ============================================================
     // Internal
     // ============================================================
@@ -907,23 +849,6 @@ impl Engine {
         Ok((documents, failed))
     }
 
-    /// Check cancel flag, returning an error if cancelled.
-    fn check_cancel(&self) -> Result<()> {
-        if self.cancelled.load(Ordering::Relaxed) {
-            return Err(Error::Config("Operation cancelled".into()));
-        }
-        Ok(())
-    }
-
-    /// Increment active operation counter. Returns a guard that decrements on drop.
-    fn inc_active(&self) -> ActiveGuard {
-        let mut ops = self.active_ops.lock().unwrap();
-        *ops += 1;
-        ActiveGuard {
-            active_ops: Arc::clone(&self.active_ops),
-        }
-    }
-
     /// Run a future with an optional timeout.
     /// If `timeout_secs` is `Some`, wraps the future in `tokio::time::timeout`.
     async fn with_timeout<F, T>(&self, timeout_secs: Option<u64>, fut: F) -> Result<T>
@@ -1135,24 +1060,10 @@ impl Clone for Engine {
             retriever: self.retriever.clone(),
             workspace: self.workspace.clone(),
             metrics_hub: Arc::clone(&self.metrics_hub),
-            cancelled: Arc::clone(&self.cancelled),
-            active_ops: Arc::clone(&self.active_ops),
         }
     }
 }
 
-/// RAII guard that decrements `active_ops` on drop.
-struct ActiveGuard {
-    active_ops: Arc<Mutex<usize>>,
-}
-
-impl Drop for ActiveGuard {
-    fn drop(&mut self) {
-        let mut ops = self.active_ops.lock().unwrap();
-        *ops = ops.saturating_sub(1);
-    }
-}
-
 impl std::fmt::Debug for Engine {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         f.debug_struct("Engine").finish_non_exhaustive()
@@ -1164,43 +1075,6 @@ mod tests {
     use super::*;
     use crate::client::types::IndexMode;
 
-    // ── Cancel ────────────────────────────────────────────────────────────
-
-    #[test]
-    fn test_cancel_flag() {
-        // We can't construct a full Engine without async + LLM, so test the
-        // underlying primitives directly.
-        let flag = Arc::new(AtomicBool::new(false));
-        assert!(!flag.load(Ordering::Relaxed));
-
-        flag.store(true, Ordering::Relaxed);
-        assert!(flag.load(Ordering::Relaxed));
-
-        flag.store(false, Ordering::Relaxed);
-        assert!(!flag.load(Ordering::Relaxed));
-    }
-
-    #[test]
-    fn test_active_guard_decrement() {
-        let active_ops: Arc<Mutex<usize>> = Arc::new(Mutex::new(0));
-
-        // Increment
-        {
-            let mut ops = active_ops.lock().unwrap();
-            *ops += 1;
-        }
-
-        assert_eq!(*active_ops.lock().unwrap(), 1);
-
-        // Drop guard (simulate ActiveGuard drop)
-        {
-            let mut ops = active_ops.lock().unwrap();
-            *ops = ops.saturating_sub(1);
-        }
-
-        assert_eq!(*active_ops.lock().unwrap(), 0);
-    }
-
     // ── resolve_index_action Default mode ──────────────────────────────────
 
     // We can't call resolve_index_action without a workspace, but we can

From 1daecbe8fd0c75729e9598fd60e8cf78fe7c666a Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Tue, 21 Apr 2026 01:17:08 +0800
Subject: [PATCH 4/5] docs(README): update Vectorless description to reflect
 agentic-based approach

Replace the old "reasoning-native" terminology with "agentic-based" to better
describe the document engine's architecture. Remove outdated bullet points
about reasoning principles that are no longer accurate.

docs(examples): describe the agent-based retrieval architecture in flow.rs

Update the descriptive text embedded in rust/examples/flow.rs so that the
retrieval engine is presented as an agent-based system with an Orchestrator
coordinating Workers, instead of search strategies (greedy DFS, beam
search, MCTS) guided by a Pilot component. The text now mentions LLM-guided
navigation commands (ls, cd, cat, find, grep), progress evaluation with
replanning, and budget awareness. This commit changes documentation text
only; no engine code is modified.
---
 README.md             | 8 +++-----
 rust/examples/flow.rs | 2 +-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 847b0540..82c01c74 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 <img src="https://vectorless.dev/img/with-title.png" alt="Vectorless" width="400">
 
-<h1>Document Engine for AI</h1>
+<h1>Agentic-based Document Engine</h1>
 
 [![PyPI](https://img.shields.io/pypi/v/vectorless.svg)](https://pypi.org/project/vectorless/)
 [![PyPI Downloads](https://static.pepy.tech/badge/vectorless/month)](https://pepy.tech/projects/vectorless)
@@ -13,11 +13,9 @@
 
 </div>
 
-**Vectorless** is a reasoning-native document engine with the core written in Rust. It will reason through any of your structured documents — **PDFs, Markdown, reports, contracts** — and retrieve only what's relevant. Nothing more, nothing less.
+**Reason, don't vector.**
 
-- **Reason, don't vector.** — Retrieval is guided by reasoning over document structure.
-- **Model fails, we fail.** — No silent degradation. No heuristic fallbacks.
-- **No thought, no answer.** — Only LLM-reasoned output counts as an answer.
+**Vectorless** is an agentic-based document engine with the core written in Rust. It will reason through any of your structured documents — **PDFs, Markdown, reports, contracts** — and retrieve only what's relevant. Nothing more, nothing less.
 
 
 ## Quick Start
diff --git a/rust/examples/flow.rs b/rust/examples/flow.rs
index 759c0b2e..ce13d80b 100644
--- a/rust/examples/flow.rs
+++ b/rust/examples/flow.rs
@@ -42,7 +42,7 @@ The indexing pipeline processes documents through multiple stages: parsing, tree
 
 ### Retrieval Engine
 
-The retrieval engine supports multiple search strategies including greedy depth-first search, beam search, and MCTS. A Pilot component provides LLM-guided navigation at key decision points during tree traversal. The engine is budget-aware, tracking token usage and making cost-conscious decisions about when to invoke the LLM versus using cheaper heuristic scoring.
+The retrieval engine uses an agent-based architecture where an Orchestrator coordinates Workers that navigate the document tree using LLM-guided decisions (ls, cd, cat, find, grep). The Orchestrator evaluates progress after each step and can replan when results are insufficient. The engine is budget-aware, tracking token usage and making cost-conscious decisions about when to invoke the LLM versus using cheaper heuristic scoring.
 
 ## Performance
 

From f44fdbe61f4ea798f017eaf62054366286d91c81 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Tue, 21 Apr 2026 01:18:54 +0800
Subject: [PATCH 5/5] chore(release): bump workspace and package versions

- Update workspace package version from 0.1.30 to 0.1.31 in Cargo.toml
- Update vectorless package version from 0.1.9 to 0.1.10 in pyproject.toml
---
 Cargo.toml     | 2 +-
 pyproject.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 8e278a32..ea675029 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,7 +3,7 @@ members = ["rust", "python"]
 resolver = "2"
 
 [workspace.package]
-version = "0.1.30"
+version = "0.1.31"
 edition = "2024"
 authors = ["zTgx <beautifularea@gmail.com>"]
 license = "Apache-2.0"
diff --git a/pyproject.toml b/pyproject.toml
index 1d8d38ea..6d57600e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
 
 [project]
 name = "vectorless"
-version = "0.1.9"
+version = "0.1.10"
 description = "Reasoning-native document intelligence engine for AI"
 readme = "README.md"
 requires-python = ">=3.9"