From a98e3134e4aeee0daef51f549245af9d055147b7 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Tue, 21 Apr 2026 00:31:31 +0800 Subject: [PATCH 1/5] feat(python): add timeout_secs option to PyIndexOptions and remove pilot metrics - Add timeout_secs parameter to PyIndexOptions constructor with optional u64 value - Implement timeout setting logic using with_timeout_secs method - Remove Pilot metrics report functionality from Python bindings - Remove PyPilotMetricsReport class registration from module - Replace PyQueryContext.with_max_tokens with with_timeout_secs and add with_force_analysis - Change the PyIndexOptions default for enable_synonym_expansion from false to true BREAKING CHANGE: Removed pilot metrics reporting functionality (PilotMetricsReport, MetricsReport.pilot, the llm.pilot slot and LlmPool::pilot), and removed QueryContext::with_max_tokens, with_include_reasoning and with_depth_limit --- docs/docs/sdk/python.mdx | 5 +- docs/docs/sdk/rust.mdx | 5 +- python/README.md | 5 +- python/src/context.rs | 18 +- python/src/lib.rs | 5 +- python/src/metrics.rs | 110 +---------- python/vectorless/cli/commands/query.py | 13 +- rust/src/client/query_context.rs | 33 ---- rust/src/config/mod.rs | 3 +- rust/src/config/types/llm_pool.rs | 26 +-- rust/src/config/types/metrics.rs | 31 --- rust/src/config/types/mod.rs | 4 +- rust/src/config/validator.rs | 7 - rust/src/lib.rs | 4 +- rust/src/llm/pool.rs | 12 -- rust/src/metrics/hub.rs | 84 +------- rust/src/metrics/mod.rs | 22 +-- rust/src/metrics/pilot.rs | 246 ------------------------ 18 files changed, 38 insertions(+), 595 deletions(-) delete mode 100644 rust/src/metrics/pilot.rs diff --git a/docs/docs/sdk/python.mdx b/docs/docs/sdk/python.mdx index bc3ae0ba..d26b6f4e 100644 --- a/docs/docs/sdk/python.mdx +++ b/docs/docs/sdk/python.mdx @@ -111,9 +111,8 @@ answer = await engine.query( answer = await engine.query( QueryContext("Explain the architecture") .with_doc_ids([doc_id]) - .with_max_tokens(4000) # Max tokens in result - .with_include_reasoning(True) # Include reasoning chain - .with_depth_limit(10) # Max traversal depth + .with_timeout_secs(60) # Per-operation timeout + .with_force_analysis(True) # Force Orchestrator analysis ) ``` diff --git 
a/docs/docs/sdk/rust.mdx b/docs/docs/sdk/rust.mdx index 1117302d..c9e3efc1 100644 --- a/docs/docs/sdk/rust.mdx +++ b/docs/docs/sdk/rust.mdx @@ -52,14 +52,11 @@ let result = engine.index( ```rust use vectorless::QueryContext; -use vectorless::StrategyPreference; let result = engine.query( QueryContext::new("What is the total revenue?") .with_doc_ids(vec![doc_id.to_string()]) - .with_strategy(StrategyPreference::ForceHybrid) - .with_max_tokens(4000) - .with_include_reasoning(true) + .with_timeout_secs(60) ).await?; if let Some(item) = result.single() { diff --git a/python/README.md b/python/README.md index 4ca5fa40..be4761d0 100644 --- a/python/README.md +++ b/python/README.md @@ -116,9 +116,8 @@ class QueryContext: def with_doc_ids(self, doc_ids: list[str]) -> QueryContext: ... def with_workspace(self) -> QueryContext: ... - def with_max_tokens(self, tokens: int) -> QueryContext: ... - def with_include_reasoning(self, include: bool) -> QueryContext: ... - def with_depth_limit(self, depth: int) -> QueryContext: ... + def with_timeout_secs(self, secs: int) -> QueryContext: ... + def with_force_analysis(self, force: bool) -> QueryContext: ... 
``` ### IndexResult diff --git a/python/src/context.rs b/python/src/context.rs index bdf37c6c..2bf0ae94 100644 --- a/python/src/context.rs +++ b/python/src/context.rs @@ -44,13 +44,14 @@ pub struct PyIndexOptions { #[pymethods] impl PyIndexOptions { #[new] - #[pyo3(signature = (mode="default", generate_summaries=true, generate_description=false, generate_ids=true, enable_synonym_expansion=false))] + #[pyo3(signature = (mode="default", generate_summaries=true, generate_description=false, generate_ids=true, enable_synonym_expansion=true, timeout_secs=None))] fn new( mode: &str, generate_summaries: bool, generate_description: bool, generate_ids: bool, enable_synonym_expansion: bool, + timeout_secs: Option, ) -> PyResult { let mut opts = IndexOptions::new(); match mode { @@ -71,6 +72,9 @@ impl PyIndexOptions { opts.generate_description = generate_description; opts.generate_ids = generate_ids; opts.enable_synonym_expansion = enable_synonym_expansion; + if let Some(secs) = timeout_secs { + opts = opts.with_timeout_secs(secs); + } Ok(Self { inner: opts }) } @@ -260,9 +264,15 @@ impl PyQueryContext { Self { inner: ctx } } - /// Set the maximum tokens for the result content. - fn with_max_tokens(&self, tokens: usize) -> Self { - let ctx = self.inner.clone().with_max_tokens(tokens); + /// Set per-operation timeout in seconds. + fn with_timeout_secs(&self, secs: u64) -> Self { + let ctx = self.inner.clone().with_timeout_secs(secs); + Self { inner: ctx } + } + + /// Force the Orchestrator to analyze documents before dispatching Workers. 
+ fn with_force_analysis(&self, force: bool) -> Self { + let ctx = self.inner.clone().with_force_analysis(force); Self { inner: ctx } } diff --git a/python/src/lib.rs b/python/src/lib.rs index 6a7eb913..a3951cd5 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -20,9 +20,7 @@ use document::PyDocumentInfo; use engine::PyEngine; use error::VectorlessError; use graph::{PyDocumentGraph, PyDocumentGraphNode, PyEdgeEvidence, PyGraphEdge, PyWeightedKeyword}; -use metrics::{ - PyLlmMetricsReport, PyMetricsReport, PyPilotMetricsReport, PyRetrievalMetricsReport, -}; +use metrics::{PyLlmMetricsReport, PyMetricsReport, PyRetrievalMetricsReport}; use results::{ PyEvidenceItem, PyFailedItem, PyIndexItem, PyIndexMetrics, PyIndexResult, PyQueryMetrics, PyQueryResult, PyQueryResultItem, @@ -59,7 +57,6 @@ fn _vectorless(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/python/src/metrics.rs b/python/src/metrics.rs index 27a71dcb..f194cd82 100644 --- a/python/src/metrics.rs +++ b/python/src/metrics.rs @@ -5,7 +5,7 @@ use pyo3::prelude::*; -use ::vectorless::{LlmMetricsReport, MetricsReport, PilotMetricsReport, RetrievalMetricsReport}; +use ::vectorless::{LlmMetricsReport, MetricsReport, RetrievalMetricsReport}; /// LLM usage metrics report. #[pyclass(name = "LlmMetricsReport")] @@ -101,106 +101,6 @@ impl PyLlmMetricsReport { } } -/// Pilot decision metrics report. -#[pyclass(name = "PilotMetricsReport")] -pub struct PyPilotMetricsReport { - pub(crate) inner: PilotMetricsReport, -} - -#[pymethods] -impl PyPilotMetricsReport { - /// Total number of Pilot decisions. - #[getter] - fn total_decisions(&self) -> u64 { - self.inner.total_decisions - } - - /// Number of start guidance calls. - #[getter] - fn start_guidance_calls(&self) -> u64 { - self.inner.start_guidance_calls - } - - /// Number of fork decisions. 
- #[getter] - fn fork_decisions(&self) -> u64 { - self.inner.fork_decisions - } - - /// Number of backtrack calls. - #[getter] - fn backtrack_calls(&self) -> u64 { - self.inner.backtrack_calls - } - - /// Number of evaluate calls. - #[getter] - fn evaluate_calls(&self) -> u64 { - self.inner.evaluate_calls - } - - /// Decision accuracy based on feedback (0.0 - 1.0). - #[getter] - fn accuracy(&self) -> f64 { - self.inner.accuracy - } - - /// Number of correct decisions. - #[getter] - fn correct_decisions(&self) -> u64 { - self.inner.correct_decisions - } - - /// Number of incorrect decisions. - #[getter] - fn incorrect_decisions(&self) -> u64 { - self.inner.incorrect_decisions - } - - /// Average confidence across all decisions. - #[getter] - fn avg_confidence(&self) -> f64 { - self.inner.avg_confidence - } - - /// Number of LLM calls made by Pilot. - #[getter] - fn llm_calls(&self) -> u64 { - self.inner.llm_calls - } - - /// Number of interventions. - #[getter] - fn interventions(&self) -> u64 { - self.inner.interventions - } - - /// Number of skipped interventions. - #[getter] - fn skipped_interventions(&self) -> u64 { - self.inner.skipped_interventions - } - - /// Number of budget exhausted events. - #[getter] - fn budget_exhausted(&self) -> u64 { - self.inner.budget_exhausted - } - - /// Number of algorithm fallbacks. - #[getter] - fn algorithm_fallbacks(&self) -> u64 { - self.inner.algorithm_fallbacks - } - - fn __repr__(&self) -> String { - format!( - "PilotMetricsReport(decisions={}, accuracy={:.2}, avg_confidence={:.2})", - self.inner.total_decisions, self.inner.accuracy, self.inner.avg_confidence, - ) - } -} - /// Retrieval operation metrics report. #[pyclass(name = "RetrievalMetricsReport")] pub struct PyRetrievalMetricsReport { @@ -337,14 +237,6 @@ impl PyMetricsReport { } } - /// Pilot metrics. - #[getter] - fn pilot(&self) -> PyPilotMetricsReport { - PyPilotMetricsReport { - inner: self.inner.pilot.clone(), - } - } - /// Retrieval metrics. 
#[getter] fn retrieval(&self) -> PyRetrievalMetricsReport { diff --git a/python/vectorless/cli/commands/query.py b/python/vectorless/cli/commands/query.py index f928ab46..79638666 100644 --- a/python/vectorless/cli/commands/query.py +++ b/python/vectorless/cli/commands/query.py @@ -12,7 +12,7 @@ def query_cmd( workspace_scope: bool = False, fmt: str = "text", verbose: bool = False, - max_tokens: Optional[int] = None, + timeout_secs: Optional[int] = None, ) -> None: """Execute a single query against indexed documents. @@ -22,19 +22,12 @@ def query_cmd( workspace_scope: Query across all documents. fmt: Output format — "text" or "json". verbose: Show Agent navigation steps. - max_tokens: Max result tokens. + timeout_secs: Per-operation timeout in seconds. Uses: Engine.query(QueryContext(question) .with_doc_ids([...]) or .with_workspace() - .with_max_tokens(n)) + .with_timeout_secs(n)) -> QueryResult - - Verbose mode prints Agent navigation: - [1/8] Bird's-eye: 3 top-level branches - [2/8] Descend → payment-configuration - [3/8] GetContent → doc 29139b - [4/8] Evaluate → sufficient - → Answer: ... """ raise NotImplementedError diff --git a/rust/src/client/query_context.rs b/rust/src/client/query_context.rs index e9513315..48d9ad2a 100644 --- a/rust/src/client/query_context.rs +++ b/rust/src/client/query_context.rs @@ -49,12 +49,6 @@ pub struct QueryContext { pub(crate) query: String, /// Target scope. pub(crate) scope: QueryScope, - /// Maximum tokens for the result content. - pub(crate) max_tokens: Option, - /// Whether to include the pilot reasoning chain in the result. - pub(crate) include_reasoning: bool, - /// Maximum tree traversal depth for the pilot. - pub(crate) depth_limit: Option, /// Per-operation timeout (seconds). `None` means no timeout. pub(crate) timeout_secs: Option, /// Force Orchestrator analysis even when documents are specified. 
@@ -72,9 +66,6 @@ impl QueryContext { Self { query: query.into(), scope: QueryScope::Workspace, - max_tokens: None, - include_reasoning: true, - depth_limit: None, timeout_secs: None, force_analysis: false, } @@ -95,24 +86,6 @@ impl QueryContext { self } - /// Set the maximum tokens for the result content. - pub fn with_max_tokens(mut self, tokens: usize) -> Self { - self.max_tokens = Some(tokens); - self - } - - /// Set whether to include the pilot reasoning chain. - pub fn with_include_reasoning(mut self, include: bool) -> Self { - self.include_reasoning = include; - self - } - - /// Set the maximum tree traversal depth for the pilot. - pub fn with_depth_limit(mut self, depth: usize) -> Self { - self.depth_limit = Some(depth); - self - } - /// Set per-operation timeout in seconds. pub fn with_timeout_secs(mut self, secs: u64) -> Self { self.timeout_secs = Some(secs); @@ -193,14 +166,8 @@ mod tests { fn test_builder_options() { let ctx = QueryContext::new("test") .with_doc_ids(vec!["doc-1".to_string()]) - .with_max_tokens(4000) - .with_include_reasoning(false) - .with_depth_limit(5) .with_timeout_secs(60); - assert_eq!(ctx.max_tokens, Some(4000)); - assert!(!ctx.include_reasoning); - assert_eq!(ctx.depth_limit, Some(5)); assert_eq!(ctx.timeout_secs, Some(60)); } diff --git a/rust/src/config/mod.rs b/rust/src/config/mod.rs index 26c73ac3..3ece0fe6 100644 --- a/rust/src/config/mod.rs +++ b/rust/src/config/mod.rs @@ -12,6 +12,5 @@ mod validator; pub use types::Config; pub(crate) use types::{ CompressionAlgorithm, FallbackBehavior, FallbackConfig, IndexerConfig, LlmConfig, - LlmMetricsConfig, MetricsConfig, OnAllFailedBehavior, PilotMetricsConfig, - RetrievalMetricsConfig, SlotConfig, + LlmMetricsConfig, MetricsConfig, OnAllFailedBehavior, RetrievalMetricsConfig, SlotConfig, }; diff --git a/rust/src/config/types/llm_pool.rs b/rust/src/config/types/llm_pool.rs index fc092a12..b38497aa 100644 --- a/rust/src/config/types/llm_pool.rs +++ 
b/rust/src/config/types/llm_pool.rs @@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize}; /// /// Contains: /// - Global credentials (`api_key`, `model`, `endpoint`) -/// - Per-purpose slot overrides (`index`, `retrieval`, `pilot`) +/// - Per-purpose slot overrides (`index`, `retrieval`) /// - Infrastructure settings (`retry`, `throttle`, `fallback`) /// /// # Simple usage (via EngineBuilder) @@ -71,11 +71,6 @@ pub struct LlmConfig { #[serde(default = "default_retrieval_slot")] pub retrieval: SlotConfig, - /// Pilot slot (navigation guidance). - /// Uses a fast model with higher token limit. - #[serde(default = "default_pilot_slot")] - pub pilot: SlotConfig, - /// Retry configuration for LLM calls. #[serde(default)] pub retry: RetryConfig, @@ -96,13 +91,6 @@ fn default_retrieval_slot() -> SlotConfig { } } -fn default_pilot_slot() -> SlotConfig { - SlotConfig { - max_tokens: 300, - ..SlotConfig::default() - } -} - impl Default for LlmConfig { fn default() -> Self { Self { @@ -111,7 +99,6 @@ impl Default for LlmConfig { endpoint: None, index: SlotConfig::default(), retrieval: default_retrieval_slot(), - pilot: default_pilot_slot(), retry: RetryConfig::default(), throttle: ThrottleConfig::default(), fallback: FallbackConfig::default(), @@ -158,12 +145,6 @@ impl LlmConfig { self } - /// Set the pilot slot configuration. - pub fn with_pilot(mut self, slot: SlotConfig) -> Self { - self.pilot = slot; - self - } - /// Set the retry configuration. pub fn with_retry(mut self, retry: RetryConfig) -> Self { self.retry = retry; @@ -199,7 +180,7 @@ impl LlmConfig { /// Per-purpose LLM slot override. /// /// Controls model selection and generation parameters for a specific -/// LLM usage (index, retrieval, or pilot). +/// LLM usage (index or retrieval). /// /// - `model`: Override the default model (optional). /// - `max_tokens`: Maximum response tokens. 
@@ -591,10 +572,8 @@ mod tests { assert!(config.endpoint.is_none()); assert!(config.index.model.is_none()); assert!(config.retrieval.model.is_none()); - assert!(config.pilot.model.is_none()); assert_eq!(config.index.max_tokens, 200); assert_eq!(config.retrieval.max_tokens, 100); - assert_eq!(config.pilot.max_tokens, 300); } #[test] @@ -617,7 +596,6 @@ mod tests { assert_eq!(config.resolve_model(&config.index), "gpt-4o"); assert_eq!(config.resolve_model(&config.retrieval), "gpt-4o-mini"); - assert_eq!(config.resolve_model(&config.pilot), "gpt-4o"); } #[test] diff --git a/rust/src/config/types/metrics.rs b/rust/src/config/types/metrics.rs index 230686ac..c1f4e766 100644 --- a/rust/src/config/types/metrics.rs +++ b/rust/src/config/types/metrics.rs @@ -24,10 +24,6 @@ pub struct MetricsConfig { #[serde(default)] pub llm: LlmMetricsConfig, - /// Pilot metrics configuration. - #[serde(default)] - pub pilot: PilotMetricsConfig, - /// Retrieval metrics configuration. #[serde(default)] pub retrieval: RetrievalMetricsConfig, @@ -52,7 +48,6 @@ impl Default for MetricsConfig { storage_path: default_storage_path(), retention_days: default_retention_days(), llm: LlmMetricsConfig::default(), - pilot: PilotMetricsConfig::default(), retrieval: RetrievalMetricsConfig::default(), } } @@ -125,32 +120,6 @@ impl LlmMetricsConfig { } } -/// Pilot-specific metrics configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PilotMetricsConfig { - /// Track Pilot decisions. - #[serde(default = "default_true")] - pub track_decisions: bool, - - /// Track decision accuracy (requires feedback). - #[serde(default = "default_true")] - pub track_accuracy: bool, - - /// Track user feedback. 
- #[serde(default = "default_true")] - pub track_feedback: bool, -} - -impl Default for PilotMetricsConfig { - fn default() -> Self { - Self { - track_decisions: default_true(), - track_accuracy: default_true(), - track_feedback: default_true(), - } - } -} - /// Retrieval-specific metrics configuration. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RetrievalMetricsConfig { diff --git a/rust/src/config/types/mod.rs b/rust/src/config/types/mod.rs index b4421110..e6ba3f8b 100644 --- a/rust/src/config/types/mod.rs +++ b/rust/src/config/types/mod.rs @@ -15,9 +15,7 @@ pub(crate) use indexer::IndexerConfig; pub(crate) use llm_pool::{ FallbackBehavior, FallbackConfig, LlmConfig, OnAllFailedBehavior, SlotConfig, }; -pub(crate) use metrics::{ - LlmMetricsConfig, MetricsConfig, PilotMetricsConfig, RetrievalMetricsConfig, -}; +pub(crate) use metrics::{LlmMetricsConfig, MetricsConfig, RetrievalMetricsConfig}; pub(crate) use retrieval::RetrievalConfig; pub(crate) use storage::{CompressionAlgorithm, StorageConfig}; diff --git a/rust/src/config/validator.rs b/rust/src/config/validator.rs index 5d08c89a..c3f55422 100644 --- a/rust/src/config/validator.rs +++ b/rust/src/config/validator.rs @@ -97,13 +97,6 @@ impl ValidationRule for RangeValidator { )); } - if config.llm.pilot.max_tokens == 0 { - errors.push(ValidationError::error( - "llm.pilot.max_tokens", - "Pilot max tokens must be greater than 0", - )); - } - // Retrieval ranges if config.retrieval.top_k == 0 { errors.push(ValidationError::error( diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 9ddc9b35..e9aa745c 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -83,9 +83,7 @@ pub use graph::{DocumentGraph, DocumentGraphNode, EdgeEvidence, GraphEdge, Weigh pub use events::{EventEmitter, IndexEvent, QueryEvent, WorkspaceEvent}; // Metrics -pub use metrics::{ - IndexMetrics, LlmMetricsReport, MetricsReport, PilotMetricsReport, RetrievalMetricsReport, -}; +pub use metrics::{IndexMetrics, LlmMetricsReport, 
MetricsReport, RetrievalMetricsReport}; // Errors pub use error::{Error, Result}; diff --git a/rust/src/llm/pool.rs b/rust/src/llm/pool.rs index 09bb4acd..9acef8ca 100644 --- a/rust/src/llm/pool.rs +++ b/rust/src/llm/pool.rs @@ -39,7 +39,6 @@ use crate::metrics::MetricsHub; pub struct LlmPool { index: Arc, retrieval: Arc, - pilot: Arc, } impl LlmPool { @@ -103,7 +102,6 @@ impl LlmPool { Self { index: build_client(&config.index), retrieval: build_client(&config.retrieval), - pilot: build_client(&config.pilot), } } @@ -121,11 +119,6 @@ impl LlmPool { pub fn retrieval(&self) -> &LlmClient { &self.retrieval } - - /// Get the pilot client. - pub fn pilot(&self) -> &LlmClient { - &self.pilot - } } impl Default for LlmPool { @@ -149,7 +142,6 @@ mod tests { assert_eq!(pool.index().config().model, "gpt-4o-mini"); assert_eq!(pool.retrieval().config().model, "gpt-4o"); - assert_eq!(pool.pilot().config().model, "gpt-4o"); assert_eq!(pool.index().config().max_tokens, 100); } @@ -162,15 +154,11 @@ mod tests { let hub = MetricsHub::shared(); let pool = LlmPool::from_config(&config, Some(hub.clone())); - // Verify each client has fallback (which means executor was built correctly) assert!(pool.index().fallback().is_some()); assert!(pool.retrieval().fallback().is_some()); - assert!(pool.pilot().fallback().is_some()); - // Verify models are resolved correctly assert_eq!(pool.index().config().model, "gpt-4o"); assert_eq!(pool.retrieval().config().model, "gpt-4o"); - assert_eq!(pool.pilot().config().model, "gpt-4o"); } #[test] diff --git a/rust/src/metrics/hub.rs b/rust/src/metrics/hub.rs index c00471cc..c4f70fe4 100644 --- a/rust/src/metrics/hub.rs +++ b/rust/src/metrics/hub.rs @@ -6,7 +6,6 @@ use std::sync::Arc; use super::llm::{LlmMetrics, LlmMetricsReport}; -use super::pilot::{InterventionPoint, PilotMetrics, PilotMetricsReport}; use super::retrieval::{RetrievalMetrics, RetrievalMetricsReport}; use crate::config::MetricsConfig; @@ -14,7 +13,6 @@ use crate::config::MetricsConfig; 
/// /// Provides a single point for all metrics collection across: /// - LLM operations (tokens, latency, cost) -/// - Pilot decisions (accuracy, confidence, feedback) /// - Retrieval operations (paths, scores, cache) /// /// # Thread Safety @@ -24,7 +22,7 @@ use crate::config::MetricsConfig; /// # Example /// /// ```rust -/// use vectorless::metrics::{MetricsHub, MetricsConfig, InterventionPoint}; +/// use vectorless::metrics::{MetricsHub, MetricsConfig}; /// /// let config = MetricsConfig::default(); /// let hub = MetricsHub::new(config); @@ -32,9 +30,6 @@ use crate::config::MetricsConfig; /// // Record LLM call /// hub.record_llm_call(100, 50, 150, true); /// -/// // Record Pilot decision -/// hub.record_pilot_decision(0.85, InterventionPoint::Fork); -/// /// // Get report /// let report = hub.generate_report(); /// ``` @@ -42,7 +37,6 @@ use crate::config::MetricsConfig; pub struct MetricsHub { config: MetricsConfig, llm: LlmMetrics, - pilot: PilotMetrics, retrieval: RetrievalMetrics, } @@ -52,7 +46,6 @@ impl MetricsHub { Self { config, llm: LlmMetrics::new(), - pilot: PilotMetrics::new(), retrieval: RetrievalMetrics::new(), } } @@ -132,67 +125,6 @@ impl MetricsHub { self.llm.generate_report() } - // ======================================================================== - // Pilot Metrics - // ======================================================================== - - /// Record a Pilot decision. - pub fn record_pilot_decision(&self, confidence: f64, point: InterventionPoint) { - if !self.config.enabled || !self.config.pilot.track_decisions { - return; - } - self.pilot - .record_decision(confidence, point, &self.config.pilot); - } - - /// Record feedback on a Pilot decision. - pub fn record_pilot_feedback(&self, was_correct: bool) { - if !self.config.enabled || !self.config.pilot.track_feedback { - return; - } - self.pilot.record_feedback(was_correct, &self.config.pilot); - } - - /// Record a Pilot LLM call. 
- pub fn record_pilot_llm_call(&self) { - if self.config.enabled { - self.pilot.record_llm_call(); - } - } - - /// Record a Pilot intervention. - pub fn record_pilot_intervention(&self) { - if self.config.enabled { - self.pilot.record_intervention(); - } - } - - /// Record a skipped Pilot intervention. - pub fn record_pilot_intervention_skipped(&self) { - if self.config.enabled { - self.pilot.record_skipped_intervention(); - } - } - - /// Record Pilot budget exhausted. - pub fn record_pilot_budget_exhausted(&self) { - if self.config.enabled { - self.pilot.record_budget_exhausted(); - } - } - - /// Record Pilot fallback to algorithm. - pub fn record_pilot_algorithm_fallback(&self) { - if self.config.enabled { - self.pilot.record_algorithm_fallback(); - } - } - - /// Get Pilot metrics report. - pub fn pilot_report(&self) -> PilotMetricsReport { - self.pilot.generate_report() - } - // ======================================================================== // Retrieval Metrics // ======================================================================== @@ -261,7 +193,6 @@ impl MetricsHub { /// Reset all metrics. pub fn reset(&self) { self.llm.reset(); - self.pilot.reset(); self.retrieval.reset(); } @@ -269,7 +200,6 @@ impl MetricsHub { pub fn generate_report(&self) -> MetricsReport { MetricsReport { llm: self.llm_report(), - pilot: self.pilot_report(), retrieval: self.retrieval_report(), } } @@ -286,8 +216,6 @@ impl Default for MetricsHub { pub struct MetricsReport { /// LLM metrics. pub llm: LlmMetricsReport, - /// Pilot metrics. - pub pilot: PilotMetricsReport, /// Retrieval metrics. 
pub retrieval: RetrievalMetricsReport, } @@ -307,15 +235,12 @@ mod tests { fn test_metrics_hub_recording() { let hub = MetricsHub::with_defaults(); - // Record various metrics hub.record_llm_call(100, 50, 150, true); - hub.record_pilot_decision(0.9, InterventionPoint::Fork); hub.record_retrieval_query(5, 10, 100); let report = hub.generate_report(); assert_eq!(report.llm.total_calls, 1); - assert_eq!(report.pilot.total_decisions, 1); assert_eq!(report.retrieval.total_queries, 1); } @@ -325,12 +250,10 @@ mod tests { let hub = MetricsHub::new(config); hub.record_llm_call(100, 50, 150, true); - hub.record_pilot_decision(0.9, InterventionPoint::Fork); let report = hub.generate_report(); assert_eq!(report.llm.total_calls, 0); - assert_eq!(report.pilot.total_decisions, 0); } #[test] @@ -348,11 +271,8 @@ mod tests { fn test_llm_metrics_success_and_failure() { let hub = MetricsHub::with_defaults(); - // Record successes hub.record_llm_call(100, 50, 150, true); hub.record_llm_call(200, 100, 300, true); - - // Record failure hub.record_llm_call(0, 0, 50, false); let report = hub.llm_report(); @@ -383,7 +303,6 @@ mod tests { fn test_shared_arc_metrics() { let hub = MetricsHub::shared(); - // Clone the Arc — both references point to the same hub let hub2 = hub.clone(); hub.record_llm_call(100, 50, 100, true); hub2.record_llm_call(200, 100, 200, true); @@ -400,7 +319,6 @@ mod tests { hub.record_llm_call(1000, 500, 200, true); let report = hub.generate_report(); - // Cost should be positive (exact value depends on config pricing) assert!(report.total_cost_usd() >= 0.0); } } diff --git a/rust/src/metrics/mod.rs b/rust/src/metrics/mod.rs index bc724988..26ab6411 100644 --- a/rust/src/metrics/mod.rs +++ b/rust/src/metrics/mod.rs @@ -5,7 +5,6 @@ //! //! This module provides centralized metrics collection across all components: //! - **LLM Metrics** — Token usage, latency, cost -//! - **Pilot Metrics** — Decisions, accuracy, feedback //! 
- **Retrieval Metrics** — Paths, scores, iterations, cache //! //! # Architecture @@ -14,13 +13,13 @@ //! ┌─────────────────────────────────────────────────────────────────┐ //! │ MetricsHub │ //! │ │ -//! │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ -//! │ │ LlmMetrics │ │PilotMetrics │ │RetrievalMetrics│ │ -//! │ │ │ │ │ │ │ │ -//! │ │ - tokens │ │ - decisions │ │ - paths │ │ -//! │ │ - latency │ │ - accuracy │ │ - scores │ │ -//! │ │ - cost │ │ - feedback │ │ - cache │ │ -//! │ └─────────────┘ └─────────────┘ └─────────────┘ │ +//! │ ┌─────────────┐ ┌──────────────────┐ │ +//! │ │ LlmMetrics │ │RetrievalMetrics │ │ +//! │ │ │ │ │ │ +//! │ │ - tokens │ │ - paths │ │ +//! │ │ - latency │ │ - scores │ │ +//! │ │ - cost │ │ - cache │ │ +//! │ └─────────────┘ └──────────────────┘ │ //! │ │ //! │ ┌─────────────────────────────────────────────────────────┐ │ //! │ │ MetricsReport │ │ @@ -33,7 +32,7 @@ //! # Example //! //! ```rust -//! use vectorless::metrics::{MetricsHub, MetricsConfig, InterventionPoint}; +//! use vectorless::metrics::{MetricsHub, MetricsConfig}; //! //! let config = MetricsConfig::default(); //! let hub = MetricsHub::new(config); @@ -41,9 +40,6 @@ //! // Record LLM call //! hub.record_llm_call(100, 50, 150, true); //! -//! // Record Pilot decision -//! hub.record_pilot_decision(0.85, InterventionPoint::Fork); -//! //! // Generate report //! let report = hub.generate_report(); //! 
println!("Total cost: ${:.4}", report.llm.estimated_cost_usd); @@ -52,11 +48,9 @@ mod hub; mod index; mod llm; -mod pilot; mod retrieval; pub use hub::{MetricsHub, MetricsReport}; pub use index::IndexMetrics; pub use llm::LlmMetricsReport; -pub use pilot::PilotMetricsReport; pub use retrieval::RetrievalMetricsReport; diff --git a/rust/src/metrics/pilot.rs b/rust/src/metrics/pilot.rs deleted file mode 100644 index f8365e45..00000000 --- a/rust/src/metrics/pilot.rs +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright (c) 2026 vectorless developers -// SPDX-License-Identifier: Apache-2.0 - -//! Pilot metrics collection. - -use std::sync::atomic::{AtomicU64, Ordering}; - -use crate::config::PilotMetricsConfig; - -/// Intervention point type. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum InterventionPoint { - /// At search start. - Start, - /// At a fork (multiple candidates). - Fork, - /// During backtracking. - Backtrack, - /// Evaluating content sufficiency. - Evaluate, - /// Binary pruning for wide nodes. - Prune, -} - -/// Pilot metrics tracker. -#[derive(Debug, Default)] -pub struct PilotMetrics { - /// Total number of Pilot decisions. - pub total_decisions: AtomicU64, - /// Number of start guidance calls. - pub start_guidance_calls: AtomicU64, - /// Number of fork decisions. - pub fork_decisions: AtomicU64, - /// Number of backtrack guidance calls. - pub backtrack_calls: AtomicU64, - /// Number of evaluate calls. - pub evaluate_calls: AtomicU64, - /// Number of correct decisions (based on feedback). - pub correct_decisions: AtomicU64, - /// Number of incorrect decisions (based on feedback). - pub incorrect_decisions: AtomicU64, - /// Sum of confidence values stored as u64 bits (for atomic ops). - /// We store the sum scaled by 1,000,000 to maintain precision. - pub confidence_sum_scaled: AtomicU64, - /// Number of confidence samples. - pub confidence_count: AtomicU64, - /// Number of LLM calls made by Pilot. 
- pub llm_calls: AtomicU64, - /// Number of times Pilot intervened. - pub interventions: AtomicU64, - /// Number of times Pilot skipped intervention (algorithm was confident). - pub skipped_interventions: AtomicU64, - /// Number of budget exhausted events. - pub budget_exhausted: AtomicU64, - /// Number of fallback to algorithm. - pub algorithm_fallbacks: AtomicU64, -} - -impl PilotMetrics { - /// Create new Pilot metrics. - pub fn new() -> Self { - Self::default() - } - - /// Record a Pilot decision. - pub fn record_decision( - &self, - confidence: f64, - point: InterventionPoint, - config: &PilotMetricsConfig, - ) { - if !config.track_decisions { - return; - } - - self.total_decisions.fetch_add(1, Ordering::Relaxed); - - match point { - InterventionPoint::Start => { - self.start_guidance_calls.fetch_add(1, Ordering::Relaxed); - } - InterventionPoint::Fork | InterventionPoint::Prune => { - self.fork_decisions.fetch_add(1, Ordering::Relaxed); - } - InterventionPoint::Backtrack => { - self.backtrack_calls.fetch_add(1, Ordering::Relaxed); - } - InterventionPoint::Evaluate => { - self.evaluate_calls.fetch_add(1, Ordering::Relaxed); - } - } - - // Update average confidence (store as scaled integer for atomic operations) - let scaled_confidence = (confidence * 1_000_000.0) as u64; - self.confidence_sum_scaled - .fetch_add(scaled_confidence, Ordering::Relaxed); - self.confidence_count.fetch_add(1, Ordering::Relaxed); - } - - /// Record feedback on a decision. - pub fn record_feedback(&self, was_correct: bool, config: &PilotMetricsConfig) { - if !config.track_feedback { - return; - } - - if was_correct { - self.correct_decisions.fetch_add(1, Ordering::Relaxed); - } else { - self.incorrect_decisions.fetch_add(1, Ordering::Relaxed); - } - } - - /// Record an LLM call made by Pilot. - pub fn record_llm_call(&self) { - self.llm_calls.fetch_add(1, Ordering::Relaxed); - } - - /// Record an intervention. 
- pub fn record_intervention(&self) { - self.interventions.fetch_add(1, Ordering::Relaxed); - } - - /// Record a skipped intervention. - pub fn record_skipped_intervention(&self) { - self.skipped_interventions.fetch_add(1, Ordering::Relaxed); - } - - /// Record budget exhausted. - pub fn record_budget_exhausted(&self) { - self.budget_exhausted.fetch_add(1, Ordering::Relaxed); - } - - /// Record algorithm fallback. - pub fn record_algorithm_fallback(&self) { - self.algorithm_fallbacks.fetch_add(1, Ordering::Relaxed); - } - - /// Reset all metrics. - pub fn reset(&self) { - self.total_decisions.store(0, Ordering::Relaxed); - self.start_guidance_calls.store(0, Ordering::Relaxed); - self.fork_decisions.store(0, Ordering::Relaxed); - self.backtrack_calls.store(0, Ordering::Relaxed); - self.evaluate_calls.store(0, Ordering::Relaxed); - self.correct_decisions.store(0, Ordering::Relaxed); - self.incorrect_decisions.store(0, Ordering::Relaxed); - self.confidence_sum_scaled.store(0, Ordering::Relaxed); - self.confidence_count.store(0, Ordering::Relaxed); - self.llm_calls.store(0, Ordering::Relaxed); - self.interventions.store(0, Ordering::Relaxed); - self.skipped_interventions.store(0, Ordering::Relaxed); - self.budget_exhausted.store(0, Ordering::Relaxed); - self.algorithm_fallbacks.store(0, Ordering::Relaxed); - } - - /// Generate a report snapshot. 
- pub fn generate_report(&self) -> PilotMetricsReport { - let total_decisions = self.total_decisions.load(Ordering::Relaxed); - let correct = self.correct_decisions.load(Ordering::Relaxed); - let total_feedback = correct + self.incorrect_decisions.load(Ordering::Relaxed); - let confidence_count = self.confidence_count.load(Ordering::Relaxed); - let confidence_sum_scaled = self.confidence_sum_scaled.load(Ordering::Relaxed); - - PilotMetricsReport { - total_decisions, - start_guidance_calls: self.start_guidance_calls.load(Ordering::Relaxed), - fork_decisions: self.fork_decisions.load(Ordering::Relaxed), - backtrack_calls: self.backtrack_calls.load(Ordering::Relaxed), - evaluate_calls: self.evaluate_calls.load(Ordering::Relaxed), - accuracy: if total_feedback > 0 { - correct as f64 / total_feedback as f64 - } else { - 0.0 - }, - correct_decisions: correct, - incorrect_decisions: self.incorrect_decisions.load(Ordering::Relaxed), - avg_confidence: if confidence_count > 0 { - (confidence_sum_scaled as f64 / 1_000_000.0) / confidence_count as f64 - } else { - 0.0 - }, - llm_calls: self.llm_calls.load(Ordering::Relaxed), - interventions: self.interventions.load(Ordering::Relaxed), - skipped_interventions: self.skipped_interventions.load(Ordering::Relaxed), - budget_exhausted: self.budget_exhausted.load(Ordering::Relaxed), - algorithm_fallbacks: self.algorithm_fallbacks.load(Ordering::Relaxed), - } - } -} - -/// Pilot metrics report. -#[derive(Debug, Clone)] -pub struct PilotMetricsReport { - /// Total number of decisions. - pub total_decisions: u64, - /// Number of start guidance calls. - pub start_guidance_calls: u64, - /// Number of fork decisions. - pub fork_decisions: u64, - /// Number of backtrack calls. - pub backtrack_calls: u64, - /// Number of evaluate calls. - pub evaluate_calls: u64, - /// Decision accuracy based on feedback. - pub accuracy: f64, - /// Number of correct decisions. - pub correct_decisions: u64, - /// Number of incorrect decisions. 
- pub incorrect_decisions: u64, - /// Average confidence across all decisions. - pub avg_confidence: f64, - /// Number of LLM calls made by Pilot. - pub llm_calls: u64, - /// Number of interventions. - pub interventions: u64, - /// Number of skipped interventions. - pub skipped_interventions: u64, - /// Number of budget exhausted events. - pub budget_exhausted: u64, - /// Number of algorithm fallbacks. - pub algorithm_fallbacks: u64, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_pilot_metrics_recording() { - let config = PilotMetricsConfig::default(); - let metrics = PilotMetrics::new(); - - metrics.record_decision(0.9, InterventionPoint::Start, &config); - metrics.record_decision(0.8, InterventionPoint::Fork, &config); - metrics.record_decision(0.7, InterventionPoint::Fork, &config); - - metrics.record_feedback(true, &config); - metrics.record_feedback(false, &config); - - let report = metrics.generate_report(); - assert_eq!(report.total_decisions, 3); - assert_eq!(report.fork_decisions, 2); - assert!((report.accuracy - 0.5).abs() < 0.01); - assert!((report.avg_confidence - 0.8).abs() < 0.01); - } -} From 3c44f37832872bb4d1007ba587e4d8fd642e5c9d Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Tue, 21 Apr 2026 00:41:23 +0800 Subject: [PATCH 2/5] feat(client): enable description generation by default Set generate_description to true in IndexOptions default configuration to enable automatic description generation for improved search functionality. 
--- rust/src/client/types.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/src/client/types.rs b/rust/src/client/types.rs index 7da62176..8995f6f2 100644 --- a/rust/src/client/types.rs +++ b/rust/src/client/types.rs @@ -92,7 +92,7 @@ impl Default for IndexOptions { mode: IndexMode::Default, generate_summaries: true, generate_ids: true, - generate_description: false, + generate_description: true, enable_synonym_expansion: true, timeout_secs: None, } From 70fc475b7e9d1a6662731468792e77985ff82bf8 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Tue, 21 Apr 2026 00:52:35 +0800 Subject: [PATCH 3/5] refactor(engine): remove cancellation mechanism and active operations tracking - Remove CancelFlag and active_ops fields from Engine struct - Delete all cancellation-related methods (cancel, reset_cancel, is_cancelled, check_cancel) - Remove ActiveGuard struct and associated increment/decrement logic - Eliminate cancellation checks from index, query, and related operations - Update metrics_report documentation to reflect removed pilot decision metrics - Clean up unused imports and dependencies --- rust/src/client/engine.rs | 130 +------------------------------------- 1 file changed, 2 insertions(+), 128 deletions(-) diff --git a/rust/src/client/engine.rs b/rust/src/client/engine.rs index e23d79d5..d32b4e6b 100644 --- a/rust/src/client/engine.rs +++ b/rust/src/client/engine.rs @@ -37,12 +37,7 @@ //! # } //! ``` -use std::{ - collections::HashMap, - sync::Arc, - sync::Mutex, - sync::atomic::{AtomicBool, Ordering}, -}; +use std::{collections::HashMap, sync::Arc}; use futures::StreamExt; use tracing::info; @@ -70,9 +65,6 @@ use super::{ workspace::WorkspaceClient, }; -/// Shared cancel state: `true` means cancelled. -type CancelFlag = Arc; - /// The main Engine client. /// /// Provides high-level operations for document indexing and retrieval. @@ -101,12 +93,6 @@ pub struct Engine { /// Central metrics hub for unified collection. 
metrics_hub: Arc, - - /// Shared cancel flag — set by `cancel()`, checked by long-running operations. - cancelled: CancelFlag, - - /// Active operation count so `cancel()` can wait for drain. - active_ops: Arc>, } impl Engine { @@ -142,8 +128,6 @@ impl Engine { retriever, workspace: workspace_client, metrics_hub, - cancelled: Arc::new(AtomicBool::new(false)), - active_ops: Arc::new(Mutex::new(0)), }) } @@ -160,12 +144,10 @@ impl Engine { /// Returns an [`IndexResult`] containing the indexed document metadata. #[tracing::instrument(skip_all, fields(sources = ctx.sources.len()))] pub async fn index(&self, ctx: IndexContext) -> Result { - self.check_cancel()?; if ctx.is_empty() { return Err(Error::Config("No document sources provided".into())); } - let _guard = self.inc_active(); let timeout_secs = ctx.options.timeout_secs; self.with_timeout(timeout_secs, async move { @@ -252,16 +234,6 @@ impl Engine { options: &super::types::IndexOptions, name: Option<&str>, ) -> (Vec, Vec) { - if self.is_cancelled() { - return ( - Vec::new(), - vec![FailedItem::new( - source.to_string(), - "Operation cancelled".to_string(), - )], - ); - } - let source_label = source.to_string(); match self.resolve_index_action(source, options).await { @@ -352,10 +324,6 @@ impl Engine { let max_attempts = retry.max_attempts; for attempt in 0..max_attempts { - if self.is_cancelled() { - return Err(Error::Config("Operation cancelled".into())); - } - let result = if let Some(tree) = existing_tree { self.indexer .index_with_existing(source, name, pipeline_options.clone(), Some(tree)) @@ -445,8 +413,6 @@ impl Engine { /// (single document, multiple documents, or entire workspace). 
#[tracing::instrument(skip_all, fields(query = %ctx.query))] pub async fn query(&self, ctx: QueryContext) -> Result { - self.check_cancel()?; - let _guard = self.inc_active(); let timeout_secs = ctx.timeout_secs; self.with_timeout(timeout_secs, async move { @@ -480,9 +446,6 @@ impl Engine { /// Events are translated from the agent's internal event stream /// into the public `RetrieveEventReceiver` stream. pub async fn query_stream(&self, ctx: QueryContext) -> Result { - self.check_cancel()?; - let _guard = self.inc_active(); - let doc_ids = self.resolve_scope(&ctx.scope).await?; let query = ctx.query.clone(); @@ -844,32 +807,11 @@ impl Engine { /// Generate a complete metrics report. /// /// Returns a [`MetricsReport`](crate::metrics::MetricsReport) containing - /// LLM usage, pilot decision, and retrieval operation metrics. + /// LLM usage and retrieval operation metrics. pub fn metrics_report(&self) -> crate::metrics::MetricsReport { self.metrics_hub.generate_report() } - /// Cancel all in-flight `index()` and `query()` operations. - /// - /// After calling this, running operations will return at the next - /// convenient point with a cancellation error. New operations will - /// also fail until [`reset_cancel`](Self::reset_cancel) is called. - pub fn cancel(&self) { - self.cancelled.store(true, Ordering::Relaxed); - tracing::info!("Cancellation requested"); - } - - /// Reset the cancel flag so new operations can proceed. - pub fn reset_cancel(&self) { - self.cancelled.store(false, Ordering::Relaxed); - tracing::info!("Cancel flag reset"); - } - - /// Returns `true` if cancellation has been requested. - pub fn is_cancelled(&self) -> bool { - self.cancelled.load(Ordering::Relaxed) - } - // ============================================================ // Internal // ============================================================ @@ -907,23 +849,6 @@ impl Engine { Ok((documents, failed)) } - /// Check cancel flag, returning an error if cancelled. 
- fn check_cancel(&self) -> Result<()> { - if self.cancelled.load(Ordering::Relaxed) { - return Err(Error::Config("Operation cancelled".into())); - } - Ok(()) - } - - /// Increment active operation counter. Returns a guard that decrements on drop. - fn inc_active(&self) -> ActiveGuard { - let mut ops = self.active_ops.lock().unwrap(); - *ops += 1; - ActiveGuard { - active_ops: Arc::clone(&self.active_ops), - } - } - /// Run a future with an optional timeout. /// If `timeout_secs` is `Some`, wraps the future in `tokio::time::timeout`. async fn with_timeout(&self, timeout_secs: Option, fut: F) -> Result @@ -1135,24 +1060,10 @@ impl Clone for Engine { retriever: self.retriever.clone(), workspace: self.workspace.clone(), metrics_hub: Arc::clone(&self.metrics_hub), - cancelled: Arc::clone(&self.cancelled), - active_ops: Arc::clone(&self.active_ops), } } } -/// RAII guard that decrements `active_ops` on drop. -struct ActiveGuard { - active_ops: Arc>, -} - -impl Drop for ActiveGuard { - fn drop(&mut self) { - let mut ops = self.active_ops.lock().unwrap(); - *ops = ops.saturating_sub(1); - } -} - impl std::fmt::Debug for Engine { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Engine").finish_non_exhaustive() @@ -1164,43 +1075,6 @@ mod tests { use super::*; use crate::client::types::IndexMode; - // ── Cancel ──────────────────────────────────────────────────────────── - - #[test] - fn test_cancel_flag() { - // We can't construct a full Engine without async + LLM, so test the - // underlying primitives directly. 
- let flag = Arc::new(AtomicBool::new(false)); - assert!(!flag.load(Ordering::Relaxed)); - - flag.store(true, Ordering::Relaxed); - assert!(flag.load(Ordering::Relaxed)); - - flag.store(false, Ordering::Relaxed); - assert!(!flag.load(Ordering::Relaxed)); - } - - #[test] - fn test_active_guard_decrement() { - let active_ops: Arc> = Arc::new(Mutex::new(0)); - - // Increment - { - let mut ops = active_ops.lock().unwrap(); - *ops += 1; - } - - assert_eq!(*active_ops.lock().unwrap(), 1); - - // Drop guard (simulate ActiveGuard drop) - { - let mut ops = active_ops.lock().unwrap(); - *ops = ops.saturating_sub(1); - } - - assert_eq!(*active_ops.lock().unwrap(), 0); - } - // ── resolve_index_action Default mode ────────────────────────────────── // We can't call resolve_index_action without a workspace, but we can From 1daecbe8fd0c75729e9598fd60e8cf78fe7c666a Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Tue, 21 Apr 2026 01:17:08 +0800 Subject: [PATCH 4/5] docs(README): update Vectorless description to reflect agentic-based approach Replace the old "reasoning-native" terminology with "agentic-based" to better describe the document engine's architecture. Remove outdated bullet points about reasoning principles that are no longer accurate. feat(retrieval): refactor engine to use agent-based architecture Change the retrieval engine from using search strategies (greedy DFS, beam search, MCTS) to an agent-based system with Orchestrator coordinating Workers. The new system uses LLM-guided navigation commands (ls, cd, cat, find, grep) with progress evaluation and replanning capabilities while maintaining budget awareness. --- README.md | 8 +++----- rust/examples/flow.rs | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 847b0540..82c01c74 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Vectorless -

Document Engine for AI

+

Agent-based Document Engine

[![PyPI](https://img.shields.io/pypi/v/vectorless.svg)](https://pypi.org/project/vectorless/) [![PyPI Downloads](https://static.pepy.tech/badge/vectorless/month)](https://pepy.tech/projects/vectorless) @@ -13,11 +13,9 @@ -**Vectorless** is a reasoning-native document engine with the core written in Rust. It will reason through any of your structured documents — **PDFs, Markdown, reports, contracts** — and retrieve only what's relevant. Nothing more, nothing less. +**Reason, don't vector.** -- **Reason, don't vector.** — Retrieval is guided by reasoning over document structure. -- **Model fails, we fail.** — No silent degradation. No heuristic fallbacks. -- **No thought, no answer.** — Only LLM-reasoned output counts as an answer. +**Vectorless** is an agentic-based document engine with the core written in Rust. It will reason through any of your structured documents — **PDFs, Markdown, reports, contracts** — and retrieve only what's relevant. Nothing more, nothing less. ## Quick Start diff --git a/rust/examples/flow.rs b/rust/examples/flow.rs index 759c0b2e..ce13d80b 100644 --- a/rust/examples/flow.rs +++ b/rust/examples/flow.rs @@ -42,7 +42,7 @@ The indexing pipeline processes documents through multiple stages: parsing, tree ### Retrieval Engine -The retrieval engine supports multiple search strategies including greedy depth-first search, beam search, and MCTS. A Pilot component provides LLM-guided navigation at key decision points during tree traversal. The engine is budget-aware, tracking token usage and making cost-conscious decisions about when to invoke the LLM versus using cheaper heuristic scoring. +The retrieval engine uses an agent-based architecture where an Orchestrator coordinates Workers that navigate the document tree using LLM-guided decisions (ls, cd, cat, find, grep). The Orchestrator evaluates progress after each step and can replan when results are insufficient. 
The engine is budget-aware, tracking token usage and making cost-conscious decisions about when to invoke the LLM versus using cheaper heuristic scoring. ## Performance From f44fdbe61f4ea798f017eaf62054366286d91c81 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Tue, 21 Apr 2026 01:18:54 +0800 Subject: [PATCH 5/5] chore(release): bump workspace and package versions - Update workspace package version from 0.1.30 to 0.1.31 in Cargo.toml - Update vectorless package version from 0.1.9 to 0.1.10 in pyproject.toml --- Cargo.toml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8e278a32..ea675029 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ members = ["rust", "python"] resolver = "2" [workspace.package] -version = "0.1.30" +version = "0.1.31" edition = "2024" authors = ["zTgx "] license = "Apache-2.0" diff --git a/pyproject.toml b/pyproject.toml index 1d8d38ea..6d57600e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "vectorless" -version = "0.1.9" +version = "0.1.10" description = "Reasoning-native document intelligence engine for AI" readme = "README.md" requires-python = ">=3.9"