From a17fab42b673d8763b5aa90f6827123b2c677921 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Thu, 7 May 2026 23:32:45 +0200 Subject: [PATCH 1/5] WIP: new view_documents tool and search in documents Using "transmutation" with support for converting a whole range of document types to markdown including PDF and DOCX. --- Cargo.lock | 903 +++++++++++++++++- crates/code_assistant/Cargo.toml | 6 + .../assets/icons/file_generic.svg | 7 +- .../code_assistant/assets/icons/todo_list.svg | 5 + crates/code_assistant/src/agent/runner.rs | 4 +- .../code_assistant/src/tools/core/registry.rs | 5 +- .../src/tools/impls/document_search.rs | 195 ++++ crates/code_assistant/src/tools/impls/mod.rs | 4 + .../src/tools/impls/search_files.rs | 300 ++++-- .../src/tools/impls/view_documents.rs | 499 ++++++++++ .../code_assistant/src/ui/gpui/file_icons.rs | 6 +- .../src/ui/gpui/tool_block_renderers.rs | 4 + 12 files changed, 1822 insertions(+), 116 deletions(-) create mode 100644 crates/code_assistant/assets/icons/todo_list.svg create mode 100644 crates/code_assistant/src/tools/impls/document_search.rs create mode 100644 crates/code_assistant/src/tools/impls/view_documents.rs diff --git a/Cargo.lock b/Cargo.lock index 655026fa..3773bcd6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,12 +2,37 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "ab_glyph" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01c0457472c38ea5bd1c3b5ada5e368271cb550be7a4ca4a0b4634e9913f6cc2" +dependencies = [ + "ab_glyph_rasterizer", + "owned_ttf_parser", +] + +[[package]] +name = "ab_glyph_rasterizer" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "366ffbaa4442f4684d91e2cd7c5ea7c4ed8add41959a31447066e279e432b618" + [[package]] name = "adler2" version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "adobe-cmap-parser" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae8abfa9a4688de8fc9f42b3f013b6fffec18ed8a554f5f113577e0b9b3212a3" +dependencies = [ + "pom", +] + [[package]] name = "aes" version = "0.8.4" @@ -201,6 +226,15 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + [[package]] name = "ar_archive_writer" version = "0.5.1" @@ -215,6 +249,9 @@ name = "arbitrary" version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] [[package]] name = "arboard" @@ -939,6 +976,20 @@ dependencies = [ "profiling", ] +[[package]] +name = "blake3" +version = "1.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aa83c34e62843d924f905e0f5c866eb1dd6545fc4d719e803d9ba6030371fce" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq 0.4.2", + "cpufeatures 0.3.0", +] + [[package]] name = "block" version = "0.1.6" @@ -1016,6 +1067,12 @@ version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +[[package]] +name = "bytecount" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" + [[package]] name = "bytemuck" version = "1.25.0" @@ -1057,6 +1114,15 @@ dependencies = [ "serde", ] +[[package]] +name = "bzip2" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" +dependencies = [ + "libbz2-rs-sys", +] + [[package]] name = "calloop" version = "0.13.0" @@ -1083,6 +1149,15 @@ dependencies = [ "wayland-client", ] +[[package]] +name = "caseless" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6fd507454086c8edfd769ca6ada439193cdb209c7681712ef6275cccbfe5d8" +dependencies = [ + "unicode-normalization", +] + [[package]] name = "cassowary" version = "0.3.0" @@ -1146,6 +1221,17 @@ dependencies = [ "nom 7.1.3", ] +[[package]] +name = "cfb" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a4f8e55be323b378facfcf1f06aa97f6ec17cf4ac84fb17325093aaf62da41" +dependencies = [ + "byteorder", + "fnv", + "uuid", +] + [[package]] name = "cfg-if" version = "1.0.4" @@ -1291,6 +1377,7 @@ dependencies = [ "anstyle", "clap_lex", "strsim", + "terminal_size", ] [[package]] @@ -1446,6 +1533,7 @@ dependencies = [ "tokio-util", "tracing", "tracing-subscriber", + "transmutation", "tui-markdown", "unicode-segmentation", "unicode-width 0.1.14", @@ -1529,6 +1617,27 @@ version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" +[[package]] +name = "comrak" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8c32ff8b21372fab0e9ecc4e42536055702dc5faa418362bffd1544f9d12637" +dependencies = [ + "caseless", + "clap", + "derive_builder", + "entities", + "memchr", + "once_cell", + "regex", + "shell-words", + "slug", + "syntect", + "typed-arena", + "unicode_categories", + "xdg", +] + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -1564,6 +1673,18 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + [[package]] name = "content_inspector" version = "0.2.4" @@ -1773,6 +1894,21 @@ dependencies = [ "libc", ] +[[package]] +name = "crc" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "217698eaf96b4a3f0bc4f3662aaa55bdf913cd54d7204591faa790070c6d0853" + [[package]] name = "crc32fast" version = "1.5.0" @@ -1880,6 +2016,19 @@ dependencies = [ "smallvec", ] +[[package]] +name = "cssparser" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "phf 0.11.3", + "smallvec", +] + [[package]] name = "cssparser-macros" version = "0.6.1" @@ -2019,6 +2168,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "derive_builder" version = "0.20.2" @@ -2086,6 +2246,12 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "deunicode" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" + [[package]] name = "diff" version = "0.1.13" @@ -2192,6 +2358,27 @@ dependencies = [ "libloading", ] +[[package]] +name = "doc-comment" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "780955b8b195a21ab8e4ac6b60dd1dbdcec1dc6c51c0617964b08c81785e12c9" + +[[package]] +name = "docx-rs" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed73cbf5e1c37baa23f4132569ac1187829f03922c206bd68fe109e3001a343d" +dependencies = [ + "base64 0.22.1", + "image", + "quick-xml 0.36.2", + "serde", + "serde_json", + "thiserror 2.0.18", + "zip 0.6.6", +] + [[package]] name = "dotenv" version = "0.15.0" @@ -2264,6 +2451,12 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "12a0bb14ac04a9fcf170d0bbbef949b44cc492f4452bd20c095636956f653642" +[[package]] +name = "ego-tree" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c6ba7d4eec39eaa9ab24d44a0e73a7949a1095a8b3f3abb11eddf27dbb56a53" + [[package]] name = "either" version = "1.15.0" @@ -2305,6 +2498,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" +[[package]] +name = "entities" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5320ae4c3782150d900b79807611a59a99fc9a1d61d686faafc24b93fc8d7ca" + [[package]] name = "enum-iterator" version = "2.3.0" @@ -2421,10 +2620,19 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc89bf99e5dc15954a60f707c1e09d7540e5cd9af85fa75caa0b510bc08c5342" dependencies = [ - "euclid", + "euclid 0.22.14", "svg_fmt", ] +[[package]] +name = "euclid" +version = "0.20.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bb7ef65b3777a325d1eeefefab5b6d4959da54747e33bd6258e789640f307ad" +dependencies = [ + "num-traits", +] + [[package]] name = "euclid" version = "0.22.14" @@ -2487,6 +2695,28 @@ dependencies = [ "zune-inflate", ] +[[package]] +name = "fancy-regex" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" +dependencies = [ + "bit-set", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "fancy-regex" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "998b056554fbe42e03ae0e152895cd1a7e1002aec800fdc6635d20270260c46f" +dependencies = [ + "bit-set", + "regex-automata", + "regex-syntax", +] + [[package]] name = "fastrand" version = "1.9.0" @@ -2542,6 +2772,12 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "file-format" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ef3d5e8ae27277c8285ac43ed153158178ef0f79567f32024ca8140a0c7cd8" + [[package]] name = "filedescriptor" version = "0.8.3" @@ -2578,6 +2814,7 @@ checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", + "zlib-rs", ] [[package]] @@ -3605,6 +3842,33 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "html5ever" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c" +dependencies = [ + "log", + "mac", + "markup5ever 0.14.1", + "match_token", +] + +[[package]] +name = "html_parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f56db07b6612644f6f7719f8ef944f75fff9d6378fdf3d316fd32194184abd" +dependencies = [ + "doc-comment", + "pest", + "pest_derive", + "serde", + "serde_derive", + "serde_json", + "thiserror 1.0.69", +] + [[package]] name = "http" version = "0.2.12" @@ -3980,12 +4244,36 @@ dependencies = [ "quick-error", ] +[[package]] +name = "imageproc" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "602b4e8a4cc3e98372b766cd184ab532999bc0e839b7469e759511ccabc65d77" +dependencies = [ + "ab_glyph", + "approx", + "getrandom 0.2.17", + "image", + "itertools 0.12.1", + "nalgebra", + "num", + "rand 0.8.5", + "rand_distr", + "rayon", +] + [[package]] name = "imagesize" version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edcd27d72f2f071c64249075f42e205ff93c9a4c5f6c6da53e79ed9f9832c285" +[[package]] +name = "imagesize" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09e54e57b4c48b40f7aec75635392b12b3421fa26fe8b4332e63138ed278459c" + [[package]] name = "imgref" version = "1.12.0" @@ -4148,6 +4436,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -4260,7 +4557,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c62026ae44756f8a599ba21140f350303d4f08dcdcc71b5ad9c9bb8128c13c62" dependencies = [ "arrayvec", - "euclid", + "euclid 0.22.14", "smallvec", ] @@ -4309,6 +4606,12 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a79a3332a6609480d7d0c9eab957bca6b455b91bb84e66d19f5ff66294b85b8" +[[package]] +name = "libbz2-rs-sys" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3a6a8c165077efc8f3a971534c50ea6a1a18b329ef4a66e897a7e3a1494565f" + [[package]] name = "libc" version = "0.2.184" @@ -4475,6 +4778,48 @@ dependencies = [ "imgref", ] +[[package]] +name = "lopdf" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5c8ecfc6c72051981c0459f75ccc585e7ff67c70829560cda8e647882a9abff" +dependencies = [ + "encoding_rs", + "flate2", + "indexmap", + "itoa", + "log", + "md-5", + "nom 7.1.3", + "rangemap", + "time", + "weezl", +] + +[[package]] +name = "lopdf" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7c1d3350d071cb86987a6bcb205c7019a0eb70dcad92b454fec722cca8d68b" +dependencies = [ + "aes", + "cbc", + "chrono", + "encoding_rs", + "flate2", + "indexmap", + "itoa", + "log", + "md-5", + "nom 7.1.3", + "nom_locate", + "rangemap", + "rayon", + "thiserror 2.0.18", + "time", + "weezl", +] + [[package]] name = "lru" version = "0.12.5" @@ -4530,7 +4875,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4336502e29e32af93cf2dad2214ed6003c17ceb5bd499df77b1de663b9042b92" dependencies = [ "arrayvec", - "euclid", + "euclid 0.22.14", "num-traits", ] @@ -4555,6 +4900,16 @@ dependencies = [ "num-traits", ] +[[package]] +name = "lzma-rust2" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c60a23ffb90d527e23192f1246b14746e2f7f071cb84476dd879071696c18a4a" +dependencies = [ + "crc", + "sha2 0.10.9", +] + [[package]] name = "mac" version = "0.1.1" @@ -4617,8 +4972,22 @@ dependencies = [ ] [[package]] -name = "markup5ever_rcdom" -version = "0.3.0" +name = "markup5ever" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7a7213d12e1864c0f002f52c2923d4556935a43dec5e71355c2760e0f6e7a18" +dependencies = [ + "log", + "phf 0.11.3", + "phf_codegen 0.11.3", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "markup5ever_rcdom" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edaa21ab3701bfee5099ade5f7e1f84553fd19228cf332f13cd6e964bf59be18" dependencies = [ @@ -4628,6 +4997,17 @@ dependencies = [ "xml5ever", ] +[[package]] +name = "match_token" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "matchers" version = "0.2.0" @@ -4643,6 +5023,16 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" +[[package]] +name = "matrixmultiply" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" +dependencies = [ + "autocfg", + "rawpointer", +] + [[package]] name = "maybe-rayon" version = "0.1.1" @@ -4811,6 +5201,21 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "nalgebra" +version = "0.32.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5c17de023a86f59ed79891b2e5d5a94c705dbe904a5b5c9c952ea6221b03e4" +dependencies = [ + "approx", + "matrixmultiply", + "num-complex", + "num-rational", + "num-traits", + "simba", + "typenum", +] + [[package]] name = "nanorand" version = "0.7.0" @@ -4907,6 +5312,17 @@ dependencies = [ "memchr", ] +[[package]] +name = "nom_locate" +version = "4.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e3c83c053b0713da60c5b8de47fe8e494fe3ece5267b2f23090a07a053ba8f3" +dependencies = [ + "bytecount", + "memchr", + "nom 7.1.3", +] + [[package]] name = "noop_proc_macro" version = "0.3.0" @@ -5423,6 +5839,15 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "owned_ttf_parser" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36820e9051aca1014ddc75770aab4d68bc1e9e632f0f5627c4086bc216fb583b" +dependencies = [ + "ttf-parser 0.25.1", +] + [[package]] name = "parking" version = "2.2.1" @@ -5505,12 +5930,70 @@ dependencies = [ "hmac", ] +[[package]] +name = "pdf-extract" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbb3a5387b94b9053c1e69d8abfd4dd6dae7afda65a5c5279bc1f42ab39df575" +dependencies = [ + "adobe-cmap-parser", + "encoding_rs", + "euclid 0.20.14", + "lopdf 0.34.0", + "postscript", + "type1-encoding-parser", + "unicode-normalization", +] + [[package]] name = "percent-encoding" version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "pest" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" +dependencies = [ + "memchr", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "pest_meta" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" +dependencies = [ + "pest", + "sha2 0.10.9", +] + [[package]] name = "phf" version = "0.10.1" @@ -5737,6 +6220,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5da3b0203fd7ee5720aa0b5e790b591aa5d3f41c3ed2c34a3a393382198af2f7" +[[package]] +name = "pom" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60f6ce597ecdcc9a098e7fddacb1065093a3d66446fa16c675e7e71d1b5c28e6" + [[package]] name = "postage" version = "0.5.0" @@ -5754,6 +6243,12 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "postscript" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78451badbdaebaf17f053fd9152b3ffb33b516104eacb45e7864aaa9c712f306" + [[package]] name = "potential_utf" version = "0.1.5" @@ -5769,6 +6264,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "ppmd-rust" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efca4c95a19a79d1c98f791f10aebd5c1363b473244630bb7dbde1dc98455a24" + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -5932,6 +6433,26 @@ dependencies = [ "memchr", ] +[[package]] +name = "quick-xml" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7649a7b4df05aed9ea7ec6f628c67c9953a43869b8bc50929569b2999d443fe" +dependencies = [ + "encoding_rs", + "memchr", +] + +[[package]] +name = "quick-xml" +version = "0.37.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quick-xml" version = "0.38.4" @@ -6095,6 +6616,16 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + [[package]] name = "rangemap" version = "1.7.1" @@ -6189,6 +6720,12 @@ dependencies = [ "raw-window-handle", ] +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + [[package]] name = "rayon" version = "1.11.0" @@ -6757,6 +7294,15 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "safe_arch" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b02de82ddbe1b636e6170c21be622223aea188ef2e139be0a5b219ec215323" +dependencies = [ + "bytemuck", +] + [[package]] name = "same-file" version = "1.0.6" @@ -6829,12 +7375,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "585480e3719b311b78a573db1c9d9c4c1f8010c2dee4cc59c2efe58ea4dbc3e1" dependencies = [ "ahash", - "cssparser", - "ego-tree", + "cssparser 0.31.2", + "ego-tree 0.6.3", "getopts", "html5ever 0.26.0", "once_cell", - "selectors", + "selectors 0.25.0", + "tendril", +] + +[[package]] +name = "scraper" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0e749d29b2064585327af5038a5a8eb73aeebad4a3472e83531a436563f7208" +dependencies = [ + "ahash", + "cssparser 0.34.0", + "ego-tree 0.9.0", + "getopts", + "html5ever 0.29.1", + "precomputed-hash", + "selectors 0.26.0", "tendril", ] @@ -6939,7 +7501,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06" dependencies = [ "bitflags 2.11.0", - "cssparser", + "cssparser 0.31.2", "derive_more 0.99.20", "fxhash", "log", @@ -6947,7 +7509,26 @@ dependencies = [ "phf 0.10.1", "phf_codegen 0.10.0", "precomputed-hash", - "servo_arc", + "servo_arc 0.3.0", + "smallvec", +] + +[[package]] +name = "selectors" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" +dependencies = [ + "bitflags 2.11.0", + "cssparser 0.34.0", + "derive_more 0.99.20", + "fxhash", + "log", + "new_debug_unreachable", + "phf 0.11.3", + "phf_codegen 0.11.3", + "precomputed-hash", + "servo_arc 0.4.3", "smallvec", ] @@ -7114,6 +7695,15 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "servo_arc" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "170fb83ab34de17dc69aa7c67482b22218ddb85da56546f9bd6b929e32a05930" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "sha1" version = "0.10.6" @@ -7162,6 +7752,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shell-words" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77" + [[package]] name = "shlex" version = "1.3.0" @@ -7199,6 +7795,19 @@ dependencies = [ "libc", ] +[[package]] +name = "simba" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "061507c94fc6ab4ba1c9a0305018408e312e17c041eb63bef8aa726fa33aceae" +dependencies = [ + "approx", + "num-complex", + "num-traits", + "paste", + "wide", +] + [[package]] name = "simd-adler32" version = "0.3.9" @@ -7272,6 +7881,16 @@ dependencies = [ "version_check", ] +[[package]] +name = "slug" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "882a80f72ee45de3cc9a5afeb2da0331d58df69e4e7d8eeb5d3c7784ae67e724" +dependencies = [ + "deunicode", + "wasm-bindgen", +] + [[package]] name = "smallvec" version = "1.15.1" @@ -7668,6 +8287,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "656b45c05d95a5704399aeef6bd0ddec7b2b3531b7c9e900abbf7c4d2190c925" dependencies = [ "bincode", + "fancy-regex 0.16.2", "flate2", "fnv", "once_cell", @@ -7832,6 +8452,16 @@ dependencies = [ "terminal_view", ] +[[package]] +name = "terminal_size" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "230a1b821ccbd75b185820a1f1ff7b14d21da1e442e22c0863ea5f08771a8874" +dependencies = [ + "rustix 1.1.4", + "windows-sys 0.61.2", +] + [[package]] name = "terminal_view" version = "0.1.0" @@ -7853,6 +8483,12 @@ dependencies = [ "unicode-width 0.2.1", ] +[[package]] +name = "thin-vec" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0f7e269b48f0a7dd0146680fa24b50cc67fc0373f086a5b2f99bd084639b482" + [[package]] name = "thiserror" version = "1.0.69" @@ -7893,6 +8529,12 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "thousands" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820" + [[package]] name = "thread_local" version = "1.1.9" @@ -8292,6 +8934,16 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + "tracing-core", +] + [[package]] name = "tracing-subscriber" version = "0.3.23" @@ -8302,12 +8954,56 @@ dependencies = [ "nu-ansi-term", "once_cell", "regex-automata", + "serde", + "serde_json", "sharded-slab", "smallvec", "thread_local", "tracing", "tracing-core", "tracing-log", + "tracing-serde", +] + +[[package]] +name = "transmutation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "043e6abc35e333be2b66c04f28b38ef6a29097ab53ace0cc84e2a9c9f833a7a4" +dependencies = [ + "anyhow", + "async-trait", + "blake3", + "comrak", + "dirs 5.0.1", + "docx-rs", + "file-format", + "futures", + "html5ever 0.29.1", + "image", + "imageproc", + "lopdf 0.35.0", + "mime", + "mime_guess", + "num_cpus", + "once_cell", + "pdf-extract", + "pulldown-cmark", + "quick-xml 0.37.5", + "rayon", + "regex", + "scraper 0.21.0", + "serde", + "serde_json", + "sha2 0.10.9", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tracing", + "tracing-subscriber", + "umya-spreadsheet", + "walkdir", + "zip 6.0.0", ] [[package]] @@ -8432,6 +9128,21 @@ dependencies = [ "utf-8", ] +[[package]] +name = "type1-encoding-parser" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa10c302f5a53b7ad27fd42a3996e23d096ba39b5b8dd6d9e683a05b01bee749" +dependencies = [ + "pom", +] + +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + [[package]] name = "typeid" version = "1.0.3" @@ -8444,6 +9155,12 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + [[package]] name = "uds_windows" version = "1.2.1" @@ -8455,6 +9172,35 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "umya-spreadsheet" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "408c7e039c96ec1d517a1111ade7fadab889f32c096dac691a1e3b8018c3e39a" +dependencies = [ + "aes", + "ahash", + "base64 0.22.1", + "byteorder", + "cbc", + "cfb", + "chrono", + "encoding_rs", + "fancy-regex 0.14.0", + "getrandom 0.2.17", + "hmac", + "html_parser", + "imagesize 0.14.0", + "lazy_static", + "md-5", + "quick-xml 0.37.5", + "regex", + "sha2 0.10.9", + "thin-vec", + "thousands", + "zip 2.4.2", +] + [[package]] name = "unicase" version = "2.9.0" @@ -8509,6 +9255,15 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" +[[package]] +name = "unicode-normalization" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" +dependencies = [ + "tinyvec", +] + [[package]] name = "unicode-properties" version = "0.1.4" @@ -8562,6 +9317,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "unsafe-libyaml" version = "0.2.11" @@ -8603,7 +9364,7 @@ dependencies = [ "data-url", "flate2", "fontdb 0.23.0", - "imagesize", + "imagesize 0.13.0", "kurbo", "log", "pico-args", @@ -9011,7 +9772,7 @@ dependencies = [ "percent-encoding", "regex", "reqwest", - "scraper", + "scraper 0.18.1", "serde", "serde_json", "tempfile", @@ -9105,6 +9866,16 @@ dependencies = [ "winsafe", ] +[[package]] +name = "wide" +version = "0.7.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce5da8ecb62bcd8ec8b7ea19f69a51275e91299be594ea5cc6ef7819e16cd03" +dependencies = [ + "bytemuck", + "safe_arch", +] + [[package]] name = "winapi" version = "0.3.9" @@ -9902,6 +10673,12 @@ version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bec9e4a500ca8864c5b47b8b482a73d62e4237670e5b5f1d6b9e3cae50f28f2b" +[[package]] +name = "xdg" +version = "2.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "213b7324336b53d2414b2db8537e56544d981803139155afa84f76eeebb7a546" + [[package]] name = "xdg-home" version = "1.3.0" @@ -10390,12 +11167,114 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "zip" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" +dependencies = [ + "byteorder", + "crc32fast", + "crossbeam-utils", + "flate2", +] + +[[package]] +name = "zip" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50" +dependencies = [ + "arbitrary", + "crc32fast", + "crossbeam-utils", + "displaydoc", + "flate2", + "indexmap", + "memchr", + "thiserror 2.0.18", + "zopfli", +] + +[[package]] +name = "zip" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb2a05c7c36fde6c09b08576c9f7fb4cda705990f73b58fe011abf7dfb24168b" +dependencies = [ + "aes", + "arbitrary", + "bzip2", + "constant_time_eq 0.3.1", + "crc32fast", + "deflate64", + "flate2", + "getrandom 0.3.4", + "hmac", + "indexmap", + "lzma-rust2", + "memchr", + "pbkdf2", + "ppmd-rust", + "sha1", + "time", + "zeroize", + "zopfli", + "zstd", +] + +[[package]] +name = "zlib-rs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513" + [[package]] name = "zmij" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" +[[package]] +name = "zopfli" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249" +dependencies = [ + "bumpalo", + "crc32fast", + "log", + "simd-adler32", +] + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "zune-core" version = "0.5.1" diff --git a/crates/code_assistant/Cargo.toml b/crates/code_assistant/Cargo.toml index b25775ba..bd3d462d 100644 --- a/crates/code_assistant/Cargo.toml +++ b/crates/code_assistant/Cargo.toml @@ -3,6 +3,9 @@ name = "code-assistant" version = "0.2.7" edition = "2021" +[features] +default = ["document-conversion"] +document-conversion = ["transmutation"] [dependencies] command_executor = { path = "../command_executor" } @@ -86,6 +89,9 @@ base64 = "0.22" # Image processing image = "0.25" +# Document conversion (behind feature flag) +transmutation = { version = "0.3", features = ["office"], optional = true } + # Clipboard access for paste arboard = "3" diff --git a/crates/code_assistant/assets/icons/file_generic.svg b/crates/code_assistant/assets/icons/file_generic.svg index 3c72bd33..3b9f7d01 100644 --- a/crates/code_assistant/assets/icons/file_generic.svg +++ b/crates/code_assistant/assets/icons/file_generic.svg @@ -1,5 +1,4 @@ - - - - + + + diff --git a/crates/code_assistant/assets/icons/todo_list.svg b/crates/code_assistant/assets/icons/todo_list.svg new file mode 100644 index 00000000..826b57a3 --- /dev/null +++ b/crates/code_assistant/assets/icons/todo_list.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/crates/code_assistant/src/agent/runner.rs b/crates/code_assistant/src/agent/runner.rs index 884ced91..482b4f6b 100644 --- a/crates/code_assistant/src/agent/runner.rs +++ b/crates/code_assistant/src/agent/runner.rs @@ -1231,10 +1231,10 @@ impl Agent { guidance_section.push_str("\n\n# Repository Guidance\n"); for (file_name, content) in guidance_files { - guidance_section.push_str("\n"); + guidance_section.push('\n'); guidance_section.push_str(&format!("Loaded from `{file_name}`.\n\n")); guidance_section.push_str(&content); - guidance_section.push_str("\n"); + guidance_section.push('\n'); } system_message.push_str(&guidance_section); diff --git a/crates/code_assistant/src/tools/core/registry.rs b/crates/code_assistant/src/tools/core/registry.rs index d9e21a13..c740cad1 100644 --- a/crates/code_assistant/src/tools/core/registry.rs +++ b/crates/code_assistant/src/tools/core/registry.rs @@ -91,8 +91,8 @@ impl ToolRegistry { use crate::tools::impls::{ DeleteFilesTool, EditTool, ExecuteCommandTool, GlobFilesTool, ListFilesTool, ListProjectsTool, NameSessionTool, PerplexityAskTool, ReadFilesTool, ReplaceInFileTool, - SearchFilesTool, SpawnAgentTool, UpdatePlanTool, ViewImagesTool, WebFetchTool, - WebSearchTool, WriteFileTool, + SearchFilesTool, SpawnAgentTool, UpdatePlanTool, ViewDocumentsTool, ViewImagesTool, + WebFetchTool, WebSearchTool, WriteFileTool, }; // Register all tools - the ToolScope system will filter which ones are available @@ -109,6 +109,7 @@ impl ToolRegistry { self.register(Box::new(SearchFilesTool)); self.register(Box::new(SpawnAgentTool)); self.register(Box::new(UpdatePlanTool)); + self.register(Box::new(ViewDocumentsTool)); self.register(Box::new(ViewImagesTool)); self.register(Box::new(WebFetchTool)); self.register(Box::new(WebSearchTool)); diff --git a/crates/code_assistant/src/tools/impls/document_search.rs b/crates/code_assistant/src/tools/impls/document_search.rs new file mode 100644 index 00000000..57e142ce --- /dev/null +++ b/crates/code_assistant/src/tools/impls/document_search.rs @@ -0,0 +1,195 @@ +//! Document search support for search_files. +//! +//! When the `document-conversion` feature is enabled, this module walks the project +//! directory looking for supported document files (PDF, DOCX, XLSX, PPTX, ODT, RTF), +//! converts them to Markdown page-by-page using `transmutation`, and searches the +//! resulting text with the user's regex pattern. Matches are reported with page numbers. + +use super::search_files::DocumentMatchResult; +use regex::RegexBuilder; +use std::path::{Path, PathBuf}; +use transmutation::{Converter, OutputFormat}; +use walkdir::WalkDir; + +/// Document extensions we search within. +const DOCUMENT_EXTENSIONS: &[&str] = &["pdf", "docx", "xlsx", "pptx", "odt", "rtf"]; + +/// Maximum number of documents to search (to avoid huge delays). +const MAX_DOCUMENTS_TO_SEARCH: usize = 20; + +/// Maximum file size for documents to search (10 MB). +const MAX_SEARCH_DOCUMENT_SIZE: u64 = 10 * 1024 * 1024; + +/// Context characters around a match in the excerpt. +const EXCERPT_CONTEXT: usize = 80; + +/// Search for `regex_pattern` within document files under `root_dir`. +/// +/// If `paths` is provided, only documents under those directories are searched. +/// Returns a list of matches with page numbers and excerpts. +pub async fn search_in_documents( + root_dir: &Path, + regex_pattern: &str, + paths: Option<&[String]>, +) -> Vec { + // Build the regex (case-insensitive by default, matching search_files behavior) + let regex = match RegexBuilder::new(regex_pattern) + .case_insensitive(true) + .build() + { + Ok(r) => r, + Err(_) => return Vec::new(), + }; + + // Collect document files to search + let search_roots: Vec = if let Some(paths) = paths { + paths + .iter() + .map(|p| root_dir.join(p)) + .filter(|p| p.exists()) + .collect() + } else { + vec![root_dir.to_path_buf()] + }; + + let mut document_files: Vec = Vec::new(); + for search_root in &search_roots { + for entry in WalkDir::new(search_root) + .follow_links(false) + .into_iter() + .filter_map(|e| e.ok()) + { + if document_files.len() >= MAX_DOCUMENTS_TO_SEARCH { + break; + } + + let path = entry.path(); + if !path.is_file() { + continue; + } + + // Check extension + let ext = match path.extension().and_then(|e| e.to_str()) { + Some(e) => e.to_ascii_lowercase(), + None => continue, + }; + + if !DOCUMENT_EXTENSIONS.contains(&ext.as_str()) { + continue; + } + + // Skip large files + if let Ok(metadata) = path.metadata() { + if metadata.len() > MAX_SEARCH_DOCUMENT_SIZE { + continue; + } + } + + document_files.push(path.to_path_buf()); + } + } + + if document_files.is_empty() { + return Vec::new(); + } + + // Initialize converter once + let converter = match Converter::new() { + Ok(c) => c, + Err(_) => return Vec::new(), + }; + + let mut results: Vec = Vec::new(); + + for doc_path in &document_files { + let path_str = match doc_path.to_str() { + Some(s) => s, + None => continue, + }; + + let ext = doc_path + .extension() + .and_then(|e| e.to_str()) + .unwrap_or("") + .to_ascii_lowercase(); + + let format_name = match ext.as_str() { + "pdf" => "PDF", + "docx" => "DOCX", + "xlsx" => "XLSX", + "pptx" => "PPTX", + "odt" => "ODT", + "rtf" => "RTF", + _ => continue, + }; + + // Convert with split_pages so we can report page numbers + let conversion = converter + .convert(path_str) + .to(OutputFormat::Markdown { + split_pages: true, + optimize_for_llm: true, + }) + .execute() + .await; + + let conversion_result = match conversion { + Ok(r) => r, + Err(_) => continue, + }; + + // Search each page + for (page_idx, page_output) in conversion_result.content.iter().enumerate() { + let page_text = match String::from_utf8(page_output.data.clone()) { + Ok(t) => t, + Err(_) => continue, + }; + let matches: Vec<_> = regex.find_iter(&page_text).collect(); + + if matches.is_empty() { + continue; + } + + // Build excerpt around the first match + let first_match = &matches[0]; + let start = first_match.start().saturating_sub(EXCERPT_CONTEXT); + let end = (first_match.end() + EXCERPT_CONTEXT).min(page_text.len()); + + // Snap to char boundaries + let mut excerpt_start = start; + while excerpt_start < page_text.len() && !page_text.is_char_boundary(excerpt_start) { + excerpt_start += 1; + } + let mut excerpt_end = end; + while excerpt_end < page_text.len() && !page_text.is_char_boundary(excerpt_end) { + excerpt_end += 1; + } + + let mut excerpt = String::new(); + if excerpt_start > 0 { + excerpt.push_str("..."); + } + excerpt.push_str(&page_text[excerpt_start..excerpt_end]); + if excerpt_end < page_text.len() { + excerpt.push_str("..."); + } + + // Relative path for display + let rel_path = doc_path + .strip_prefix(root_dir) + .unwrap_or(doc_path) + .to_string_lossy() + .to_string(); + + results.push(DocumentMatchResult { + file: rel_path, + format: format_name.to_string(), + page: page_idx + 1, + excerpt, + match_count: matches.len(), + }); + } + } + + results +} diff --git a/crates/code_assistant/src/tools/impls/mod.rs b/crates/code_assistant/src/tools/impls/mod.rs index c2af93bf..84ac4036 100644 --- a/crates/code_assistant/src/tools/impls/mod.rs +++ b/crates/code_assistant/src/tools/impls/mod.rs @@ -1,5 +1,7 @@ // Tool implementations pub mod delete_files; +#[cfg(feature = "document-conversion")] +pub mod document_search; pub mod edit; pub mod execute_command; pub mod glob_files; @@ -12,6 +14,7 @@ pub mod replace_in_file; pub mod search_files; pub mod spawn_agent; pub mod update_plan; +pub mod view_documents; pub mod view_images; pub mod web_fetch; pub mod web_search; @@ -31,6 +34,7 @@ pub use replace_in_file::ReplaceInFileTool; pub use search_files::SearchFilesTool; pub use spawn_agent::SpawnAgentTool; pub use update_plan::UpdatePlanTool; +pub use view_documents::ViewDocumentsTool; pub use view_images::ViewImagesTool; pub use web_fetch::WebFetchTool; pub use web_search::WebSearchTool; diff --git a/crates/code_assistant/src/tools/impls/search_files.rs b/crates/code_assistant/src/tools/impls/search_files.rs index df0d28d9..45273034 100644 --- a/crates/code_assistant/src/tools/impls/search_files.rs +++ b/crates/code_assistant/src/tools/impls/search_files.rs @@ -29,6 +29,25 @@ pub struct SearchFilesOutput { pub truncated: bool, #[serde(default)] pub summary_mode: bool, + /// Results from searching within documents (PDF, DOCX, etc.) - only present when + /// the `document-conversion` feature is enabled. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub document_results: Vec, +} + +/// A match found inside a document file (PDF, DOCX, etc.). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DocumentMatchResult { + /// Path to the document file (relative to project root). + pub file: String, + /// Document format (e.g. "PDF", "DOCX"). + pub format: String, + /// Page number where the match was found (1-indexed). + pub page: usize, + /// A text excerpt around the match. + pub excerpt: String, + /// Number of matches on this page. + pub match_count: usize, } impl SearchFilesOutput { @@ -104,145 +123,168 @@ impl SearchFilesOutput { // Render implementation for output formatting impl Render for SearchFilesOutput { fn status(&self) -> String { + let doc_matches: usize = self.document_results.iter().map(|r| r.match_count).sum(); + let total = self.results.len() + doc_matches; if self.truncated { format!( "Found {} matches (showing {}) for '{}'", - self.total_matches, - self.results.len(), + self.total_matches + doc_matches, + total, self.regex ) } else { - format!("Found {} matches for '{}'", self.results.len(), self.regex) + format!("Found {} matches for '{}'", total, self.regex) } } fn render(&self, _tracker: &mut ResourcesTracker) -> String { - if self.results.is_empty() { + if self.results.is_empty() && self.document_results.is_empty() { return format!("No matches found for '{}'", self.regex); } let mut formatted = String::new(); // Header with match count and mode information + let doc_matches: usize = self.document_results.iter().map(|r| r.match_count).sum(); + let total_display = self.total_matches + doc_matches; if self.truncated { formatted.push_str(&format!( "Found {} matches for '{}' (showing top {} results):\n", - self.total_matches, + total_display, self.regex, self.results.len() )); - } else { + } else if !self.results.is_empty() { formatted.push_str(&format!( "Found {} matches for '{}':\n", - self.total_matches, self.regex + total_display, self.regex )); } - if self.summary_mode { - // Summary mode: show only file paths with match counts - formatted.push_str( - "\nToo many code snippets would be displayed. Showing file paths only.\n", - ); - formatted.push_str("Use the 'paths' parameter to search within specific directories for detailed results.\n\n"); - - // Group results by file path and sum match counts - let mut file_matches = std::collections::HashMap::new(); - for result in &self.results { - let match_count = result.match_lines.len(); - *file_matches.entry(result.file.clone()).or_insert(0) += match_count; - } - - // Sort files by path for consistent output - let mut sorted_files: Vec<_> = file_matches.iter().collect(); - sorted_files.sort_by_key(|(path, _)| path.as_path()); - - for (file_path, total_matches) in sorted_files { - formatted.push_str(&format!( - "{} ({} matches)\n", - file_path.display(), - total_matches - )); - } - - if self.truncated { - let unique_files = file_matches.len(); - formatted.push_str(&format!( - "\n... and {} more files with matches.\n", - self.total_matches - unique_files - )); - } - } else { - // Full mode: show snippets with context, but limit by snippet count - const MAX_DISPLAYED_SNIPPETS: usize = 20; // Show max 20 snippets in full mode - let mut files_with_snippets = 0; - - // Show detailed results for top matches - for (snippets_shown, result) in self.results.iter().enumerate() { - if snippets_shown >= MAX_DISPLAYED_SNIPPETS { - break; - } - - // Display the file path with line range (same format as accepted by read_files) - let end_line = result.start_line + result.line_content.len() - 1; - formatted.push_str(&format!( - ">>>>> RESULT: {}:{}-{}\n", - result.file.display(), - result.start_line + 1, - end_line + 1 - )); - - // Display the matched content with context - for (line_idx, line) in result.line_content.iter().enumerate() { - let rendered_line = Self::format_line_with_excerpt( - line, - line_idx, - &result.match_lines, - &result.match_ranges, - ); - formatted.push_str(&rendered_line); - - // Add a newline if not already present - if !rendered_line.ends_with('\n') { - formatted.push('\n'); - } - } - - formatted.push_str("<<<<< END RESULT\n\n"); - files_with_snippets += 1; - } - - // Show remaining files as paths only - let remaining_results = &self.results[files_with_snippets..]; - if !remaining_results.is_empty() { - // Group remaining results by file path and sum match counts - let mut file_matches = HashMap::new(); - for result in remaining_results { + // Render text file results (if any) + if !self.results.is_empty() { + if self.summary_mode { + // Summary mode: show only file paths with match counts + formatted.push_str( + "\nToo many code snippets would be displayed. Showing file paths only.\n", + ); + formatted.push_str("Use the 'paths' parameter to search within specific directories for detailed results.\n\n"); + + // Group results by file path and sum match counts + let mut file_matches = std::collections::HashMap::new(); + for result in &self.results { let match_count = result.match_lines.len(); *file_matches.entry(result.file.clone()).or_insert(0) += match_count; } - formatted.push_str(&format!( - "Additional {} files with matches:\n", - file_matches.len() - )); - // Sort files by path for consistent output let mut sorted_files: Vec<_> = file_matches.iter().collect(); sorted_files.sort_by_key(|(path, _)| path.as_path()); for (file_path, total_matches) in sorted_files { formatted.push_str(&format!( - "📄 {} ({} matches)\n", + "{} ({} matches)\n", file_path.display(), total_matches )); } - formatted.push('\n'); + + if self.truncated { + let unique_files = file_matches.len(); + formatted.push_str(&format!( + "\n... and {} more files with matches.\n", + self.total_matches - unique_files + )); + } + } else { + // Full mode: show snippets with context, but limit by snippet count + const MAX_DISPLAYED_SNIPPETS: usize = 20; // Show max 20 snippets in full mode + let mut files_with_snippets = 0; + + // Show detailed results for top matches + for (snippets_shown, result) in self.results.iter().enumerate() { + if snippets_shown >= MAX_DISPLAYED_SNIPPETS { + break; + } + + // Display the file path with line range (same format as accepted by read_files) + let end_line = result.start_line + result.line_content.len() - 1; + formatted.push_str(&format!( + ">>>>> RESULT: {}:{}-{}\n", + result.file.display(), + result.start_line + 1, + end_line + 1 + )); + + // Display the matched content with context + for (line_idx, line) in result.line_content.iter().enumerate() { + let rendered_line = Self::format_line_with_excerpt( + line, + line_idx, + &result.match_lines, + &result.match_ranges, + ); + formatted.push_str(&rendered_line); + + // Add a newline if not already present + if !rendered_line.ends_with('\n') { + formatted.push('\n'); + } + } + + formatted.push_str("<<<<< END RESULT\n\n"); + files_with_snippets += 1; + } + + // Show remaining files as paths only + let remaining_results = &self.results[files_with_snippets..]; + if !remaining_results.is_empty() { + // Group remaining results by file path and sum match counts + let mut file_matches = HashMap::new(); + for result in remaining_results { + let match_count = result.match_lines.len(); + *file_matches.entry(result.file.clone()).or_insert(0) += match_count; + } + + formatted.push_str(&format!( + "Additional {} files with matches:\n", + file_matches.len() + )); + + // Sort files by path for consistent output + let mut sorted_files: Vec<_> = file_matches.iter().collect(); + sorted_files.sort_by_key(|(path, _)| path.as_path()); + + for (file_path, total_matches) in sorted_files { + formatted.push_str(&format!( + "📄 {} ({} matches)\n", + file_path.display(), + total_matches + )); + } + formatted.push('\n'); + } + } + + if self.truncated { + formatted.push_str("Use the 'paths' parameter to search within specific directories for more focused results.\n"); } } - if self.truncated { - formatted.push_str("Use the 'paths' parameter to search within specific directories for more focused results.\n"); + // Append document search results if any + if !self.document_results.is_empty() { + formatted.push_str("\n--- Document matches ---\n\n"); + for doc_result in &self.document_results { + formatted.push_str(&format!( + ">>>>> DOCUMENT MATCH: {} ({}, page {})\n", + doc_result.file, doc_result.format, doc_result.page + )); + formatted.push_str(&doc_result.excerpt); + if !doc_result.excerpt.ends_with('\n') { + formatted.push('\n'); + } + formatted.push_str("<<<<< END DOCUMENT MATCH\n\n"); + } } formatted @@ -454,6 +496,10 @@ impl Tool for SearchFilesTool { (all_results, false, false) }; + // Search within documents if the feature is enabled + let document_results = + Self::search_documents(&root_dir, &input.regex, input.paths.as_deref()).await; + Ok(SearchFilesOutput { project: input.project.clone(), regex: input.regex.clone(), @@ -461,10 +507,33 @@ impl Tool for SearchFilesTool { total_matches: total_files, truncated, summary_mode, + document_results, }) } } +impl SearchFilesTool { + /// Search within document files (PDF, DOCX, etc.) for the given pattern. + /// Only active when the `document-conversion` feature is enabled. + #[cfg(feature = "document-conversion")] + async fn search_documents( + root_dir: &std::path::Path, + regex_pattern: &str, + paths: Option<&[String]>, + ) -> Vec { + super::document_search::search_in_documents(root_dir, regex_pattern, paths).await + } + + #[cfg(not(feature = "document-conversion"))] + async fn search_documents( + _root_dir: &std::path::Path, + _regex_pattern: &str, + _paths: Option<&[String]>, + ) -> Vec { + Vec::new() + } +} + #[cfg(test)] mod tests { use super::*; @@ -500,6 +569,7 @@ mod tests { total_matches: results.len(), truncated: false, summary_mode: false, + document_results: vec![], }; // Render the output @@ -523,6 +593,7 @@ mod tests { total_matches: 0, truncated: false, summary_mode: false, + document_results: vec![], }; // Render the output @@ -533,6 +604,40 @@ mod tests { assert!(rendered.contains("No matches found for 'NonExistentPattern'")); } + #[tokio::test] + async fn test_document_results_shown_when_no_text_results() { + // Regression test: document results must be shown even when no text file results exist + let output = SearchFilesOutput { + project: "test-project".to_string(), + regex: "MemGPT".to_string(), + results: Vec::new(), // No text file matches + total_matches: 0, + truncated: false, + summary_mode: false, + document_results: vec![DocumentMatchResult { + file: "paper.pdf".to_string(), + format: "PDF".to_string(), + page: 3, + excerpt: "...explored previously in MemGPT [3]...".to_string(), + match_count: 4, + }], + }; + + let mut tracker = ResourcesTracker::new(); + let rendered = output.render(&mut tracker); + + // Must NOT say "No matches found" + assert!( + !rendered.contains("No matches found"), + "Should not say 'No matches found' when document results exist" + ); + // Must show the document match + assert!(rendered.contains("DOCUMENT MATCH")); + assert!(rendered.contains("paper.pdf")); + assert!(rendered.contains("page 3")); + assert!(rendered.contains("MemGPT")); + } + #[tokio::test] async fn test_search_files_execution() -> Result<()> { // Create test fixture @@ -583,6 +688,7 @@ mod tests { total_matches: 100, truncated: false, summary_mode: true, + document_results: vec![], }; let mut tracker = ResourcesTracker::new(); @@ -616,6 +722,7 @@ mod tests { total_matches: 1, truncated: false, summary_mode: false, + document_results: vec![], }; let mut tracker = ResourcesTracker::new(); @@ -686,6 +793,7 @@ mod tests { total_matches: 2, truncated: false, summary_mode: false, + document_results: vec![], }; let mut tracker = ResourcesTracker::new(); @@ -713,6 +821,7 @@ mod tests { total_matches: 50, truncated: false, summary_mode: true, + document_results: vec![], }; let rendered_many = many_output.render(&mut tracker); @@ -773,6 +882,7 @@ mod tests { total_matches: 2, // Should be 2 unique files, not 3 results truncated: false, summary_mode: true, + document_results: vec![], }; let mut tracker = ResourcesTracker::new(); diff --git a/crates/code_assistant/src/tools/impls/view_documents.rs b/crates/code_assistant/src/tools/impls/view_documents.rs new file mode 100644 index 00000000..d62b0d1b --- /dev/null +++ b/crates/code_assistant/src/tools/impls/view_documents.rs @@ -0,0 +1,499 @@ +use crate::tools::core::{ + Render, ResourcesTracker, Tool, ToolContext, ToolResult, ToolScope, ToolSpec, +}; +use anyhow::{anyhow, Result}; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use std::path::PathBuf; + +/// Maximum document file size: 50 MB +const MAX_DOCUMENT_SIZE: usize = 50 * 1024 * 1024; + +/// Maximum total markdown output length (characters) to avoid flooding the context window. +const MAX_OUTPUT_CHARS: usize = 150_000; + +/// Supported document extensions and their human-readable format names. +fn document_format_for_extension(ext: &str) -> Option<&'static str> { + match ext.to_ascii_lowercase().as_str() { + "pdf" => Some("PDF"), + "docx" => Some("DOCX"), + "xlsx" => Some("XLSX"), + "pptx" => Some("PPTX"), + "odt" => Some("ODT"), + "rtf" => Some("RTF"), + "html" | "htm" => Some("HTML"), + "xml" => Some("XML"), + "csv" => Some("CSV"), + "tsv" => Some("TSV"), + _ => None, + } +} + +/// Resolve a path relative to the project root and verify it stays within bounds. +fn resolve_project_path(root_dir: &std::path::Path, rel_path: &std::path::Path) -> Result { + if rel_path.is_absolute() { + anyhow::bail!("Absolute paths are not allowed"); + } + + let candidate = root_dir.join(rel_path); + + let canonical = candidate + .canonicalize() + .map_err(|e| anyhow!("Failed to resolve path '{}': {}", rel_path.display(), e))?; + + let canonical_root = root_dir + .canonicalize() + .unwrap_or_else(|_| root_dir.to_path_buf()); + + if canonical.starts_with(&canonical_root) { + Ok(canonical) + } else { + anyhow::bail!("Access outside project root is not allowed") + } +} + +// -------------------------------------------------------------------------- +// Input +// -------------------------------------------------------------------------- + +#[derive(Deserialize, Serialize)] +pub struct ViewDocumentsInput { + pub project: String, + pub paths: Vec, +} + +// -------------------------------------------------------------------------- +// Output +// -------------------------------------------------------------------------- + +/// A single successfully converted document. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ConvertedDocument { + pub path: String, + pub format: String, + pub markdown: String, + pub page_count: usize, + pub file_size: usize, + /// Whether the markdown output was truncated due to length limits. + pub truncated: bool, +} + +#[derive(Serialize, Deserialize)] +pub struct ViewDocumentsOutput { + pub project: String, + pub converted_documents: Vec, + pub failed_documents: Vec<(String, String)>, +} + +// -------------------------------------------------------------------------- +// Render +// -------------------------------------------------------------------------- + +impl Render for ViewDocumentsOutput { + fn status(&self) -> String { + if self.failed_documents.is_empty() { + format!("Converted {} document(s)", self.converted_documents.len()) + } else { + format!( + "Converted {} document(s), failed {} document(s)", + self.converted_documents.len(), + self.failed_documents.len() + ) + } + } + + fn render(&self, _tracker: &mut ResourcesTracker) -> String { + let mut out = String::new(); + + for (path, error) in &self.failed_documents { + out.push_str(&format!( + "Failed to convert '{}' in project '{}': {}\n", + path, self.project, error + )); + } + + for doc in &self.converted_documents { + let size_display = if doc.file_size >= 1024 * 1024 { + format!("{:.1} MB", doc.file_size as f64 / (1024.0 * 1024.0)) + } else { + format!("{:.1} KB", doc.file_size as f64 / 1024.0) + }; + + out.push_str(&format!( + ">>>>> DOCUMENT: {} ({}, {}, {} pages)\n", + doc.path, doc.format, size_display, doc.page_count + )); + out.push_str(&doc.markdown); + if !doc.markdown.ends_with('\n') { + out.push('\n'); + } + if doc.truncated { + out.push_str("[... output truncated due to length limits ...]\n"); + } + out.push_str("<<<<< END DOCUMENT\n\n"); + } + + out + } +} + +// -------------------------------------------------------------------------- +// ToolResult +// -------------------------------------------------------------------------- + +impl ToolResult for ViewDocumentsOutput { + fn is_success(&self) -> bool { + !self.converted_documents.is_empty() + } +} + +// -------------------------------------------------------------------------- +// Tool +// -------------------------------------------------------------------------- + +pub struct ViewDocumentsTool; + +#[async_trait::async_trait] +impl Tool for ViewDocumentsTool { + type Input = ViewDocumentsInput; + type Output = ViewDocumentsOutput; + + fn spec(&self) -> ToolSpec { + let description = concat!( + "View document files in a project by converting them to Markdown.\n", + "Reads binary document files, converts their content to Markdown text, ", + "and returns the result so you can read and analyze the document content.\n", + "\n", + "Supported formats: PDF, DOCX, XLSX, PPTX, ODT, RTF, HTML, XML, CSV, TSV.\n", + ); + + ToolSpec { + name: "view_documents", + description, + parameters_schema: json!({ + "type": "object", + "properties": { + "project": { + "type": "string", + "description": "Name of the project containing the document files" + }, + "paths": { + "type": "array", + "description": "Paths to document files relative to the project root directory", + "items": { + "type": "string" + } + } + }, + "required": ["project", "paths"] + }), + annotations: Some(json!({ + "readOnlyHint": true, + "idempotentHint": true + })), + supported_scopes: &[ + ToolScope::McpServer, + ToolScope::Agent, + ToolScope::AgentWithDiffBlocks, + ToolScope::SubAgentReadOnly, + ToolScope::SubAgentDefault, + ], + hidden: false, + title_template: Some("Viewing documents {paths}"), + } + } + + async fn execute<'a>( + &self, + context: &mut ToolContext<'a>, + input: &mut Self::Input, + ) -> Result { + let explorer = context + .project_manager + .get_explorer_for_project(&input.project) + .map_err(|e| { + anyhow!( + "Failed to get explorer for project {}: {}", + input.project, + e + ) + })?; + + let mut converted_documents = Vec::new(); + let mut failed_documents = Vec::new(); + let root_dir = explorer.root_dir(); + let mut total_output_chars: usize = 0; + + for path_str in &input.paths { + let path = PathBuf::from(path_str); + + // Check extension + let ext = match path.extension().and_then(|e| e.to_str()) { + Some(e) => e.to_string(), + None => { + failed_documents.push(( + path_str.clone(), + "File has no extension; cannot determine document format".into(), + )); + continue; + } + }; + + let format_name = match document_format_for_extension(&ext) { + Some(name) => name.to_string(), + None => { + failed_documents.push(( + path_str.clone(), + format!( + "Unsupported document format '.{ext}'. Supported: pdf, docx, xlsx, pptx, odt, rtf, html, xml, csv, tsv" + ), + )); + continue; + } + }; + + // Resolve and validate the path + let full_path = match resolve_project_path(&root_dir, &path) { + Ok(p) => p, + Err(e) => { + failed_documents.push((path_str.clone(), e.to_string())); + continue; + } + }; + + // Check file size before reading + let metadata = match tokio::fs::metadata(&full_path).await { + Ok(m) => m, + Err(e) => { + failed_documents.push(( + path_str.clone(), + format!("Failed to read file metadata: {e}"), + )); + continue; + } + }; + + let file_size = metadata.len() as usize; + if file_size > MAX_DOCUMENT_SIZE { + let size_mb = file_size as f64 / (1024.0 * 1024.0); + failed_documents.push(( + path_str.clone(), + format!( + "Document file is too large ({size_mb:.1} MB). Maximum supported size is {} MB", + MAX_DOCUMENT_SIZE / (1024 * 1024) + ), + )); + continue; + } + + // Convert the document using transmutation + match convert_document(&full_path).await { + Ok((markdown, page_count)) => { + let remaining_budget = MAX_OUTPUT_CHARS.saturating_sub(total_output_chars); + let truncated = markdown.len() > remaining_budget; + let final_markdown = if truncated { + // Truncate at a char boundary + let mut end = remaining_budget; + while end < markdown.len() && !markdown.is_char_boundary(end) { + end -= 1; + } + markdown[..end].to_string() + } else { + markdown + }; + + total_output_chars += final_markdown.len(); + + converted_documents.push(ConvertedDocument { + path: path_str.clone(), + format: format_name, + markdown: final_markdown, + page_count, + file_size, + truncated, + }); + } + Err(e) => { + failed_documents.push((path_str.clone(), format!("Conversion failed: {e}"))); + } + } + } + + Ok(ViewDocumentsOutput { + project: input.project.clone(), + converted_documents, + failed_documents, + }) + } +} + +// -------------------------------------------------------------------------- +// Conversion helper (behind feature flag) +// -------------------------------------------------------------------------- + +#[cfg(feature = "document-conversion")] +async fn convert_document(path: &std::path::Path) -> Result<(String, usize)> { + use transmutation::{Converter, OutputFormat}; + + let converter = + Converter::new().map_err(|e| anyhow!("Failed to initialize document converter: {e}"))?; + + let result = converter + .convert( + path.to_str() + .ok_or_else(|| anyhow!("Invalid path encoding"))?, + ) + .to(OutputFormat::Markdown { + split_pages: false, + optimize_for_llm: true, + }) + .execute() + .await + .map_err(|e| anyhow!("Document conversion error: {e}"))?; + + let page_count = result.page_count(); + + // Collect all content into a single markdown string. + // ConversionOutput.data is Vec containing UTF-8 markdown text. + let markdown: String = result + .content + .iter() + .filter_map(|output| String::from_utf8(output.data.clone()).ok()) + .collect::>() + .join("\n\n"); + + Ok((markdown, page_count)) +} + +#[cfg(not(feature = "document-conversion"))] +async fn convert_document(_path: &std::path::Path) -> Result<(String, usize)> { + anyhow::bail!( + "Document conversion is not available. \ + Build with `--features document-conversion` to enable this feature." + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_document_format_detection() { + assert_eq!(document_format_for_extension("pdf"), Some("PDF")); + assert_eq!(document_format_for_extension("PDF"), Some("PDF")); + assert_eq!(document_format_for_extension("docx"), Some("DOCX")); + assert_eq!(document_format_for_extension("xlsx"), Some("XLSX")); + assert_eq!(document_format_for_extension("pptx"), Some("PPTX")); + assert_eq!(document_format_for_extension("odt"), Some("ODT")); + assert_eq!(document_format_for_extension("rtf"), Some("RTF")); + assert_eq!(document_format_for_extension("html"), Some("HTML")); + assert_eq!(document_format_for_extension("htm"), Some("HTML")); + assert_eq!(document_format_for_extension("xml"), Some("XML")); + assert_eq!(document_format_for_extension("csv"), Some("CSV")); + assert_eq!(document_format_for_extension("tsv"), Some("TSV")); + assert_eq!(document_format_for_extension("txt"), None); + assert_eq!(document_format_for_extension("rs"), None); + } + + #[test] + fn test_output_rendering() { + let output = ViewDocumentsOutput { + project: "test".to_string(), + converted_documents: vec![ConvertedDocument { + path: "report.pdf".to_string(), + format: "PDF".to_string(), + markdown: "# Report\n\nThis is the content.".to_string(), + page_count: 3, + file_size: 50_000, + truncated: false, + }], + failed_documents: vec![], + }; + + let mut tracker = ResourcesTracker::new(); + let rendered = output.render(&mut tracker); + + assert!(rendered.contains(">>>>> DOCUMENT: report.pdf")); + assert!(rendered.contains("PDF")); + assert!(rendered.contains("3 pages")); + assert!(rendered.contains("# Report")); + assert!(rendered.contains("<<<<< END DOCUMENT")); + } + + #[test] + fn test_output_with_failures() { + let output = ViewDocumentsOutput { + project: "test".to_string(), + converted_documents: vec![], + failed_documents: vec![("bad.xyz".to_string(), "Unsupported format".to_string())], + }; + + assert!(!output.is_success()); + + let mut tracker = ResourcesTracker::new(); + let rendered = output.render(&mut tracker); + assert!(rendered.contains("Failed to convert 'bad.xyz'")); + assert!(rendered.contains("Unsupported format")); + } + + #[test] + fn test_truncation_rendering() { + let output = ViewDocumentsOutput { + project: "test".to_string(), + converted_documents: vec![ConvertedDocument { + path: "big.pdf".to_string(), + format: "PDF".to_string(), + markdown: "Truncated content".to_string(), + page_count: 100, + file_size: 10_000_000, + truncated: true, + }], + failed_documents: vec![], + }; + + let mut tracker = ResourcesTracker::new(); + let rendered = output.render(&mut tracker); + assert!(rendered.contains("[... output truncated due to length limits ...]")); + } + + #[tokio::test] + async fn test_unsupported_extension() { + use crate::tests::mocks::ToolTestFixture; + use crate::tools::core::ToolRegistry; + + let registry = ToolRegistry::global(); + let tool = registry.get("view_documents"); + if tool.is_none() { + // Tool not registered (feature disabled) - skip test + return; + } + let tool = tool.unwrap(); + + let mut fixture = ToolTestFixture::with_files(vec![( + "data.bin".to_string(), + "not a document".to_string(), + )]); + let mut context = fixture.context(); + + let mut params = json!({ + "project": "test-project", + "paths": ["data.bin"] + }); + + let result = tool.invoke(&mut context, &mut params).await.unwrap(); + assert!(!result.is_success()); + + let mut tracker = ResourcesTracker::new(); + let output = result.as_render().render(&mut tracker); + assert!(output.contains("Unsupported document format")); + } + + #[cfg(not(feature = "document-conversion"))] + #[tokio::test] + async fn test_feature_disabled_error() { + let path = std::path::Path::new("/tmp/test.pdf"); + let result = convert_document(path).await; + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("not available")); + } +} diff --git a/crates/code_assistant/src/ui/gpui/file_icons.rs b/crates/code_assistant/src/ui/gpui/file_icons.rs index a64aedfb..a2406a1f 100644 --- a/crates/code_assistant/src/ui/gpui/file_icons.rs +++ b/crates/code_assistant/src/ui/gpui/file_icons.rs @@ -50,8 +50,9 @@ pub const TOOL_DELETE_FILES: &str = "trash"; // trash.svg pub const TOOL_OPEN_PROJECT: &str = "expanded_folder"; // folder_open.svg pub const TOOL_USER_INPUT: &str = "person"; // person.svg pub const TOOL_COMPLETE_TASK: &str = "check_circle"; // check_circle.svg -pub const TOOL_UPDATE_PLAN: &str = "file_generic"; // file_generic.svg +pub const TOOL_UPDATE_PLAN: &str = "todo_list"; // todo_list.svg pub const TOOL_SPAWN_AGENT: &str = "rerun"; // rerun.svg - for spawning sub-agents +pub const TOOL_VIEW_DOCUMENTS: &str = "file_generic"; // file_generic.svg - for viewing documents pub const TOOL_GENERIC: &str = "file_code"; // file_code.svg const FILE_TYPES_ASSET: &str = "icons/file_icons/file_types.json"; @@ -140,6 +141,7 @@ impl FileIcons { TOOL_UPDATE_PLAN => Some("icons/file_generic.svg"), TOOL_SPAWN_AGENT => Some("icons/rerun.svg"), + TOOL_VIEW_DOCUMENTS => Some("icons/file_generic.svg"), TOOL_GENERIC => Some("icons/file_code.svg"), // For file_types.json types we missed _ => None, @@ -178,6 +180,7 @@ impl FileIcons { TOOL_UPDATE_PLAN => Some(SharedString::from("📝")), TOOL_SPAWN_AGENT => Some(SharedString::from("🔄")), + TOOL_VIEW_DOCUMENTS => Some(SharedString::from("📑")), TOOL_GENERIC => Some(SharedString::from("🔧")), _ => Some(SharedString::from("📄")), // Default fallback } @@ -203,6 +206,7 @@ impl FileIcons { "complete_task" => TOOL_COMPLETE_TASK, "update_plan" => TOOL_UPDATE_PLAN, "spawn_agent" => TOOL_SPAWN_AGENT, + "view_documents" => TOOL_VIEW_DOCUMENTS, _ => TOOL_GENERIC, }; diff --git a/crates/code_assistant/src/ui/gpui/tool_block_renderers.rs b/crates/code_assistant/src/ui/gpui/tool_block_renderers.rs index 0cae7df6..0976f9b4 100644 --- a/crates/code_assistant/src/ui/gpui/tool_block_renderers.rs +++ b/crates/code_assistant/src/ui/gpui/tool_block_renderers.rs @@ -245,6 +245,10 @@ impl InlineToolRenderer { tool_name: "view_images", template: "View {paths}", }, + DescribeTemplate { + tool_name: "view_documents", + template: "View {paths}", + }, ]; let tools: Vec = templates.iter().map(|t| t.tool_name.to_string()).collect(); From 3e96e563835d111c7d90e8dd0557a066c744b817 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Tue, 12 May 2026 16:16:06 +0200 Subject: [PATCH 2/5] ACP: Emit session updates from other instances --- crates/code_assistant/src/acp/agent.rs | 19 ++ crates/code_assistant/src/app/acp.rs | 306 ++++++++++++++++++++++++- 2 files changed, 320 insertions(+), 5 deletions(-) diff --git a/crates/code_assistant/src/acp/agent.rs b/crates/code_assistant/src/acp/agent.rs index 1b87b9a2..f1a338f1 100644 --- a/crates/code_assistant/src/acp/agent.rs +++ b/crates/code_assistant/src/acp/agent.rs @@ -56,6 +56,9 @@ pub struct ACPAgentImpl { client_capabilities: Arc>>, /// Sessions created in new_session() but not yet persisted (deferred until first prompt) pending_sessions: Arc>>, + /// The session ID currently connected via ACP (for cross-instance awareness). + /// Updated when load_session or new_session is called. + connected_session_id: Arc>>, } struct ModelStateInfo { @@ -72,6 +75,7 @@ impl ACPAgentImpl { playback_path: Option, fast_playback: bool, session_update_tx: mpsc::UnboundedSender<(acp::SessionNotification, oneshot::Sender<()>)>, + connected_session_id: Arc>>, ) -> Self { Self { session_manager, @@ -83,6 +87,7 @@ impl ACPAgentImpl { session_update_tx, active_uis: Arc::new(Mutex::new(HashMap::new())), client_capabilities: Arc::new(Mutex::new(None)), + connected_session_id, } } @@ -328,6 +333,7 @@ impl acp::Agent for ACPAgentImpl { let model_name = self.model_name.clone(); let session_config_template = self.session_config_template.clone(); let pending_sessions = self.pending_sessions.clone(); + let connected_session_id = self.connected_session_id.clone(); Box::pin(async move { tracing::info!("ACP: Creating new session with cwd: {:?}", arguments.cwd); @@ -357,6 +363,12 @@ impl acp::Agent for ACPAgentImpl { tracing::info!("ACP: Created pending session: {}", session_id); + // Track this as the connected session for cross-instance awareness + { + let mut connected = connected_session_id.lock().unwrap(); + *connected = Some(session_id.clone()); + } + let models_state = ACPAgentImpl::compute_model_state(&model_name, Some(model_name.as_str())) .map(|info| info.state); @@ -381,10 +393,17 @@ impl acp::Agent for ACPAgentImpl { let session_manager = self.session_manager.clone(); let session_update_tx = self.session_update_tx.clone(); let default_model_name = self.model_name.clone(); + let connected_session_id = self.connected_session_id.clone(); Box::pin(async move { tracing::info!("ACP: Loading session: {}", arguments.session_id.0); + // Track this as the connected session for cross-instance awareness + { + let mut connected = connected_session_id.lock().unwrap(); + *connected = Some(arguments.session_id.0.to_string()); + } + // Load session into manager { let mut manager = session_manager.lock().await; diff --git a/crates/code_assistant/src/app/acp.rs b/crates/code_assistant/src/app/acp.rs index 3f103de4..1930184e 100644 --- a/crates/code_assistant/src/app/acp.rs +++ b/crates/code_assistant/src/app/acp.rs @@ -1,16 +1,21 @@ use super::AgentRunConfig; use crate::acp::{ register_fs_worker, register_terminal_worker, set_acp_client_connection, ACPAgentImpl, + ACPUserUI, }; use crate::persistence::FileSessionPersistence; +use crate::session::watcher::SessionWatcher; use crate::session::{SessionConfig, SessionManager}; +use crate::ui::ui_events::UiEvent; +use crate::ui::UserInterface; +use agent_client_protocol as acp; use agent_client_protocol::Client; use anyhow::Result; -use std::sync::Arc; -use tokio::sync::{mpsc, Mutex}; +use std::sync::{Arc, Mutex as StdMutex}; +use tokio::sync::{mpsc, oneshot, Mutex}; use tokio_util::compat::{TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt}; -use tracing::info; +use tracing::{debug, info, warn}; pub async fn run(verbose: bool, config: AgentRunConfig) -> Result<()> { // Setup logging to file since stdout is used for ACP protocol @@ -61,6 +66,7 @@ pub async fn run(verbose: bool, config: AgentRunConfig) -> Result<()> { // Create session manager let persistence = FileSessionPersistence::new(); + let persistence_for_watcher = FileSessionPersistence::new(); let session_manager = Arc::new(Mutex::new(SessionManager::new( persistence, session_config_template.clone(), @@ -74,16 +80,37 @@ pub async fn run(verbose: bool, config: AgentRunConfig) -> Result<()> { // Create channel for session notifications let (session_update_tx, mut session_update_rx) = mpsc::unbounded_channel(); + // Connected session ID for the filesystem watcher + let connected_session_id: Arc>> = Arc::new(StdMutex::new(None)); + // Create the agent let agent = ACPAgentImpl::new( - session_manager, + session_manager.clone(), session_config_template, model_name.clone(), config.playback.clone(), config.fast_playback, - session_update_tx, + session_update_tx.clone(), + connected_session_id.clone(), ); + // Start the filesystem watcher for cross-instance awareness. + let (watcher_event_tx, watcher_event_rx) = async_channel::bounded::(64); + let _session_watcher = match SessionWatcher::start( + &persistence_for_watcher, + watcher_event_tx, + connected_session_id.clone(), + ) { + Ok(watcher) => { + info!("Filesystem session watcher started (ACP mode)"); + Some(watcher) + } + Err(e) => { + warn!("Failed to start filesystem session watcher: {e}"); + None + } + }; + // Use LocalSet for non-Send futures from agent-client-protocol, // but the spawned futures will themselves spawn agent tasks on the multi-threaded runtime let local_set = tokio::task::LocalSet::new(); @@ -117,9 +144,278 @@ pub async fn run(verbose: bool, config: AgentRunConfig) -> Result<()> { } }); + // Kick off a background task to handle filesystem watcher events + // and forward cross-instance session changes to the ACP client + let session_manager_for_watcher = session_manager.clone(); + let session_update_tx_for_watcher = session_update_tx.clone(); + let connected_session_id_for_watcher = connected_session_id.clone(); + tokio::task::spawn_local(async move { + handle_watcher_events( + watcher_event_rx, + session_manager_for_watcher, + session_update_tx_for_watcher, + connected_session_id_for_watcher, + ) + .await; + }); + // Run the IO handler until stdin/stdout are closed handle_io.await }) .await .map_err(anyhow::Error::new) } + +/// Background task that processes filesystem watcher events and replays +/// incremental session changes to the ACP client. +/// +/// When another code-assistant instance modifies the currently connected +/// session's file on disk, this task: +/// 1. Calls `refresh_session_incremental` to compute the diff +/// 2. Converts new messages/fragments to ACP `SessionNotification`s +/// 3. Sends them through the existing notification channel +async fn handle_watcher_events( + event_rx: async_channel::Receiver, + session_manager: Arc>, + session_update_tx: mpsc::UnboundedSender<(acp::SessionNotification, oneshot::Sender<()>)>, + connected_session_id: Arc>>, +) { + while let Ok(event) = event_rx.recv().await { + match event { + UiEvent::RefreshCurrentSession { session_id } => { + debug!("ACP watcher: RefreshCurrentSession for {session_id}"); + + // Make sure this is still the connected session + let current = connected_session_id.lock().unwrap().clone(); + if current.as_deref() != Some(&session_id) { + debug!( + "ACP watcher: ignoring refresh for {session_id} \ + (connected session is {:?})", + current + ); + continue; + } + + // Compute incremental diff + let ui_events = { + let mut manager = session_manager.lock().await; + match manager.refresh_session_incremental(&session_id) { + Ok(events) => events, + Err(e) => { + warn!("ACP watcher: failed to refresh session {session_id}: {e}"); + continue; + } + } + }; + + // Replay the resulting UI events as ACP notifications + for ui_event in ui_events { + replay_ui_event_to_acp( + &ui_event, + &session_id, + &session_manager, + &session_update_tx, + ) + .await; + } + } + + UiEvent::UpdateSessionActivityState { + session_id, + activity_state, + } => { + debug!( + "ACP watcher: UpdateSessionActivityState for {session_id}: {activity_state:?}" + ); + + // Update the state in the session manager + let mut manager = session_manager.lock().await; + if let Some(instance) = manager.get_session_mut(&session_id) { + instance.set_activity_state(activity_state); + } + // Note: ACP protocol doesn't have a direct "activity state" notification, + // but the client can observe that new content is streaming in. + } + + UiEvent::RefreshChatList => { + // In ACP mode the client manages its own session list via list_sessions(). + // We could potentially notify the client to re-fetch, but the protocol + // doesn't currently have a mechanism for this. + debug!("ACP watcher: RefreshChatList (ignored, client uses list_sessions)"); + } + + _ => { + // Other events are not expected from the watcher + debug!("ACP watcher: unexpected event: {:?}", event); + } + } + } +} + +/// Convert a UiEvent from `refresh_session_incremental` into ACP session +/// notifications and send them to the client. +async fn replay_ui_event_to_acp( + event: &UiEvent, + session_id: &str, + session_manager: &Arc>, + session_update_tx: &mpsc::UnboundedSender<(acp::SessionNotification, oneshot::Sender<()>)>, +) { + match event { + UiEvent::AppendMessages { + messages, + tool_results, + } => { + let acp_session_id = acp::SessionId::new(session_id); + + // Get base_path for the session + let base_path = { + let manager = session_manager.lock().await; + manager + .get_session(session_id) + .and_then(|s| s.session.config.init_path.clone()) + }; + + // Create a temporary ACPUserUI to replay fragments through. + // This reuses the same fragment→ACP conversion logic used during + // streaming and load_session replay. + let replay_ui = Arc::new(ACPUserUI::new( + acp_session_id.clone(), + session_update_tx.clone(), + base_path, + )); + + // Replay message fragments + for message_data in messages { + use crate::ui::gpui::elements::MessageRole; + + match message_data.role { + MessageRole::User => { + // Emit user message fragments as UserMessageChunk + for fragment in &message_data.fragments { + match fragment { + crate::ui::DisplayFragment::PlainText(text) => { + let content = acp::ContentBlock::Text(acp::TextContent::new( + text.clone(), + )); + let chunk = ACPUserUI::content_chunk(content); + replay_ui.queue_session_update( + acp::SessionUpdate::UserMessageChunk(chunk), + ); + } + crate::ui::DisplayFragment::CompactionDivider { summary } => { + let content = acp::ContentBlock::Text(acp::TextContent::new( + format!("[Context compacted: {}]", summary), + )); + let chunk = ACPUserUI::content_chunk(content); + replay_ui.queue_session_update( + acp::SessionUpdate::AgentMessageChunk(chunk), + ); + } + _ => { + // Other fragment types in user messages are uncommon + } + } + } + } + MessageRole::Assistant => { + // Emit assistant message fragments through display_fragment + // which handles all the ACP conversion logic + for fragment in &message_data.fragments { + if let Err(e) = replay_ui.display_fragment(fragment) { + warn!( + "ACP watcher: failed to replay fragment for {session_id}: {e}" + ); + } + } + } + } + } + + // Replay tool results as ToolCallUpdate with final status + for tool_result in tool_results { + let status = match tool_result.status { + crate::ui::ToolStatus::Success => acp::ToolCallStatus::Completed, + crate::ui::ToolStatus::Error => acp::ToolCallStatus::Failed, + _ => acp::ToolCallStatus::InProgress, + }; + + let output_content: Vec = tool_result + .output + .as_ref() + .map(|o| { + vec![acp::ToolCallContent::Content(acp::Content::new( + acp::ContentBlock::Text(acp::TextContent::new(o.clone())), + ))] + }) + .unwrap_or_default(); + + let mut update_fields = acp::ToolCallUpdateFields::new().status(status); + if !output_content.is_empty() { + update_fields = update_fields.content(output_content); + } + + let tool_call_update = acp::ToolCallUpdate::new( + acp::ToolCallId::new(tool_result.tool_id.clone()), + update_fields, + ); + + replay_ui + .queue_session_update(acp::SessionUpdate::ToolCallUpdate(tool_call_update)); + } + } + + UiEvent::SetMessages { .. } => { + // A full reload is needed (paths diverged). In ACP mode we can't easily + // "clear and reload" the session from the agent side without a protocol-level + // mechanism. For now, log a warning. The client would need to call + // load_session again to get the full state. + warn!( + "ACP watcher: session {session_id} paths diverged — \ + full reload required but not yet supported in ACP mode" + ); + } + + UiEvent::UpdatePlan { plan } => { + use crate::types::{PlanItemPriority, PlanItemStatus}; + + // Forward plan updates to the ACP client + let acp_session_id = acp::SessionId::new(session_id); + if !plan.entries.is_empty() { + let acp_entries: Vec = plan + .entries + .iter() + .map(|item| { + let status = match item.status { + PlanItemStatus::Completed => acp::PlanEntryStatus::Completed, + PlanItemStatus::InProgress => acp::PlanEntryStatus::InProgress, + PlanItemStatus::Pending => acp::PlanEntryStatus::Pending, + }; + let priority = match item.priority { + PlanItemPriority::High => acp::PlanEntryPriority::High, + PlanItemPriority::Low => acp::PlanEntryPriority::Low, + PlanItemPriority::Medium => acp::PlanEntryPriority::Medium, + }; + acp::PlanEntry::new(item.content.clone(), priority, status) + }) + .collect(); + + let plan_update = acp::Plan::new(acp_entries); + let notification = acp::SessionNotification::new( + acp_session_id, + acp::SessionUpdate::Plan(plan_update), + ); + let (ack_tx, _) = oneshot::channel(); + if let Err(e) = session_update_tx.send((notification, ack_tx)) { + warn!("ACP watcher: failed to send plan update: {e}"); + } + } + } + + _ => { + debug!( + "ACP watcher: unhandled UI event during replay: {:?}", + std::mem::discriminant(event) + ); + } + } +} From 31b2d45bda56662f42543708bf3cc8c5ba57f194 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Tue, 12 May 2026 16:19:25 +0200 Subject: [PATCH 3/5] Show diff when write_file is used to overwrite --- .../src/tools/impls/write_file.rs | 30 ++++++- .../src/ui/gpui/diff_card_renderer.rs | 78 ++++++++++++++++++- crates/code_assistant/src/ui/gpui/elements.rs | 75 ++++++++++++++++++ .../src/ui/gpui/tool_block_renderers.rs | 3 + crates/code_assistant/src/ui/gpui/ui_state.rs | 23 ++++++ .../terminal/tool_renderers/diff_renderer.rs | 18 ++++- 6 files changed, 222 insertions(+), 5 deletions(-) diff --git a/crates/code_assistant/src/tools/impls/write_file.rs b/crates/code_assistant/src/tools/impls/write_file.rs index 92d2e570..618f771c 100644 --- a/crates/code_assistant/src/tools/impls/write_file.rs +++ b/crates/code_assistant/src/tools/impls/write_file.rs @@ -23,6 +23,10 @@ pub struct WriteFileOutput { pub path: PathBuf, pub content: String, pub error: Option, + /// If the file existed before writing, this holds the original content. + /// Used by the UI to render a proper diff instead of showing all lines as new. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub original_content: Option, } // Render implementation for output formatting @@ -46,6 +50,16 @@ impl Render for WriteFileOutput { ) } } + + fn render_for_ui(&self, _tracker: &mut ResourcesTracker) -> String { + // Emit JSON with original_content (if present) so the diff card renderer + // can show a proper unified diff for file overwrites. + if let Some(original) = &self.original_content { + json!({ "original_content": original }).to_string() + } else { + String::new() + } + } } // ToolResult implementation @@ -133,6 +147,7 @@ impl Tool for WriteFileTool { "Failed to get explorer for project {}: {}", input.project, e )), + original_content: None, }); } }; @@ -150,6 +165,7 @@ impl Tool for WriteFileTool { path, content: String::new(), error: Some("Absolute paths are not allowed".to_string()), + original_content: None, }); } @@ -158,7 +174,15 @@ impl Tool for WriteFileTool { // Join with root_dir to get full path let full_path = project_root.join(&path); - // Write the file first + // Read the original content before writing (if the file exists and we're not appending). + // This allows the UI to show a proper diff for overwrites. + let original_content = if !input.append { + explorer.read_file(&full_path).await.ok() + } else { + None + }; + + // Write the file match explorer .write_file(&full_path, &input.content, input.append) .await @@ -194,12 +218,14 @@ impl Tool for WriteFileTool { path, content: input.content.clone(), error: None, + original_content, }) } Err(e) => Ok(WriteFileOutput { path, content: String::new(), // Empty content on error error: Some(e.to_string()), + original_content: None, }), } } @@ -218,6 +244,7 @@ mod tests { path: PathBuf::from("test.txt"), content: "Test content".to_string(), error: None, + original_content: None, }; let mut tracker = ResourcesTracker::new(); @@ -230,6 +257,7 @@ mod tests { path: PathBuf::from("test.txt"), content: String::new(), error: Some("File not writable".to_string()), + original_content: None, }; let rendered_error = output_error.render(&mut tracker); diff --git a/crates/code_assistant/src/ui/gpui/diff_card_renderer.rs b/crates/code_assistant/src/ui/gpui/diff_card_renderer.rs index 0fa26c63..ae018ca1 100644 --- a/crates/code_assistant/src/ui/gpui/diff_card_renderer.rs +++ b/crates/code_assistant/src/ui/gpui/diff_card_renderer.rs @@ -134,7 +134,7 @@ impl ToolBlockRenderer for DiffCardRenderer { .child(header_label), ); - let mut header_right = div().flex().flex_row().items_center().gap_2(); + let mut header_right = div().flex().flex_row().items_center().gap_1(); if has_error { header_right = header_right.child( gpui::svg() @@ -143,6 +143,36 @@ impl ToolBlockRenderer for DiffCardRenderer { .text_color(theme.danger), ); } + // Diff/File toggle button for write_file with original_content + if tool.name == "write_file" && write_file_has_original_content(tool) { + let diff_mode = card_ctx.write_file_diff_mode; + let label: SharedString = if diff_mode { "diff" } else { "file" }.into(); + let btn_text_color = if diff_mode { + theme.accent + } else { + header_text_color + }; + header_right = header_right.child( + div() + .id(SharedString::from(format!("diff-toggle-{}", tool.id))) + .flex_none() + .flex() + .items_center() + .justify_center() + .px_1p5() + .py(px(2.)) + .rounded(px(4.)) + .cursor_pointer() + .hover(|s| s.bg(header_text_color.opacity(0.1))) + .text_size(rems(0.6875)) + .font_weight(FontWeight::MEDIUM) + .text_color(btn_text_color) + .on_click(cx.listener(move |view, _event: &ClickEvent, _window, cx| { + view.toggle_write_file_diff_mode(cx); + })) + .child(label), + ); + } // Chevron — highlights on header hover via group header_right = header_right.child( div() @@ -199,7 +229,9 @@ impl ToolBlockRenderer for DiffCardRenderer { let body_content = match tool.name.as_str() { "edit" => render_edit_body(tool, is_generating, theme, rem_size), "replace_in_file" => render_replace_body(tool, is_generating, theme, rem_size), - "write_file" => render_write_body(tool, theme, rem_size), + "write_file" => { + render_write_body(tool, theme, rem_size, card_ctx.write_file_diff_mode) + } "delete_files" => render_delete_body(tool, theme), _ => None, }; @@ -358,17 +390,47 @@ fn render_replace_body( ) } -/// Render body for the `write_file` tool — all-green additions with line numbers. +/// Render body for the `write_file` tool. +/// +/// When `diff_mode` is true and the tool output contains `original_content` +/// (indicating an existing file was overwritten), renders a unified diff. +/// Otherwise falls back to all-green additions with line numbers. fn render_write_body( tool: &ToolUseBlock, theme: &gpui_component::theme::Theme, rem_size: gpui::Pixels, + diff_mode: bool, ) -> Option { let content = get_param(tool, "content")?; if content.is_empty() { return None; } + // Try to extract original_content from the tool output JSON + let original_content = tool + .output + .as_deref() + .and_then(|s| serde_json::from_str::(s).ok()) + .and_then(|v| { + v.get("original_content") + .and_then(|c| c.as_str()) + .map(String::from) + }); + + // If we have original content and diff mode is on, show a unified diff + if diff_mode { + if let Some(ref original) = original_content { + return Some(render_unified_diff( + original, + content, + theme, + Some(1), + rem_size, + )); + } + } + + // Fall back to all-green additions (new file or diff mode toggled off) let lines: Vec<&str> = content.lines().collect(); let total_lines = lines.len(); let gutter_width = total_lines.to_string().len(); @@ -754,6 +816,16 @@ fn get_param<'a>(tool: &'a ToolUseBlock, name: &str) -> Option<&'a str> { .map(|p| p.value.as_str()) } +/// Check whether a write_file tool's output JSON contains `original_content`, +/// indicating the file was overwritten (not newly created). +fn write_file_has_original_content(tool: &ToolUseBlock) -> bool { + tool.output + .as_deref() + .and_then(|s| serde_json::from_str::(s).ok()) + .and_then(|v| v.get("original_content").cloned()) + .is_some() +} + /// Extract match start line numbers from the tool's output JSON. /// /// After execution, `edit` and `replace_in_file` tools emit their output as diff --git a/crates/code_assistant/src/ui/gpui/elements.rs b/crates/code_assistant/src/ui/gpui/elements.rs index 496b0a70..3d48a038 100644 --- a/crates/code_assistant/src/ui/gpui/elements.rs +++ b/crates/code_assistant/src/ui/gpui/elements.rs @@ -90,6 +90,37 @@ impl ToolCollapseState { } } +/// Convenience helpers for write_file diff mode state. +/// +/// When a write_file tool overwrites an existing file, the card can show either +/// a unified diff or the plain new-file content. This persists the user's +/// choice per tool block. +pub struct ToolDiffModeState; + +impl ToolDiffModeState { + /// Look up a previously stored diff mode override for a tool in a session. + /// Returns `None` if no override exists (default = diff mode on). + pub fn get(session_id: &str, tool_id: &str) -> Option { + UiStateStore::try_global()? + .lock() + .ok() + .and_then(|mut store| store.get_tool_diff_mode(session_id, tool_id)) + } + + /// Record a diff mode override for a tool in a session. + /// Returns `true` if the store was marked dirty (i.e. a save should be + /// scheduled). + pub fn set(session_id: &str, tool_id: &str, diff_mode: bool) -> bool { + if let Some(store) = UiStateStore::try_global() { + if let Ok(mut store) = store.lock() { + store.set_tool_diff_mode(session_id, tool_id, diff_mode); + return true; + } + } + false + } +} + /// Animation configuration for expand/collapse #[derive(Clone)] pub struct AnimationConfig { @@ -1039,6 +1070,13 @@ impl BlockData { } } + fn as_tool(&self) -> Option<&ToolUseBlock> { + match self { + BlockData::ToolUse(b) => Some(b), + _ => None, + } + } + fn as_tool_mut(&mut self) -> Option<&mut ToolUseBlock> { match self { BlockData::ToolUse(b) => Some(b), @@ -1070,6 +1108,9 @@ pub struct BlockView { current_project: Arc>, /// Session ID this block belongs to (for collapse-state persistence). session_id: Option, + /// For write_file tool blocks: whether to show the diff view (true) or the + /// plain new-file view (false). Only relevant when original_content is available. + pub write_file_diff_mode: bool, } impl BlockView { @@ -1081,6 +1122,20 @@ impl BlockView { session_id: Option, _cx: &mut Context, ) -> Self { + // Load persisted diff mode preference for write_file tool blocks. + let write_file_diff_mode = if let Some(tool) = block.as_tool() { + if tool.name == "write_file" { + session_id + .as_deref() + .and_then(|sid| ToolDiffModeState::get(sid, &tool.id)) + .unwrap_or(true) // default: show diff + } else { + true + } + } else { + true + }; + Self { block, block_id, @@ -1091,6 +1146,7 @@ impl BlockView { animation_task: None, current_project, session_id, + write_file_diff_mode, } } @@ -1160,6 +1216,23 @@ impl BlockView { self.start_expand_collapse_animation(should_expand, cx); } + /// Toggle between diff view and plain new-file view for write_file tool blocks. + pub fn toggle_write_file_diff_mode(&mut self, cx: &mut Context) { + self.write_file_diff_mode = !self.write_file_diff_mode; + + // Persist the new state + if let (Some(session_id), Some(tool)) = (&self.session_id, self.block.as_tool()) { + if ToolDiffModeState::set(session_id, &tool.id, self.write_file_diff_mode) { + // Schedule a debounced save + if let Some(sender) = cx.try_global::() { + let _ = sender.0.try_send(crate::ui::UiEvent::PersistUiState); + } + } + } + + cx.notify(); + } + fn toggle_compaction(&mut self, cx: &mut Context) { if let Some(summary) = self.block.as_compaction_mut() { summary.is_expanded = !summary.is_expanded; @@ -1719,6 +1792,7 @@ impl Render for BlockView { ToolBlockState::Expanded => 1.0, }, }; + let card_ctx = crate::ui::gpui::tool_block_renderers::CardRenderContext { animation_scale: scale, @@ -1729,6 +1803,7 @@ impl Render for BlockView { .lock() .unwrap() .clone(), + write_file_diff_mode: self.write_file_diff_mode, }; if let Some(element) = renderer.render( diff --git a/crates/code_assistant/src/ui/gpui/tool_block_renderers.rs b/crates/code_assistant/src/ui/gpui/tool_block_renderers.rs index ea51fe43..0d46367f 100644 --- a/crates/code_assistant/src/ui/gpui/tool_block_renderers.rs +++ b/crates/code_assistant/src/ui/gpui/tool_block_renderers.rs @@ -46,6 +46,9 @@ pub struct CardRenderContext { /// The session's current/default project name. Card renderers can compare /// this against the tool's `project` parameter to decide whether to show it. pub current_project: String, + /// For write_file tool blocks: whether to show diff view (true) or plain + /// new-file view (false). Only relevant when original_content is available. + pub write_file_diff_mode: bool, } // --------------------------------------------------------------------------- diff --git a/crates/code_assistant/src/ui/gpui/ui_state.rs b/crates/code_assistant/src/ui/gpui/ui_state.rs index 9be8c06d..8e057357 100644 --- a/crates/code_assistant/src/ui/gpui/ui_state.rs +++ b/crates/code_assistant/src/ui/gpui/ui_state.rs @@ -44,6 +44,13 @@ pub struct UiSessionState { /// blocks at their renderer-default state are omitted. #[serde(default, skip_serializing_if = "HashMap::is_empty")] pub tool_collapse_overrides: HashMap, + + /// write_file diff mode overrides set by the user. + /// Key: tool_id, Value: `true` means show diff view, `false` means show + /// plain new-file view. Only stored when the user explicitly toggles away + /// from the default (diff mode = true). + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub tool_diff_mode_overrides: HashMap, } // --------------------------------------------------------------------------- @@ -130,6 +137,22 @@ impl UiStateStore { self.dirty.insert(session_id.to_owned()); } + /// Return the diff mode override for a write_file tool block, loading from + /// disk if the session hasn't been loaded yet. + pub fn get_tool_diff_mode(&mut self, session_id: &str, tool_id: &str) -> Option { + let state = self.get(session_id); + state.tool_diff_mode_overrides.get(tool_id).copied() + } + + /// Set a write_file diff mode override. + pub fn set_tool_diff_mode(&mut self, session_id: &str, tool_id: &str, diff_mode: bool) { + let state = self.states.entry(session_id.to_owned()).or_default(); + state + .tool_diff_mode_overrides + .insert(tool_id.to_owned(), diff_mode); + self.dirty.insert(session_id.to_owned()); + } + /// Remove the in-memory state and on-disk file for a deleted session. pub fn remove_session(&mut self, session_id: &str) { self.states.remove(session_id); diff --git a/crates/code_assistant/src/ui/terminal/tool_renderers/diff_renderer.rs b/crates/code_assistant/src/ui/terminal/tool_renderers/diff_renderer.rs index 6fab577a..fc7a75cf 100644 --- a/crates/code_assistant/src/ui/terminal/tool_renderers/diff_renderer.rs +++ b/crates/code_assistant/src/ui/terminal/tool_renderers/diff_renderer.rs @@ -5,6 +5,7 @@ use ratatui::prelude::*; use ratatui::style::{Color, Modifier, Style}; +use serde_json; use similar::{ChangeTag, TextDiff}; use super::{ @@ -122,6 +123,7 @@ fn generate_tool_diff_lines(tool_block: &ToolUseBlock) -> Vec { } generate_search_replace_diff_lines(diff) } + "write_file" => { let content = tool_block .parameters @@ -131,7 +133,21 @@ fn generate_tool_diff_lines(tool_block: &ToolUseBlock) -> Vec { if content.is_empty() { return Vec::new(); } - generate_write_file_diff_lines(content) + // If the tool output contains original_content, show a proper diff + let original_content = tool_block + .output + .as_deref() + .and_then(|s| serde_json::from_str::(s).ok()) + .and_then(|v| { + v.get("original_content") + .and_then(|c| c.as_str()) + .map(String::from) + }); + if let Some(ref original) = original_content { + generate_diff_lines(original, content) + } else { + generate_write_file_diff_lines(content) + } } _ => Vec::new(), } From 74fb460a7f15219e1fd0a1ad9d3bbc50f0c62361 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Wed, 13 May 2026 17:38:29 +0200 Subject: [PATCH 4/5] ACP: avoid code duplication, fix duplicate final message --- crates/code_assistant/src/acp/agent.rs | 13 ++ crates/code_assistant/src/acp/ui.rs | 72 +++++- crates/code_assistant/src/app/acp.rs | 216 +++--------------- crates/code_assistant/src/session/manager.rs | 18 ++ crates/code_assistant/src/ui/gpui/elements.rs | 14 +- crates/code_assistant/src/ui/gpui/theme.rs | 16 +- .../src/ui/gpui/tool_block_renderers.rs | 13 +- 7 files changed, 145 insertions(+), 217 deletions(-) diff --git a/crates/code_assistant/src/acp/agent.rs b/crates/code_assistant/src/acp/agent.rs index f1a338f1..c3c9a368 100644 --- a/crates/code_assistant/src/acp/agent.rs +++ b/crates/code_assistant/src/acp/agent.rs @@ -885,6 +885,19 @@ impl acp::Agent for ACPAgentImpl { arguments.session_id.0 ); + // Advance the UI-sync baseline so the file-watcher debounce + // (which fires ~300ms later) won't replay content already + // streamed during this prompt. + { + let mut manager = session_manager.lock().await; + if let Err(e) = manager.advance_ui_sync_baseline(&arguments.session_id.0) { + tracing::warn!( + "ACP: Failed to advance UI sync baseline for {}: {e}", + arguments.session_id.0 + ); + } + } + // Mark session as disconnected and remove UI from active set { let mut manager = session_manager.lock().await; diff --git a/crates/code_assistant/src/acp/ui.rs b/crates/code_assistant/src/acp/ui.rs index 85b49eac..355ae591 100644 --- a/crates/code_assistant/src/acp/ui.rs +++ b/crates/code_assistant/src/acp/ui.rs @@ -708,6 +708,75 @@ impl UserInterface for ACPUserUI { ); } + UiEvent::AppendMessages { + messages, + tool_results, + } => { + // Cross-instance awareness: replay new messages that another + // code-assistant instance appended to the currently connected session. + use crate::ui::gpui::elements::MessageRole; + + for message_data in &messages { + match message_data.role { + MessageRole::User => { + for fragment in &message_data.fragments { + match fragment { + DisplayFragment::PlainText(text) => { + let content = acp::ContentBlock::Text( + acp::TextContent::new(text.clone()), + ); + let chunk = Self::content_chunk(content); + self.queue_session_update( + acp::SessionUpdate::UserMessageChunk(chunk), + ); + } + DisplayFragment::CompactionDivider { .. } => { + self.display_fragment(fragment)?; + } + _ => {} + } + } + } + MessageRole::Assistant => { + for fragment in &message_data.fragments { + self.display_fragment(fragment)?; + } + } + } + } + + // Replay tool results as ToolCallUpdate with final status + for tool_result in &tool_results { + let status = match tool_result.status { + crate::ui::ToolStatus::Success => acp::ToolCallStatus::Completed, + crate::ui::ToolStatus::Error => acp::ToolCallStatus::Failed, + _ => acp::ToolCallStatus::InProgress, + }; + + let output_content: Vec = tool_result + .output + .as_ref() + .map(|o| { + vec![acp::ToolCallContent::Content(acp::Content::new( + acp::ContentBlock::Text(acp::TextContent::new(o.clone())), + ))] + }) + .unwrap_or_default(); + + let mut update_fields = acp::ToolCallUpdateFields::new().status(status); + if !output_content.is_empty() { + update_fields = update_fields.content(output_content); + } + + let tool_call_update = acp::ToolCallUpdate::new( + acp::ToolCallId::new(tool_result.tool_id.clone()), + update_fields, + ); + + self.queue_session_update(acp::SessionUpdate::ToolCallUpdate(tool_call_update)); + } + } + // Events that don't translate to ACP UiEvent::SetMessages { .. } | UiEvent::DisplayCompactionSummary { .. } @@ -735,8 +804,7 @@ impl UserInterface for ACPUserUI { | UiEvent::RollbackStreaming { .. } | UiEvent::ShowTransientStatus { .. } | UiEvent::ClearTransientStatus - | UiEvent::RefreshCurrentSession { .. } - | UiEvent::AppendMessages { .. } => { + | UiEvent::RefreshCurrentSession { .. } => { // These are UI management events, not relevant for ACP // (RollbackStreaming: ACP cannot retract already-sent notifications) } diff --git a/crates/code_assistant/src/app/acp.rs b/crates/code_assistant/src/app/acp.rs index 1930184e..71f6040a 100644 --- a/crates/code_assistant/src/app/acp.rs +++ b/crates/code_assistant/src/app/acp.rs @@ -166,14 +166,13 @@ pub async fn run(verbose: bool, config: AgentRunConfig) -> Result<()> { .map_err(anyhow::Error::new) } -/// Background task that processes filesystem watcher events and replays -/// incremental session changes to the ACP client. +/// Background task that processes filesystem watcher events. /// /// When another code-assistant instance modifies the currently connected /// session's file on disk, this task: /// 1. Calls `refresh_session_incremental` to compute the diff -/// 2. Converts new messages/fragments to ACP `SessionNotification`s -/// 3. Sends them through the existing notification channel +/// 2. Routes the resulting `UiEvent`s through an `ACPUserUI` instance +/// (the same code path used for local agent streaming) async fn handle_watcher_events( event_rx: async_channel::Receiver, session_manager: Arc>, @@ -208,15 +207,30 @@ async fn handle_watcher_events( } }; - // Replay the resulting UI events as ACP notifications + if ui_events.is_empty() { + continue; + } + + // Get base_path for the replay UI + let base_path = { + let manager = session_manager.lock().await; + manager + .get_session(&session_id) + .and_then(|s| s.session.config.init_path.clone()) + }; + + // Route events through a temporary ACPUserUI — same conversion + // logic as local agent streaming (no duplication). + let replay_ui = ACPUserUI::new( + acp::SessionId::new(session_id.clone()), + session_update_tx.clone(), + base_path, + ); + for ui_event in ui_events { - replay_ui_event_to_acp( - &ui_event, - &session_id, - &session_manager, - &session_update_tx, - ) - .await; + if let Err(e) = replay_ui.send_event(ui_event).await { + warn!("ACP watcher: failed to send event for {session_id}: {e}"); + } } } @@ -228,194 +242,24 @@ async fn handle_watcher_events( "ACP watcher: UpdateSessionActivityState for {session_id}: {activity_state:?}" ); - // Update the state in the session manager + // Update the state in the session manager so that + // refresh_session_incremental sees RunningExternally and + // knows to emit content (rather than the early-return for + // locally running agents). let mut manager = session_manager.lock().await; if let Some(instance) = manager.get_session_mut(&session_id) { instance.set_activity_state(activity_state); } - // Note: ACP protocol doesn't have a direct "activity state" notification, - // but the client can observe that new content is streaming in. } UiEvent::RefreshChatList => { // In ACP mode the client manages its own session list via list_sessions(). - // We could potentially notify the client to re-fetch, but the protocol - // doesn't currently have a mechanism for this. debug!("ACP watcher: RefreshChatList (ignored, client uses list_sessions)"); } _ => { - // Other events are not expected from the watcher debug!("ACP watcher: unexpected event: {:?}", event); } } } } - -/// Convert a UiEvent from `refresh_session_incremental` into ACP session -/// notifications and send them to the client. -async fn replay_ui_event_to_acp( - event: &UiEvent, - session_id: &str, - session_manager: &Arc>, - session_update_tx: &mpsc::UnboundedSender<(acp::SessionNotification, oneshot::Sender<()>)>, -) { - match event { - UiEvent::AppendMessages { - messages, - tool_results, - } => { - let acp_session_id = acp::SessionId::new(session_id); - - // Get base_path for the session - let base_path = { - let manager = session_manager.lock().await; - manager - .get_session(session_id) - .and_then(|s| s.session.config.init_path.clone()) - }; - - // Create a temporary ACPUserUI to replay fragments through. - // This reuses the same fragment→ACP conversion logic used during - // streaming and load_session replay. - let replay_ui = Arc::new(ACPUserUI::new( - acp_session_id.clone(), - session_update_tx.clone(), - base_path, - )); - - // Replay message fragments - for message_data in messages { - use crate::ui::gpui::elements::MessageRole; - - match message_data.role { - MessageRole::User => { - // Emit user message fragments as UserMessageChunk - for fragment in &message_data.fragments { - match fragment { - crate::ui::DisplayFragment::PlainText(text) => { - let content = acp::ContentBlock::Text(acp::TextContent::new( - text.clone(), - )); - let chunk = ACPUserUI::content_chunk(content); - replay_ui.queue_session_update( - acp::SessionUpdate::UserMessageChunk(chunk), - ); - } - crate::ui::DisplayFragment::CompactionDivider { summary } => { - let content = acp::ContentBlock::Text(acp::TextContent::new( - format!("[Context compacted: {}]", summary), - )); - let chunk = ACPUserUI::content_chunk(content); - replay_ui.queue_session_update( - acp::SessionUpdate::AgentMessageChunk(chunk), - ); - } - _ => { - // Other fragment types in user messages are uncommon - } - } - } - } - MessageRole::Assistant => { - // Emit assistant message fragments through display_fragment - // which handles all the ACP conversion logic - for fragment in &message_data.fragments { - if let Err(e) = replay_ui.display_fragment(fragment) { - warn!( - "ACP watcher: failed to replay fragment for {session_id}: {e}" - ); - } - } - } - } - } - - // Replay tool results as ToolCallUpdate with final status - for tool_result in tool_results { - let status = match tool_result.status { - crate::ui::ToolStatus::Success => acp::ToolCallStatus::Completed, - crate::ui::ToolStatus::Error => acp::ToolCallStatus::Failed, - _ => acp::ToolCallStatus::InProgress, - }; - - let output_content: Vec = tool_result - .output - .as_ref() - .map(|o| { - vec![acp::ToolCallContent::Content(acp::Content::new( - acp::ContentBlock::Text(acp::TextContent::new(o.clone())), - ))] - }) - .unwrap_or_default(); - - let mut update_fields = acp::ToolCallUpdateFields::new().status(status); - if !output_content.is_empty() { - update_fields = update_fields.content(output_content); - } - - let tool_call_update = acp::ToolCallUpdate::new( - acp::ToolCallId::new(tool_result.tool_id.clone()), - update_fields, - ); - - replay_ui - .queue_session_update(acp::SessionUpdate::ToolCallUpdate(tool_call_update)); - } - } - - UiEvent::SetMessages { .. } => { - // A full reload is needed (paths diverged). In ACP mode we can't easily - // "clear and reload" the session from the agent side without a protocol-level - // mechanism. For now, log a warning. The client would need to call - // load_session again to get the full state. - warn!( - "ACP watcher: session {session_id} paths diverged — \ - full reload required but not yet supported in ACP mode" - ); - } - - UiEvent::UpdatePlan { plan } => { - use crate::types::{PlanItemPriority, PlanItemStatus}; - - // Forward plan updates to the ACP client - let acp_session_id = acp::SessionId::new(session_id); - if !plan.entries.is_empty() { - let acp_entries: Vec = plan - .entries - .iter() - .map(|item| { - let status = match item.status { - PlanItemStatus::Completed => acp::PlanEntryStatus::Completed, - PlanItemStatus::InProgress => acp::PlanEntryStatus::InProgress, - PlanItemStatus::Pending => acp::PlanEntryStatus::Pending, - }; - let priority = match item.priority { - PlanItemPriority::High => acp::PlanEntryPriority::High, - PlanItemPriority::Low => acp::PlanEntryPriority::Low, - PlanItemPriority::Medium => acp::PlanEntryPriority::Medium, - }; - acp::PlanEntry::new(item.content.clone(), priority, status) - }) - .collect(); - - let plan_update = acp::Plan::new(acp_entries); - let notification = acp::SessionNotification::new( - acp_session_id, - acp::SessionUpdate::Plan(plan_update), - ); - let (ack_tx, _) = oneshot::channel(); - if let Err(e) = session_update_tx.send((notification, ack_tx)) { - warn!("ACP watcher: failed to send plan update: {e}"); - } - } - } - - _ => { - debug!( - "ACP watcher: unhandled UI event during replay: {:?}", - std::mem::discriminant(event) - ); - } - } -} diff --git a/crates/code_assistant/src/session/manager.rs b/crates/code_assistant/src/session/manager.rs index d5621df9..ba72e72f 100644 --- a/crates/code_assistant/src/session/manager.rs +++ b/crates/code_assistant/src/session/manager.rs @@ -376,6 +376,24 @@ impl SessionManager { Ok(ui_events) } + /// Advance the UI-sync baseline to match the current on-disk state. + /// + /// Call this after the local agent finishes and its UI has already + /// streamed all content to the client. This ensures the subsequent + /// file-watcher debounce will find no diff and won't replay content + /// that was already sent via streaming. + pub fn advance_ui_sync_baseline(&mut self, session_id: &str) -> Result<()> { + let session_instance = self + .active_sessions + .get_mut(session_id) + .ok_or_else(|| anyhow::anyhow!("Session not found: {session_id}"))?; + + session_instance.reload_from_persistence(&self.persistence)?; + session_instance.last_ui_synced_path = session_instance.session.active_path.clone(); + session_instance.last_ui_synced_tool_count = session_instance.session.tool_executions.len(); + Ok(()) + } + /// Add a user message to a session and return the new node_id. /// This is used to add the message before displaying it in the UI, /// ensuring the node_id is available for the edit button. diff --git a/crates/code_assistant/src/ui/gpui/elements.rs b/crates/code_assistant/src/ui/gpui/elements.rs index 3d48a038..ef090373 100644 --- a/crates/code_assistant/src/ui/gpui/elements.rs +++ b/crates/code_assistant/src/ui/gpui/elements.rs @@ -1424,25 +1424,15 @@ impl BlockView { window: &mut gpui::Window, cx: &mut Context, ) -> gpui::Div { - use crate::ui::gpui::theme::colors; - let theme = cx.theme().clone(); // Icon let icon = file_icons::get().get_tool_icon(&block.name); let (icon_color, desc_color) = match block.status { ToolStatus::Error => (theme.danger, theme.danger), - ToolStatus::Running | ToolStatus::Pending => { - if self.is_generating { - (theme.muted_foreground, theme.muted_foreground) - } else { - ( - colors::tool_block_icon(&theme, &block.status), - theme.foreground, - ) - } + ToolStatus::Running | ToolStatus::Pending | ToolStatus::Success => { + (theme.muted_foreground, theme.muted_foreground) } - ToolStatus::Success => (theme.muted_foreground, theme.muted_foreground), }; // Description text diff --git a/crates/code_assistant/src/ui/gpui/theme.rs b/crates/code_assistant/src/ui/gpui/theme.rs index f5f75e29..50f46ddf 100644 --- a/crates/code_assistant/src/ui/gpui/theme.rs +++ b/crates/code_assistant/src/ui/gpui/theme.rs @@ -133,7 +133,7 @@ pub fn toggle_theme(window: Option<&mut gpui::Window>, cx: &mut App) -> ThemeMod /// Color utility functions for specific components pub mod colors { - use gpui::{black, rgba, white, Hsla}; + use gpui::{rgba, Hsla}; use gpui_component::theme::Theme; // Thinking block colors @@ -152,18 +152,4 @@ pub mod colors { rgba(0x0077CCFF).into() // Light mode blue chevron } } - - // Tool block colors - pub fn tool_block_icon(theme: &Theme, status: &crate::ui::ToolStatus) -> Hsla { - match status { - crate::ui::ToolStatus::Error => theme.warning, - _ => { - if theme.is_dark() { - white() - } else { - black() - } - } - } - } } diff --git a/crates/code_assistant/src/ui/gpui/tool_block_renderers.rs b/crates/code_assistant/src/ui/gpui/tool_block_renderers.rs index 0d46367f..968c44e1 100644 --- a/crates/code_assistant/src/ui/gpui/tool_block_renderers.rs +++ b/crates/code_assistant/src/ui/gpui/tool_block_renderers.rs @@ -205,6 +205,8 @@ struct DescribeTemplate { /// Format string with `{param}` placeholders. The renderer substitutes /// the first matching parameter value found in the tool block. template: &'static str, + /// Fallback text shown before parameters have been resolved. + fallback: &'static str, } /// Inline renderer for exploration / read-only tools. @@ -219,30 +221,37 @@ impl InlineToolRenderer { DescribeTemplate { tool_name: "list_files", template: "List {paths}", + fallback: "List files", }, DescribeTemplate { tool_name: "glob_files", template: "Glob {pattern}", + fallback: "Glob files", }, DescribeTemplate { tool_name: "web_search", template: "Search web for \"{query}\"", + fallback: "Search web", }, DescribeTemplate { tool_name: "web_fetch", template: "Fetch {url}", + fallback: "Fetch URL", }, DescribeTemplate { tool_name: "perplexity_ask", template: "Ask Perplexity", + fallback: "Ask Perplexity", }, DescribeTemplate { tool_name: "view_images", template: "View {paths}", + fallback: "View images", }, DescribeTemplate { tool_name: "view_documents", template: "View {paths}", + fallback: "View documents", }, ]; @@ -285,9 +294,9 @@ impl ToolBlockRenderer for InlineToolRenderer { if let Some(tmpl) = self.templates.iter().find(|t| t.tool_name == tool.name) { let desc = Self::resolve_template(tmpl.template, tool); // If the template still has unresolved placeholders (params not yet - // streamed), show a friendlier fallback. + // streamed), show the stable fallback text. if desc.contains('{') { - tool.name.replace('_', " ") + tmpl.fallback.to_string() } else { desc } From 270a6d2b22ad8823b6eb3b5df9423a12bd2bdcfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Sat, 16 May 2026 18:20:27 +0200 Subject: [PATCH 5/5] Move document_search.rs into fs_explorer crate --- Cargo.lock | 2 ++ crates/code_assistant/Cargo.toml | 2 +- crates/code_assistant/src/tools/impls/mod.rs | 2 -- .../src/tools/impls/search_files.rs | 23 ++++--------------- crates/fs_explorer/Cargo.toml | 6 +++++ .../src}/document_search.rs | 10 ++++---- crates/fs_explorer/src/lib.rs | 2 ++ crates/fs_explorer/src/types.rs | 15 ++++++++++++ 8 files changed, 36 insertions(+), 26 deletions(-) rename crates/{code_assistant/src/tools/impls => fs_explorer/src}/document_search.rs (96%) diff --git a/Cargo.lock b/Cargo.lock index 3773bcd6..02a2baab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3003,6 +3003,8 @@ dependencies = [ "tempfile", "tokio", "tracing", + "transmutation", + "walkdir", ] [[package]] diff --git a/crates/code_assistant/Cargo.toml b/crates/code_assistant/Cargo.toml index bd3d462d..f91ae52e 100644 --- a/crates/code_assistant/Cargo.toml +++ b/crates/code_assistant/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" [features] default = ["document-conversion"] -document-conversion = ["transmutation"] +document-conversion = ["transmutation", "fs_explorer/document-conversion"] [dependencies] command_executor = { path = "../command_executor" } diff --git a/crates/code_assistant/src/tools/impls/mod.rs b/crates/code_assistant/src/tools/impls/mod.rs index 84ac4036..127828e4 100644 --- a/crates/code_assistant/src/tools/impls/mod.rs +++ b/crates/code_assistant/src/tools/impls/mod.rs @@ -1,7 +1,5 @@ // Tool implementations pub mod delete_files; -#[cfg(feature = "document-conversion")] -pub mod document_search; pub mod edit; pub mod execute_command; pub mod glob_files; diff --git a/crates/code_assistant/src/tools/impls/search_files.rs b/crates/code_assistant/src/tools/impls/search_files.rs index ad0473b5..7efb85ec 100644 --- a/crates/code_assistant/src/tools/impls/search_files.rs +++ b/crates/code_assistant/src/tools/impls/search_files.rs @@ -2,7 +2,7 @@ use crate::tools::core::{ Render, ResourcesTracker, Tool, ToolContext, ToolResult, ToolScope, ToolSpec, }; use anyhow::{anyhow, Result}; -use fs_explorer::{SearchMode, SearchOptions, SearchResult}; +use fs_explorer::{DocumentMatchResult, SearchMode, SearchOptions, SearchResult}; use serde::{Deserialize, Serialize}; use serde_json::json; use std::collections::HashMap; @@ -35,21 +35,6 @@ pub struct SearchFilesOutput { pub document_results: Vec, } -/// A match found inside a document file (PDF, DOCX, etc.). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DocumentMatchResult { - /// Path to the document file (relative to project root). - pub file: String, - /// Document format (e.g. "PDF", "DOCX"). - pub format: String, - /// Page number where the match was found (1-indexed). - pub page: usize, - /// A text excerpt around the match. - pub excerpt: String, - /// Number of matches on this page. - pub match_count: usize, -} - impl SearchFilesOutput { fn format_line_with_excerpt( line: &str, @@ -273,7 +258,9 @@ impl Render for SearchFilesOutput { // Append document search results if any if !self.document_results.is_empty() { - formatted.push_str("\n--- Document matches ---\n\n"); + formatted.push_str( + "\n--- Document matches (use view_documents to read full content) ---\n\n", + ); for doc_result in &self.document_results { formatted.push_str(&format!( ">>>>> DOCUMENT MATCH: {} ({}, page {})\n", @@ -550,7 +537,7 @@ impl SearchFilesTool { regex_pattern: &str, paths: Option<&[String]>, ) -> Vec { - super::document_search::search_in_documents(root_dir, regex_pattern, paths).await + fs_explorer::document_search::search_in_documents(root_dir, regex_pattern, paths).await } #[cfg(not(feature = "document-conversion"))] diff --git a/crates/fs_explorer/Cargo.toml b/crates/fs_explorer/Cargo.toml index f2d61042..1cb531c1 100644 --- a/crates/fs_explorer/Cargo.toml +++ b/crates/fs_explorer/Cargo.toml @@ -3,6 +3,10 @@ name = "fs_explorer" version = "0.1.0" edition = "2024" +[features] +default = [] +document-conversion = ["transmutation"] + [dependencies] anyhow = "1.0.95" async-trait = "0.1.85" @@ -15,6 +19,8 @@ regex = "1.11.1" serde = { version = "1.0.215", features = ["derive"] } tracing = "0.1.40" path-clean = "1.0.1" +walkdir = "2.5" +transmutation = { version = "0.3", features = ["office"], optional = true } [dev-dependencies] tempfile = "3.13.0" diff --git a/crates/code_assistant/src/tools/impls/document_search.rs b/crates/fs_explorer/src/document_search.rs similarity index 96% rename from crates/code_assistant/src/tools/impls/document_search.rs rename to crates/fs_explorer/src/document_search.rs index 57e142ce..694ff332 100644 --- a/crates/code_assistant/src/tools/impls/document_search.rs +++ b/crates/fs_explorer/src/document_search.rs @@ -5,7 +5,7 @@ //! converts them to Markdown page-by-page using `transmutation`, and searches the //! resulting text with the user's regex pattern. Matches are reported with page numbers. -use super::search_files::DocumentMatchResult; +use crate::types::DocumentMatchResult; use regex::RegexBuilder; use std::path::{Path, PathBuf}; use transmutation::{Converter, OutputFormat}; @@ -79,10 +79,10 @@ pub async fn search_in_documents( } // Skip large files - if let Ok(metadata) = path.metadata() { - if metadata.len() > MAX_SEARCH_DOCUMENT_SIZE { - continue; - } + if let Ok(metadata) = path.metadata() + && metadata.len() > MAX_SEARCH_DOCUMENT_SIZE + { + continue; } document_files.push(path.to_path_buf()); diff --git a/crates/fs_explorer/src/lib.rs b/crates/fs_explorer/src/lib.rs index bb9da3a4..987d8fad 100644 --- a/crates/fs_explorer/src/lib.rs +++ b/crates/fs_explorer/src/lib.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "document-conversion")] +pub mod document_search; pub mod encoding; mod explorer; pub mod file_updater; diff --git a/crates/fs_explorer/src/types.rs b/crates/fs_explorer/src/types.rs index ea22b702..9f4e0457 100644 --- a/crates/fs_explorer/src/types.rs +++ b/crates/fs_explorer/src/types.rs @@ -91,6 +91,21 @@ pub struct SearchResult { pub match_ranges: Vec>, } +/// A match found inside a document file (PDF, DOCX, etc.). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DocumentMatchResult { + /// Path to the document file (relative to project root). + pub file: String, + /// Document format (e.g. "PDF", "DOCX"). + pub format: String, + /// Page number where the match was found (1-indexed). + pub page: usize, + /// A text excerpt around the match. + pub excerpt: String, + /// Number of matches on this page. + pub match_count: usize, +} + #[async_trait::async_trait] pub trait CodeExplorer: Send + Sync { fn root_dir(&self) -> PathBuf;