neurosynq · AdrianCurtin · Jun 6, 2026 · Jun 5, 2026
diff --git a/.gitignore b/.gitignore
@@ -49,6 +49,8 @@ logs
 !docs/mongodb_index_optimization_guide.md
 !docs/atlas_vector_search_guide.md
 !docs/usage_guide.md
+!docs/webhooks_guide.md
 !SECURITY.md
 !docs/client_sdk_guide.md
 !docs/acl_clp_guide.md
+!examples/README.md
diff --git a/CHANGELOG.md b/CHANGELOG.md
diff --git a/Gemfile b/Gemfile
@@ -32,5 +32,12 @@ group :test, :development do
   gem "puma"
   gem "sinatra"
   gem "rack-test"
+  # MFA / TOTP test infrastructure (Parse::MFA, two_factor_auth).
+  # rotp:    generates TOTP secrets and time-based codes so the MFA unit and
+  #          integration tests can enroll and log in against Parse Server's
+  #          TOTP adapter (SHA1 / 6 digits / 30s — rotp's defaults match).
+  # rqrcode: renders the provisioning QR code exercised by Parse::MFA.qr_code.
+  gem "rotp"
+  gem "rqrcode"
   # gem "thin" # for yard server - disabled due to eventmachine compilation issues
 end
diff --git a/Gemfile.lock b/Gemfile.lock
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    parse-stack-next (5.3.0)
+    parse-stack-next (5.4.0)
       activemodel (>= 6.1, < 9)
       activesupport (>= 6.1, < 9)
       connection_pool (>= 2.2, < 4)
@@ -39,6 +39,7 @@ GEM
     bundler-audit (0.9.3)
       bundler (>= 1.2.0)
       thor (~> 1.0)
+    chunky_png (1.4.0)
     coderay (1.1.3)
     concurrent-ruby (1.3.6)
     connection_pool (3.0.2)
@@ -54,7 +55,7 @@ GEM
       faraday-net_http (>= 2.0, < 3.5)
       json
       logger
-    faraday-net_http (3.4.3)
+    faraday-net_http (3.4.4)
       net-http (~> 0.5)
     faraday-net_http_persistent (2.3.1)
       faraday (~> 2.5)
@@ -72,7 +73,7 @@ GEM
       prism (>= 1.3.0)
       rdoc (>= 4.0.0)
       reline (>= 0.4.2)
-    json (2.19.5)
+    json (2.19.8)
     logger (1.7.0)
     method_source (1.1.0)
     minitest (6.0.6)
@@ -104,7 +105,7 @@ GEM
       coderay (~> 1.1)
       method_source (~> 1.0)
       reline (>= 0.6.0)
-    psych (5.3.1)
+    psych (5.4.0)
       date
       stringio
     puma (8.0.2)
@@ -133,6 +134,11 @@ GEM
       connection_pool
     reline (0.6.3)
       io-console (~> 0.5)
+    rotp (6.3.0)
+    rqrcode (3.2.0)
+      chunky_png (~> 1.0)
+      rqrcode_core (~> 2.0)
+    rqrcode_core (2.1.0)
     ruby-progressbar (1.13.0)
     rufo (0.18.2)
     securerandom (0.4.1)
@@ -181,6 +187,8 @@ DEPENDENCIES
   rake
   redcarpet
   redis
+  rotp
+  rqrcode
   rufo
   sinatra
   webrick

diff --git a/README.md b/README.md
diff --git a/Rakefile b/Rakefile
@@ -77,12 +77,57 @@ def client_console_token!
       pwd = $stdin.gets.to_s
     end
   end
-  u = Parse::User.login(user, pwd.chomp)
+  u = console_login_with_optional_mfa(user, pwd.chomp)
   abort "[client:console] login failed for #{user.inspect}" if u.nil? || u.session_token.to_s.empty?
   puts "Logged in as #{u.username} (#{u.id})."
   u.session_token
 end
 
+# Log `user` in, prompting for an MFA token (TOTP or recovery code, or
+# +PARSE_LOGIN_MFA+ when non-interactive) if the server asks for one, then
+# retrying via {Parse::User.login_with_mfa}. Returns a logged-in {Parse::User},
+# or nil when a non-MFA error response is returned (the caller then aborts with
+# "login failed"). A raised non-MFA error is re-raised; a blank token or a
+# failed MFA verification aborts the process here.
+def console_login_with_optional_mfa(user, pwd)
+  # Parse Server signals "this account needs an MFA token" two ways depending on
+  # the error code path: a returned error response ("Missing additional
+  # authData ...") or a raised Parse::Error for the OTHER_CAUSE (code <= 100)
+  # variant. Treat both as "prompt for MFA"; anything else is a real credential
+  # failure and must NOT trigger an MFA prompt.
+  mfa_indicator = /additional\s+authData|missing.*mfa|\bMFA\b/i
+  begin
+    response = Parse.client.login(user, pwd)
+    if response.success?
+      return Parse::User.with_authdata_trust { Parse::User.build(response.result) }
+    end
+    return nil unless response.error.to_s.match?(mfa_indicator)
+  rescue Parse::Error, Parse::Client::ResponseError => e
+    raise unless e.message.to_s.match?(mfa_indicator)
+  end
+
+  token = ENV["PARSE_LOGIN_MFA"].to_s.strip
+  if token.empty?
+    print "MFA token (authenticator code or recovery code): "
+    token = $stdin.gets.to_s.strip
+  end
+  abort "[client:console] MFA token required for #{user.inspect}" if token.empty?
+
+  # A wrong/expired token can surface either as Parse::MFA::VerificationError or,
+  # depending on the server error code path, as a generic Parse::Error (e.g.
+  # ServiceUnavailableError for the OTHER_CAUSE code) or a nil return. Since a
+  # token was supplied here, treat any failure as an MFA verification failure
+  # and abort cleanly rather than letting an unhandled exception escape.
+  result =
+    begin
+      Parse::User.login_with_mfa(user, pwd, token)
+    rescue Parse::MFA::VerificationError, Parse::Error => e
+      abort "[client:console] MFA verification failed for #{user.inspect}: #{e.message}"
+    end
+  abort "[client:console] MFA verification failed for #{user.inspect}" if result.nil?
+  result
+end
+
 # Default test task runs all tests with Docker enabled.
 #
 # `*disruptive*` tests are EXCLUDED here: they stop/restart the shared
@@ -131,7 +176,11 @@ def run_test_files!(label, files, log:)
     puts "[#{n}/#{total}] #{file}"
     puts "=" * 80
     t0 = Time.now
-    ok = system("PARSE_TEST_USE_DOCKER=true ruby -Ilib:test #{file}")
+    # Always go through `bundle exec` so the locked gem versions win. With a
+    # bare `ruby`, RubyGems activates the newest installed minitest (6.0.x),
+    # which dropped the bundled `minitest/mock`; the standalone `minitest-mock`
+    # gem then can't co-activate, so `test_helper.rb` fails to load for every file.
+    ok = system("PARSE_TEST_USE_DOCKER=true bundle exec ruby -Ilib:test #{file}")
     dt = Time.now - t0
     results << [file, ok, dt]
     summary = format("[%d/%d] %-4s %7.1fs  %s", n, total, ok ? "PASS" : "FAIL", dt, file)
@@ -203,7 +252,7 @@ namespace :test do
         puts "=" * 80
         # Each file runs in its own process so a server outage in one cannot
         # bleed into the next.
-        system("PARSE_TEST_USE_DOCKER=true ruby -Ilib:test #{file}") || begin
+        system("PARSE_TEST_USE_DOCKER=true bundle exec ruby -Ilib:test #{file}") || begin
           # A disruptive test may have left the server down on failure; bring
           # it back so a follow-up run / other tasks start from a clean state.
           system("docker start #{ENV["PSNEXT_PREFIX"] || "psnext-it"}-server", out: IO::NULL, err: IO::NULL)

diff --git a/docs/atlas_vector_search_guide.md b/docs/atlas_vector_search_guide.md
@@ -372,6 +372,10 @@ embed-time chunking), use one of these patterns:
 
 ## Retrieval (RAG)
 
+> For an end-to-end runnable script — managed `embed`, `agent_searchable`,
+> `semantic_search`, and an OpenAI/Anthropic generation add-in — see
+> [`examples/rag_chatbot.rb`](../examples/rag_chatbot.rb).
+
 `Parse::Retrieval` (`Parse::RAG` is an alias) sits on top of
 `find_similar`. `Parse::Retrieval.retrieve` embeds a natural-language
 query, runs Atlas `$vectorSearch` through `find_similar` (so ACL/CLP are
@@ -395,8 +399,88 @@ chunks = Parse::Retrieval.retrieve(
 # => Array<Parse::Retrieval::Chunk> — { id, score, content, source, metadata }
 ```
 
-`rerank:` and `hybrid:` are reserved on the signature and raise
-`NotImplementedError` if supplied.
+`retrieve` also accepts `hybrid:` (fuse a lexical branch with the vector
+branch — see [Hybrid search](#hybrid-search-vector--lexical) below) and
+`rerank:` (reorder retrieved documents with a cross-encoder before
+chunking — see [Reranking](#reranking)). Both were reserved in earlier
+releases and now ship in 5.4.0.
+
+### Hybrid search (vector + lexical)
+
+`Class.hybrid_search` runs a lexical Atlas Search (`$search`) branch and a
+`$vectorSearch` branch as **two independent aggregations**, then fuses
+their ranked results with reciprocal-rank fusion (RRF). Using two aggregations
+(not a single `$facet`) is mandatory: `$vectorSearch` is prohibited inside
+`$facet` / `$lookup` / `$unionWith` and must be stage 0 of its pipeline.
+Each branch enforces ACL/CLP/`protectedFields` independently before
+fusion (via `Parse::AtlasSearch.search` and `Parse::VectorSearch.search`),
+so the fused rows are already access-filtered — there is no separate
+hydration fetch.
+
+```ruby
+hits = Article.hybrid_search(
+  text:    "how do I reset my password",   # embedded for the vector branch;
+                                            # also the default lexical query
+  lexical: { index: "article_search", fields: %w[title body] },
+  vector:  { index: "article_embedding_idx", num_candidates: 200 },
+  k:       20,
+  fusion:  { k_constant: 60, weights: { lexical: 0.4, vector: 0.6 } },
+  session_token: user.session_token,        # ACL scope, applied to BOTH branches
+)
+# => Array<Parse::Object>; each carries #hybrid_score, #hybrid_ranks,
+#    and #vector_score / #search_score when that branch contributed.
+```
+
+**RRF math.** `fused_score(d) = Σ_b weight_b / (k_constant + rank_b(d))`,
+where `rank_b(d)` is the document's 1-based rank in branch `b`. A larger
+`k_constant` (default 60) flattens the contribution curve. `weights`
+defaults to 1.0 per branch. `Parse::VectorSearch::Hybrid.rrf` exposes the
+pure fusion if you want to fuse pre-fetched ranked lists yourself.
+
+**Native `$rankFusion` (Atlas 8.0+).**
+`Parse::VectorSearch::Hybrid.rank_fusion_supported?(collection)` detects
+the native server-side fusion stage via a cached behavioural probe (1-hour
+TTL — not version-string parsing). Native execution is **opt-in**
+(`fusion: { method: :rrf_native }`) and falls back to the client-side path
+when the cluster does not support it; the default `:rrf` always fuses
+client-side, which is the fully-enforced, deterministic path. `$rankFusion`
+is admitted to `PipelineSecurity::ALLOWED_STAGES` for the native path.
+
+`Parse::Retrieval.retrieve(hybrid: true, ...)` routes through
+`hybrid_search` and chunks the fused results; pass `hybrid: { lexical:,
+vector:, fusion: }` to configure the branches. Tenant scope is folded into
+**both** branches (the vector Atlas pre-filter and the lexical
+post-`$search` `$match`) so neither leaks cross-tenant document existence.
+
+### Reranking
+
+A reranker reorders retrieved documents by a cross-encoder relevance score
+**before** chunking. Pass any object answering
+`#rerank(query:, documents:, top_n:)` — typically a
+`Parse::Retrieval::Reranker::Base` subclass:
+
+```ruby
+reranker = Parse::Retrieval::Reranker::Cohere.new(
+  api_key: ENV.fetch("COHERE_API_KEY"), model: "rerank-v3.5",
+)
+chunks = Parse::Retrieval.retrieve(
+  query: "reset my password", klass: Article, k: 30,
+  rerank: reranker, rerank_top_n: 5,    # keep the 5 most relevant docs
+)
+# Reranked chunks' score is the cross-encoder relevance_score.
+```
+
+`Reranker::Fixture` is a deterministic, zero-network reranker (lexical
+token overlap) for tests. The `Reranker::Base` protocol validates inputs,
+bounds `top_n`, rejects out-of-range indices, and sorts descending —
+adapters implement only the network call (`#rerank_scores`).
+
+> **Spend cap.** The `semantic_search` agent tool charges the estimated
+> query-embedding tokens against the caller's tenant budget via
+> `Parse::Embeddings::SpendCap` (opt-in; `configure(limit_tokens:,
+> window:)`). A breach hard-refuses (surfaced to the agent as a
+> rate-limited tool error). Admin agents are exempt; direct
+> `find_similar` / `retrieve` callers are not metered.
 
 ### Chunkers
 

diff --git a/docs/client_sdk_guide.md b/docs/client_sdk_guide.md
@@ -11,6 +11,11 @@ go over REST, and authorization is carried by the user's `sessionToken`.
 Every claim below is locked in by the integration tests under
 `test/lib/parse/client_*_integration_test.rb`.
 
+For a runnable starting point, see
+[`examples/basic_client.rb`](../examples/basic_client.rb) (a no-master client
+with a row-level ACL-enforcement demo) and its master-key counterpart
+[`examples/basic_server.rb`](../examples/basic_server.rb).
+
 ---
 
 ## Why a separate guide?

diff --git a/docs/mcp_guide.md b/docs/mcp_guide.md
@@ -7,7 +7,7 @@ The Model Context Protocol (MCP) is a standardized JSON-RPC 2.0-based interface
 Three deployment modes are available:
 
 - **Standalone HTTP server (`MCPServer`)** — a WEBrick process for dedicated MCP deployments.
-- **Rack-mountable adapter (`MCPRackApp`)** — embeds inside an existing Sinatra or Rails application.
+- **Rack-mountable adapter (`MCPRackApp`)** — embeds inside an existing Sinatra or Rails application. This is the primary deployment for the MCP 2025-06-18 Streamable HTTP transport; enable it with `transport: :streamable_http` (see [Streamable HTTP transport](#streamable-http-transport-primary)).
 - **Direct in-process dispatcher (`MCPDispatcher`)** — a pure function for in-process usage, custom transports, and testing.
 
 ---
@@ -191,6 +191,42 @@ map("/mcp") { run mcp_app }
 map("/")    { run ->(env) { [200, {"Content-Type" => "text/plain"}, ["ok"]] } }
 ```
 
+#### Streamable HTTP transport (primary)
+
+The MCP 2025-06-18 **Streamable HTTP** transport is the recommended transport for `MCPRackApp`. It is a single-endpoint model in which the client `POST`s JSON-RPC requests (receiving either a buffered JSON reply or, with `Accept: text/event-stream`, a streamed SSE reply) and holds open a long-lived `GET` request to receive server-initiated notifications. Session termination is signalled with `DELETE` carrying the `Mcp-Session-Id` header.
+
+Enable the whole transport with one switch:
+
+```ruby
+mcp_app = Parse::Agent.rack_app(transport: :streamable_http) do |env|
+  # ... auth factory ...
+end
+```
+
+`transport: :streamable_http` is exactly equivalent to `streaming: true, notifications: true` — it turns on POST→SSE streaming and the server→client `GET /` notification stream together. Add `resource_subscriptions: true` alongside it to upgrade the server→client bus from the plain notification posture to the LiveQuery-backed resource-subscription posture:
+
+```ruby
+mcp_app = Parse::Agent.rack_app(
+  transport: :streamable_http,
+  resource_subscriptions: true,   # optional: bridge LiveQuery resource updates
+) do |env|
+  # ...
+end
+```
+
+`transport:` is a closed enum:
+
+| Value | Effect |
+|-------|--------|
+| `:streamable_http` | Full Streamable HTTP transport (`streaming: true` + `notifications: true`). |
+| `:legacy` / `nil` (default) | Historical behavior: buffered JSON responses, no server→client stream. The standalone SSE/JSON path below remains a supported fallback. |
+
+Passing `transport: :streamable_http` together with an explicit `streaming:` or `notifications:` raises `ArgumentError` (the switch already owns those toggles); any `transport:` value other than `:streamable_http`, `:legacy`, or `nil` also raises. The default is unchanged, so an existing `Parse::Agent.rack_app { ... }` keeps its non-streaming JSON behavior until you opt in.
+
+**WEBrick cannot deliver Streamable HTTP.** The switch — like `streaming:` — has no effect under the WEBrick-backed standalone `MCPServer`, which buffers responses and cannot hold the `GET` stream open. Use Puma, Falcon, or Unicorn for a real Streamable HTTP deployment.
+
+The remaining subsections document the individual toggles `transport: :streamable_http` consolidates, for operators who need finer control or are reading older configurations.
+
 #### MCP progress notifications via SSE (opt-in)
 
 **WEBrick cannot stream.** The standalone `MCPServer` is WEBrick-based and buffers the full response before sending. Setting `streaming: true` on an `MCPRackApp` mounted under WEBrick silently degrades to a single buffered response with concatenated SSE events. SSE streaming requires a Rack server that supports streaming response bodies — **Puma, Falcon, or Unicorn**. Verify your deployment uses one of these before relying on `streaming: true`.
@@ -537,10 +573,29 @@ Parse Server version and its `masterKeyIps` configuration.)
   soft cap *equal to* `max_concurrent_dispatchers`. So the effective steady-state
   ceiling across both surfaces is up to **2× `max_concurrent_dispatchers`** (up
   to N request-scoped SSE dispatchers plus N listening streams). Size the value
-  with that 2× factor in mind (e.g. relative to your Puma `max_threads`). Leaving
-  it unset (the default `nil`) leaves both surfaces uncapped; the app logs a
+  with that 2× factor in mind (e.g. relative to your Puma `max_threads`).
+  `max_concurrent_dispatchers:` defaults to a finite **100**
+  (`Parse::Agent::MCPRackApp::DEFAULT_MAX_CONCURRENT_DISPATCHERS`), so a
+  streaming surface is bounded out of the box — once the cap is reached a new
+  SSE request or listening stream is refused with a `503` JSON-RPC `-32000`
+  ("server busy"). Pass an explicit positive integer to resize it, or
+  `max_concurrent_dispatchers: nil` to knowingly run uncapped (the app logs a
   one-time warning at construction when a streaming or subscription/notification
-  surface is enabled without a cap.
+  surface is enabled with `nil`). A non-positive or non-integer value raises
+  `ArgumentError`.
+- **Client disconnect mid-tool-call.** When a client drops the connection while
+  a tool is still running, the SSE worker is torn down and the dispatcher's
+  cancellation token is tripped, so a cooperative tool (one that checks
+  `agent.cancelled?` at a checkpoint) exits promptly. A tool blocked inside a
+  Mongo/REST roundtrip cannot observe the token, but its slot is reclaimed when
+  the per-tool `Timeout` or the clean MongoDB `socket_timeout` (10s) / REST
+  `timeout` (30s) deadline fires — through the driver's clean error path. The
+  orphaned dispatcher is **intentionally not force-killed**: a `Thread#kill`
+  would bypass the driver's connection-invalidation and could return a half-used
+  pooled connection to a later request. To observe how often disconnects abandon
+  in-flight work, watch the cumulative
+  `Parse::Agent::MCPRackApp.abandoned_dispatcher_count` or subscribe to the
+  `parse.agent.mcp_dispatcher_abandoned` `ActiveSupport::Notifications` event.
 
 ### Listening-stream ownership