From 7ec50b8fce3c372b5adebadb2dd8deec34548427 Mon Sep 17 00:00:00 2001
From: Ashutosh Tripathi <zero8@z3R0-8.local>
Date: Tue, 10 Feb 2026 15:12:54 +0530
Subject: [PATCH 1/5] Add memory and ollama modules

Add memory.ts (conversation storage, search, embedding, recall) and
ollama.ts (LLM client for summarization and synthesis) to enable
Smriti to import QMD as a proper package dependency.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/memory.ts | 848 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/ollama.ts | 169 ++++++++++
 2 files changed, 1017 insertions(+)
 create mode 100644 src/memory.ts
 create mode 100644 src/ollama.ts

diff --git a/src/memory.ts b/src/memory.ts
new file mode 100644
index 00000000..46a7ff2e
--- /dev/null
+++ b/src/memory.ts
@@ -0,0 +1,848 @@
+/**
+ * memory.ts - Conversation memory storage & retrieval for QMD
+ *
+ * Stores conversation messages in sessions, provides FTS5 + vector search,
+ * summarization via Ollama, and memory recall for LLM context.
+ *
+ * Reuses QMD's existing infrastructure:
+ * - content_vectors + vectors_vec tables for embeddings
+ * - hashContent() for content-addressable storage
+ * - chunkDocumentByTokens() for chunking
+ * - insertEmbedding() for vector storage
+ * - BM25 normalization pattern from searchFTS
+ * - Two-step vector search pattern from searchVec
+ * - reciprocalRankFusion() for combining results
+ */
+
+import { Database } from "bun:sqlite";
+import {
+  hashContent,
+  chunkDocumentByTokens,
+  insertEmbedding,
+  reciprocalRankFusion,
+  getDocid,
+  type RankedResult,
+} from "./store.js";
+import {
+  getDefaultLlamaCpp,
+  formatQueryForEmbedding,
+  formatDocForEmbedding,
+} from "./llm.js";
+import { ollamaSummarize, ollamaRecall as ollamaRecallSynthesize } from "./ollama.js";
+
+// =============================================================================
+// Types
+// =============================================================================
+
+export type MemorySession = {
+  id: string;
+  title: string;
+  created_at: string;
+  updated_at: string;
+  summary: string | null;
+  summary_at: string | null;
+  active: number;
+};
+
+export type MemoryMessage = {
+  id: number;
+  session_id: string;
+  role: string;
+  content: string;
+  hash: string;
+  created_at: string;
+  metadata: Record<string, unknown> | null;
+};
+
+export type MemorySearchResult = {
+  session_id: string;
+  session_title: string;
+  message_id: number;
+  role: string;
+  content: string;
+  score: number;
+  source: "fts" | "vec";
+};
+
+// =============================================================================
+// Schema Initialization
+// =============================================================================
+
+/**
+ * Create memory tables, indexes, triggers in the QMD database.
+ * Safe to call multiple times (uses IF NOT EXISTS).
+ */
+export function initializeMemoryTables(db: Database): void {
+  // Sessions table
+  db.exec(`
+    CREATE TABLE IF NOT EXISTS memory_sessions (
+      id TEXT PRIMARY KEY,
+      title TEXT NOT NULL DEFAULT '',
+      created_at TEXT NOT NULL,
+      updated_at TEXT NOT NULL,
+      summary TEXT,
+      summary_at TEXT,
+      active INTEGER NOT NULL DEFAULT 1
+    )
+  `);
+
+  // Messages table
+  db.exec(`
+    CREATE TABLE IF NOT EXISTS memory_messages (
+      id INTEGER PRIMARY KEY AUTOINCREMENT,
+      session_id TEXT NOT NULL,
+      role TEXT NOT NULL,
+      content TEXT NOT NULL,
+      hash TEXT NOT NULL,
+      created_at TEXT NOT NULL,
+      metadata TEXT,
+      FOREIGN KEY (session_id) REFERENCES memory_sessions(id) ON DELETE CASCADE
+    )
+  `);
+
+  db.exec(`CREATE INDEX IF NOT EXISTS idx_memory_messages_session ON memory_messages(session_id)`);
+  db.exec(`CREATE INDEX IF NOT EXISTS idx_memory_messages_hash ON memory_messages(hash)`);
+
+  // FTS5 for memory search
+  db.exec(`
+    CREATE VIRTUAL TABLE IF NOT EXISTS memory_fts USING fts5(
+      session_title, role, content,
+      tokenize='porter unicode61'
+    )
+  `);
+
+  // Triggers to sync memory_fts
+  db.exec(`
+    CREATE TRIGGER IF NOT EXISTS memory_messages_ai AFTER INSERT ON memory_messages
+    BEGIN
+      INSERT INTO memory_fts(rowid, session_title, role, content)
+      SELECT
+        new.id,
+        (SELECT title FROM memory_sessions WHERE id = new.session_id),
+        new.role,
+        new.content;
+    END
+  `);
+
+  db.exec(`
+    CREATE TRIGGER IF NOT EXISTS memory_messages_ad AFTER DELETE ON memory_messages
+    BEGIN
+      DELETE FROM memory_fts WHERE rowid = old.id;
+    END
+  `);
+}
+
+// =============================================================================
+// FTS5 Query Building (same pattern as store.ts buildFTS5Query)
+// =============================================================================
+
+function sanitizeMemoryFTSTerm(term: string): string {
+  return term.replace(/[^\p{L}\p{N}']/gu, "").toLowerCase();
+}
+
+function buildMemoryFTS5Query(query: string): string | null {
+  const terms = query
+    .split(/\s+/)
+    .map((t) => sanitizeMemoryFTSTerm(t))
+    .filter((t) => t.length > 0);
+  if (terms.length === 0) return null;
+  if (terms.length === 1) return `"${terms[0]}"*`;
+  return terms.map((t) => `"${t}"*`).join(" AND ");
+}
+
+// =============================================================================
+// Session CRUD
+// =============================================================================
+
+/**
+ * Create a new memory session. If id is "new", generates a random ID.
+ */
+export function createSession(
+  db: Database,
+  id: string,
+  title: string = ""
+): MemorySession {
+  const now = new Date().toISOString();
+  const sessionId = id === "new" ? crypto.randomUUID().slice(0, 8) : id;
+
+  db.prepare(
+    `INSERT INTO memory_sessions (id, title, created_at, updated_at, active) VALUES (?, ?, ?, ?, 1)`
+  ).run(sessionId, title, now, now);
+
+  return {
+    id: sessionId,
+    title,
+    created_at: now,
+    updated_at: now,
+    summary: null,
+    summary_at: null,
+    active: 1,
+  };
+}
+
+/**
+ * Get a session by ID.
+ */
+export function getSession(db: Database, id: string): MemorySession | null {
+  return (
+    (db
+      .prepare(`SELECT * FROM memory_sessions WHERE id = ?`)
+      .get(id) as MemorySession | null) || null
+  );
+}
+
+/**
+ * List sessions, most recent first.
+ */
+export function listSessions(
+  db: Database,
+  options: { limit?: number; includeInactive?: boolean } = {}
+): MemorySession[] {
+  const limit = options.limit ?? 20;
+  const where = options.includeInactive ? "" : "WHERE active = 1";
+  return db
+    .prepare(
+      `SELECT * FROM memory_sessions ${where} ORDER BY updated_at DESC LIMIT ?`
+    )
+    .all(limit) as MemorySession[];
+}
+
+/**
+ * Soft-delete a session (set active = 0). If hard = true, permanently delete.
+ */
+export function deleteSession(
+  db: Database,
+  id: string,
+  hard: boolean = false
+): void {
+  if (hard) {
+    // Delete messages first (CASCADE should handle, but be explicit)
+    db.prepare(`DELETE FROM memory_messages WHERE session_id = ?`).run(id);
+    db.prepare(`DELETE FROM memory_sessions WHERE id = ?`).run(id);
+  } else {
+    db.prepare(`UPDATE memory_sessions SET active = 0 WHERE id = ?`).run(id);
+  }
+}
+
+/**
+ * Clear all sessions (soft or hard delete).
+ */
+export function clearAllSessions(db: Database, hard: boolean = false): number {
+  if (hard) {
+    const count = (
+      db.prepare(`SELECT COUNT(*) as count FROM memory_sessions`).get() as {
+        count: number;
+      }
+    ).count;
+    db.exec(`DELETE FROM memory_messages`);
+    db.exec(`DELETE FROM memory_sessions`);
+    db.exec(`DELETE FROM memory_fts`);
+    return count;
+  } else {
+    const result = db.prepare(
+      `UPDATE memory_sessions SET active = 0 WHERE active = 1`
+    );
+    return result.run().changes;
+  }
+}
+
+// =============================================================================
+// Message CRUD
+// =============================================================================
+
+/**
+ * Add a message to a session. Creates session if it doesn't exist.
+ */
+export async function addMessage(
+  db: Database,
+  sessionId: string,
+  role: string,
+  content: string,
+  options: { title?: string; metadata?: Record<string, unknown> } = {}
+): Promise<MemoryMessage> {
+  const now = new Date().toISOString();
+  const hash = await hashContent(content);
+
+  // Ensure session exists (createSession may generate a new ID for "new")
+  let session = getSession(db, sessionId);
+  if (!session) {
+    session = createSession(db, sessionId, options.title || "");
+  } else if (options.title && !session.title) {
+    db.prepare(`UPDATE memory_sessions SET title = ? WHERE id = ?`).run(
+      options.title,
+      sessionId
+    );
+  }
+
+  // Use the resolved session ID (may differ from input if "new" was passed)
+  const resolvedSessionId = session.id;
+
+  const metadataStr = options.metadata
+    ? JSON.stringify(options.metadata)
+    : null;
+
+  const result = db
+    .prepare(
+      `INSERT INTO memory_messages (session_id, role, content, hash, created_at, metadata)
+     VALUES (?, ?, ?, ?, ?, ?)`
+    )
+    .run(resolvedSessionId, role, content, hash, now, metadataStr);
+
+  // Update session timestamp
+  db.prepare(`UPDATE memory_sessions SET updated_at = ? WHERE id = ?`).run(
+    now,
+    resolvedSessionId
+  );
+
+  return {
+    id: Number(result.lastInsertRowid),
+    session_id: resolvedSessionId,
+    role,
+    content,
+    hash,
+    created_at: now,
+    metadata: options.metadata || null,
+  };
+}
+
+/**
+ * Get messages for a session, ordered by creation time.
+ */
+export function getMessages(
+  db: Database,
+  sessionId: string,
+  options: { limit?: number } = {}
+): MemoryMessage[] {
+  let sql = `SELECT * FROM memory_messages WHERE session_id = ? ORDER BY created_at ASC`;
+  const params: (string | number)[] = [sessionId];
+  if (options.limit) {
+    sql += ` LIMIT ?`;
+    params.push(options.limit);
+  }
+  return db.prepare(sql).all(...params) as MemoryMessage[];
+}
+
+/**
+ * Get a formatted transcript for a session.
+ */
+export function getSessionTranscript(
+  db: Database,
+  sessionId: string
+): string {
+  const messages = getMessages(db, sessionId);
+  return messages.map((m) => `${m.role}: ${m.content}`).join("\n\n");
+}
+
+// =============================================================================
+// Search
+// =============================================================================
+
+/**
+ * Search memory using FTS5 (BM25). Same normalization as store.ts searchFTS.
+ */
+export function searchMemoryFTS(
+  db: Database,
+  query: string,
+  limit: number = 20
+): MemorySearchResult[] {
+  const ftsQuery = buildMemoryFTS5Query(query);
+  if (!ftsQuery) return [];
+
+  const sql = `
+    SELECT
+      m.session_id,
+      s.title as session_title,
+      m.id as message_id,
+      m.role,
+      m.content,
+      bm25(memory_fts, 5.0, 1.0, 1.0) as bm25_score
+    FROM memory_fts f
+    JOIN memory_messages m ON m.id = f.rowid
+    JOIN memory_sessions s ON s.id = m.session_id
+    WHERE memory_fts MATCH ? AND s.active = 1
+    ORDER BY bm25_score ASC
+    LIMIT ?
+  `;
+
+  const rows = db.prepare(sql).all(ftsQuery, limit) as {
+    session_id: string;
+    session_title: string;
+    message_id: number;
+    role: string;
+    content: string;
+    bm25_score: number;
+  }[];
+
+  return rows.map((row) => ({
+    session_id: row.session_id,
+    session_title: row.session_title,
+    message_id: row.message_id,
+    role: row.role,
+    content: row.content,
+    // Same BM25 normalization as store.ts: 1 / (1 + |score|)
+    score: 1 / (1 + Math.abs(row.bm25_score)),
+    source: "fts" as const,
+  }));
+}
+
+/**
+ * Search memory using vector similarity.
+ * Two-step pattern: query vectors_vec first, then JOIN separately.
+ */
+export async function searchMemoryVec(
+  db: Database,
+  query: string,
+  limit: number = 20
+): Promise<MemorySearchResult[]> {
+  // Check if vectors_vec table exists
+  const tableExists = db
+    .prepare(
+      `SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`
+    )
+    .get();
+  if (!tableExists) return [];
+
+  // Get query embedding
+  const llm = getDefaultLlamaCpp();
+  const formattedQuery = formatQueryForEmbedding(query);
+  const result = await llm.embed(formattedQuery, { isQuery: true });
+  if (!result) return [];
+
+  // Step 1: Get vector matches (no JOINs - sqlite-vec hangs with JOINs)
+  const vecResults = db
+    .prepare(
+      `SELECT hash_seq, distance FROM vectors_vec WHERE embedding MATCH ? AND k = ?`
+    )
+    .all(new Float32Array(result.embedding), limit * 3) as {
+    hash_seq: string;
+    distance: number;
+  }[];
+
+  if (vecResults.length === 0) return [];
+
+  // Step 2: Match against memory_messages by hash
+  const hashSeqs = vecResults.map((r) => r.hash_seq);
+  const distanceMap = new Map(vecResults.map((r) => [r.hash_seq, r.distance]));
+
+  // Extract unique hashes from hash_seq (format: "hash_seq")
+  const hashes = [
+    ...new Set(hashSeqs.map((hs) => hs.split("_").slice(0, -1).join("_"))),
+  ];
+  const hashPlaceholders = hashes.map(() => "?").join(",");
+
+  const docSql = `
+    SELECT
+      m.id as message_id,
+      m.session_id,
+      s.title as session_title,
+      m.role,
+      m.content,
+      m.hash,
+      cv.hash || '_' || cv.seq as hash_seq
+    FROM memory_messages m
+    JOIN memory_sessions s ON s.id = m.session_id
+    JOIN content_vectors cv ON cv.hash = m.hash
+    WHERE m.hash IN (${hashPlaceholders}) AND s.active = 1
+  `;
+
+  const docRows = db.prepare(docSql).all(...hashes) as {
+    message_id: number;
+    session_id: string;
+    session_title: string;
+    role: string;
+    content: string;
+    hash: string;
+    hash_seq: string;
+  }[];
+
+  // Combine with distances, dedupe by message_id
+  const seen = new Map<
+    number,
+    { row: (typeof docRows)[0]; bestDist: number }
+  >();
+  for (const row of docRows) {
+    const distance = distanceMap.get(row.hash_seq) ?? 1;
+    const existing = seen.get(row.message_id);
+    if (!existing || distance < existing.bestDist) {
+      seen.set(row.message_id, { row, bestDist: distance });
+    }
+  }
+
+  return Array.from(seen.values())
+    .sort((a, b) => a.bestDist - b.bestDist)
+    .slice(0, limit)
+    .map(({ row, bestDist }) => ({
+      session_id: row.session_id,
+      session_title: row.session_title,
+      message_id: row.message_id,
+      role: row.role,
+      content: row.content,
+      score: 1 - bestDist, // cosine similarity
+      source: "vec" as const,
+    }));
+}
+
+// =============================================================================
+// Embedding
+// =============================================================================
+
+/**
+ * Embed unembedded memory messages.
+ * Reuses existing content_vectors + vectors_vec tables.
+ * Returns count of newly embedded messages.
+ */
+export async function embedMemoryMessages(
+  db: Database,
+  options: { onProgress?: (done: number, total: number) => void } = {}
+): Promise<number> {
+  // Find messages without embeddings
+  const unembedded = db
+    .prepare(
+      `
+    SELECT m.hash, m.content, m.session_id
+    FROM memory_messages m
+    LEFT JOIN content_vectors cv ON cv.hash = m.hash AND cv.seq = 0
+    WHERE cv.hash IS NULL
+    GROUP BY m.hash
+  `
+    )
+    .all() as { hash: string; content: string; session_id: string }[];
+
+  if (unembedded.length === 0) return 0;
+
+  const llm = getDefaultLlamaCpp();
+  let embedded = 0;
+
+  for (const msg of unembedded) {
+    // Chunk the message content
+    const chunks = await chunkDocumentByTokens(msg.content);
+
+    // Ensure vec table exists with correct dimensions
+    // Get dimension from first embedding
+    const firstText = formatDocForEmbedding(chunks[0]!.text);
+    const firstEmbed = await llm.embed(firstText);
+    if (!firstEmbed) continue;
+
+    const dimensions = firstEmbed.embedding.length;
+
+    // Ensure vectors_vec table exists with correct dimensions
+    const tableInfo = db
+      .prepare(
+        `SELECT sql FROM sqlite_master WHERE type='table' AND name='vectors_vec'`
+      )
+      .get() as { sql: string } | null;
+    if (!tableInfo) {
+      db.exec(
+        `CREATE VIRTUAL TABLE vectors_vec USING vec0(hash_seq TEXT PRIMARY KEY, embedding float[${dimensions}] distance_metric=cosine)`
+      );
+    }
+
+    const now = new Date().toISOString();
+
+    // Insert first chunk embedding
+    insertEmbedding(
+      db,
+      msg.hash,
+      0,
+      chunks[0]!.pos,
+      new Float32Array(firstEmbed.embedding),
+      firstEmbed.model,
+      now
+    );
+
+    // Embed remaining chunks
+    for (let i = 1; i < chunks.length; i++) {
+      const chunk = chunks[i]!;
+      const text = formatDocForEmbedding(chunk.text);
+      const embedResult = await llm.embed(text);
+      if (embedResult) {
+        insertEmbedding(
+          db,
+          msg.hash,
+          i,
+          chunk.pos,
+          new Float32Array(embedResult.embedding),
+          embedResult.model,
+          now
+        );
+      }
+    }
+
+    embedded++;
+    options.onProgress?.(embedded, unembedded.length);
+  }
+
+  return embedded;
+}
+
+// =============================================================================
+// Summarization
+// =============================================================================
+
+/**
+ * Summarize a session via Ollama and store the summary.
+ */
+export async function summarizeSession(
+  db: Database,
+  sessionId: string,
+  options: { model?: string; force?: boolean } = {}
+): Promise<string> {
+  const session = getSession(db, sessionId);
+  if (!session) throw new Error(`Session not found: ${sessionId}`);
+
+  // Check if already summarized (unless force)
+  if (session.summary && !options.force) {
+    return session.summary;
+  }
+
+  const transcript = getSessionTranscript(db, sessionId);
+  if (!transcript.trim()) throw new Error(`Session ${sessionId} has no messages`);
+
+  const summary = await ollamaSummarize(transcript, { model: options.model });
+  const now = new Date().toISOString();
+
+  db.prepare(
+    `UPDATE memory_sessions SET summary = ?, summary_at = ? WHERE id = ?`
+  ).run(summary, now, sessionId);
+
+  return summary;
+}
+
+/**
+ * Summarize recent sessions that don't have summaries yet.
+ * Returns count of sessions summarized.
+ */
+export async function summarizeRecentSessions(
+  db: Database,
+  options: { limit?: number; model?: string } = {}
+): Promise<number> {
+  const limit = options.limit ?? 10;
+  const sessions = db
+    .prepare(
+      `SELECT id FROM memory_sessions WHERE active = 1 AND summary IS NULL ORDER BY updated_at DESC LIMIT ?`
+    )
+    .all(limit) as { id: string }[];
+
+  let count = 0;
+  for (const s of sessions) {
+    try {
+      await summarizeSession(db, s.id, { model: options.model });
+      count++;
+    } catch {
+      // Skip sessions that fail to summarize
+    }
+  }
+  return count;
+}
+
+// =============================================================================
+// Recall
+// =============================================================================
+
+/**
+ * Recall relevant memories for a query.
+ * Combines FTS + vector search using RRF, deduplicates by session,
+ * and optionally synthesizes via Ollama.
+ */
+export async function recallMemories(
+  db: Database,
+  query: string,
+  options: {
+    limit?: number;
+    synthesize?: boolean;
+    model?: string;
+    maxTokens?: number;
+  } = {}
+): Promise<{ results: MemorySearchResult[]; synthesis?: string }> {
+  const limit = options.limit ?? 10;
+
+  // Run FTS and vector search
+  const ftsResults = searchMemoryFTS(db, query, limit);
+  let vecResults: MemorySearchResult[] = [];
+  try {
+    vecResults = await searchMemoryVec(db, query, limit);
+  } catch {
+    // Vector search may fail if no embeddings exist
+  }
+
+  // Convert to RankedResult format for RRF
+  const toRanked = (results: MemorySearchResult[]): RankedResult[] =>
+    results.map((r) => ({
+      file: `${r.session_id}:${r.message_id}`,
+      displayPath: r.session_title,
+      title: r.role,
+      body: r.content,
+      score: r.score,
+    }));
+
+  // Fuse results with RRF
+  const fused = reciprocalRankFusion(
+    [toRanked(ftsResults), toRanked(vecResults)],
+    [1.0, 1.0]
+  );
+
+  // Deduplicate by session, keeping best score per session
+  const sessionSeen = new Map<string, boolean>();
+  const dedupedResults: MemorySearchResult[] = [];
+
+  for (const r of fused) {
+    const [sessionId] = r.file.split(":");
+    if (!sessionId) continue;
+
+    // Find the original result to preserve all fields
+    const original =
+      [...ftsResults, ...vecResults].find(
+        (o) => `${o.session_id}:${o.message_id}` === r.file
+      ) || null;
+
+    if (original && !sessionSeen.has(sessionId)) {
+      sessionSeen.set(sessionId, true);
+      dedupedResults.push({ ...original, score: r.score });
+    } else if (!original && !sessionSeen.has(sessionId)) {
+      sessionSeen.set(sessionId, true);
+      dedupedResults.push({
+        session_id: sessionId!,
+        session_title: r.displayPath,
+        message_id: parseInt(r.file.split(":")[1] || "0"),
+        role: r.title,
+        content: r.body,
+        score: r.score,
+        source: "fts",
+      });
+    }
+  }
+
+  const results = dedupedResults.slice(0, limit);
+
+  // Optionally synthesize via Ollama
+  let synthesis: string | undefined;
+  if (options.synthesize && results.length > 0) {
+    const memoriesText = results
+      .map(
+        (r) =>
+          `[Session: ${r.session_title || r.session_id}]\n${r.role}: ${r.content}`
+      )
+      .join("\n\n---\n\n");
+
+    synthesis = await ollamaRecallSynthesize(query, memoriesText, {
+      model: options.model,
+      maxTokens: options.maxTokens,
+    });
+  }
+
+  return { results, synthesis };
+}
+
+// =============================================================================
+// Import
+// =============================================================================
+
+/**
+ * Import a conversation transcript from a file.
+ * Supports 'chat' format (role: content) and 'jsonl' format.
+ */
+export async function importTranscript(
+  db: Database,
+  content: string,
+  options: { title?: string; format?: "chat" | "jsonl"; sessionId?: string } = {}
+): Promise<{ sessionId: string; messageCount: number }> {
+  const format = options.format ?? "chat";
+  const sessionId = options.sessionId || crypto.randomUUID().slice(0, 8);
+
+  let messages: { role: string; content: string }[] = [];
+
+  if (format === "jsonl") {
+    messages = content
+      .split("\n")
+      .filter((line) => line.trim())
+      .map((line) => {
+        const parsed = JSON.parse(line);
+        return { role: parsed.role || "user", content: parsed.content || "" };
+      });
+  } else {
+    // Chat format: "role: content" separated by blank lines
+    const blocks = content.split(/\n\n+/);
+    for (const block of blocks) {
+      const trimmed = block.trim();
+      if (!trimmed) continue;
+      const colonIdx = trimmed.indexOf(":");
+      if (colonIdx > 0 && colonIdx < 20) {
+        const role = trimmed.slice(0, colonIdx).trim().toLowerCase();
+        const msgContent = trimmed.slice(colonIdx + 1).trim();
+        if (msgContent) {
+          messages.push({ role, content: msgContent });
+        }
+      } else {
+        messages.push({ role: "user", content: trimmed });
+      }
+    }
+  }
+
+  for (const msg of messages) {
+    await addMessage(db, sessionId, msg.role, msg.content, {
+      title: options.title,
+    });
+  }
+
+  return { sessionId, messageCount: messages.length };
+}
+
+// =============================================================================
+// Status
+// =============================================================================
+
+/**
+ * Get memory statistics.
+ */
+export function getMemoryStatus(db: Database): {
+  sessions: number;
+  activeSessions: number;
+  messages: number;
+  embeddedMessages: number;
+  summarizedSessions: number;
+} {
+  const sessions = (
+    db
+      .prepare(`SELECT COUNT(*) as count FROM memory_sessions`)
+      .get() as { count: number }
+  ).count;
+
+  const activeSessions = (
+    db
+      .prepare(
+        `SELECT COUNT(*) as count FROM memory_sessions WHERE active = 1`
+      )
+      .get() as { count: number }
+  ).count;
+
+  const messages = (
+    db
+      .prepare(`SELECT COUNT(*) as count FROM memory_messages`)
+      .get() as { count: number }
+  ).count;
+
+  const embeddedMessages = (
+    db
+      .prepare(
+        `SELECT COUNT(DISTINCT m.hash) as count FROM memory_messages m
+       JOIN content_vectors cv ON cv.hash = m.hash`
+      )
+      .get() as { count: number }
+  ).count;
+
+  const summarizedSessions = (
+    db
+      .prepare(
+        `SELECT COUNT(*) as count FROM memory_sessions WHERE summary IS NOT NULL`
+      )
+      .get() as { count: number }
+  ).count;
+
+  return {
+    sessions,
+    activeSessions,
+    messages,
+    embeddedMessages,
+    summarizedSessions,
+  };
+}
diff --git a/src/ollama.ts b/src/ollama.ts
new file mode 100644
index 00000000..0bbbde98
--- /dev/null
+++ b/src/ollama.ts
@@ -0,0 +1,169 @@
+/**
+ * ollama.ts - Ollama API client for QMD memory summarization and synthesis
+ *
+ * Uses Bun's fetch() to call Ollama's HTTP API. Separate from llm.ts which
+ * uses node-llama-cpp for embeddings/reranking.
+ *
+ * Config via env:
+ *   OLLAMA_HOST     - Ollama server URL (default: http://127.0.0.1:11434)
+ *   QMD_MEMORY_MODEL - Model for summarization/synthesis (default: qwen3:8b-tuned)
+ */
+
+// =============================================================================
+// Configuration
+// =============================================================================
+
+const OLLAMA_HOST = Bun.env.OLLAMA_HOST || "http://127.0.0.1:11434";
+const DEFAULT_MEMORY_MODEL = Bun.env.QMD_MEMORY_MODEL || "qwen3:8b-tuned";
+
+// =============================================================================
+// Types
+// =============================================================================
+
+export type OllamaChatMessage = {
+  role: "system" | "user" | "assistant";
+  content: string;
+};
+
+export type OllamaChatOptions = {
+  model?: string;
+  temperature?: number;
+  maxTokens?: number;
+};
+
+export type OllamaChatResponse = {
+  model: string;
+  message: OllamaChatMessage;
+  done: boolean;
+  total_duration?: number;
+  eval_count?: number;
+};
+
+// =============================================================================
+// Core API
+// =============================================================================
+
+/**
+ * Send a chat completion request to Ollama.
+ * Uses stream: false for simple request/response.
+ */
+export async function ollamaChat(
+  messages: OllamaChatMessage[],
+  options: OllamaChatOptions = {}
+): Promise<OllamaChatResponse> {
+  const model = options.model || DEFAULT_MEMORY_MODEL;
+  const resp = await fetch(`${OLLAMA_HOST}/api/chat`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model,
+      messages,
+      stream: false,
+      options: {
+        ...(options.temperature !== undefined && { temperature: options.temperature }),
+        ...(options.maxTokens !== undefined && { num_predict: options.maxTokens }),
+      },
+    }),
+  });
+
+  if (!resp.ok) {
+    const body = await resp.text();
+    throw new Error(`Ollama chat failed (${resp.status}): ${body}`);
+  }
+
+  return resp.json() as Promise<OllamaChatResponse>;
+}
+
+/**
+ * Summarize a conversation transcript via Ollama.
+ * Returns a concise summary of the conversation.
+ */
+export async function ollamaSummarize(
+  transcript: string,
+  options: OllamaChatOptions = {}
+): Promise<string> {
+  const messages: OllamaChatMessage[] = [
+    {
+      role: "system",
+      content:
+        "You are a conversation summarizer. Produce a concise summary of the following conversation. " +
+        "Focus on key topics discussed, decisions made, questions asked, and solutions provided. " +
+        "Keep it under 200 words. Output only the summary, no preamble.",
+    },
+    {
+      role: "user",
+      content: transcript,
+    },
+  ];
+
+  const resp = await ollamaChat(messages, {
+    ...options,
+    temperature: options.temperature ?? 0.3,
+    maxTokens: options.maxTokens ?? 512,
+  });
+
+  return resp.message.content.trim();
+}
+
+/**
+ * Synthesize recalled memories into coherent context via Ollama.
+ * Takes a query and memory fragments, returns a synthesized response.
+ */
+export async function ollamaRecall(
+  query: string,
+  memories: string,
+  options: OllamaChatOptions = {}
+): Promise<string> {
+  const messages: OllamaChatMessage[] = [
+    {
+      role: "system",
+      content:
+        "You are a memory recall assistant. Given a query and relevant past conversation memories, " +
+        "synthesize the memories into useful context for answering the query. " +
+        "Be concise and focus on information directly relevant to the query. " +
+        "If memories contain contradictory information, note the most recent. " +
+        "Output only the synthesized context, no preamble.",
+    },
+    {
+      role: "user",
+      content: `Query: ${query}\n\nRelevant memories:\n${memories}`,
+    },
+  ];
+
+  const resp = await ollamaChat(messages, {
+    ...options,
+    temperature: options.temperature ?? 0.3,
+    maxTokens: options.maxTokens ?? 1024,
+  });
+
+  return resp.message.content.trim();
+}
+
+/**
+ * Check if Ollama is running and accessible.
+ * Pings the /api/tags endpoint.
+ */
+export async function ollamaHealthCheck(): Promise<{
+  ok: boolean;
+  models?: string[];
+  error?: string;
+}> {
+  try {
+    const resp = await fetch(`${OLLAMA_HOST}/api/tags`, {
+      signal: AbortSignal.timeout(5000),
+    });
+    if (!resp.ok) {
+      return { ok: false, error: `HTTP ${resp.status}` };
+    }
+    const data = (await resp.json()) as { models?: { name: string }[] };
+    const models = data.models?.map((m) => m.name) || [];
+    return { ok: true, models };
+  } catch (err) {
+    return {
+      ok: false,
+      error: err instanceof Error ? err.message : String(err),
+    };
+  }
+}
+
+export { DEFAULT_MEMORY_MODEL, OLLAMA_HOST };

From e257bb7b4eeca81b268b091d5ad8e8842f31af5d Mon Sep 17 00:00:00 2001
From: Ashutosh Tripathi <hello@zero8.dev>
Date: Fri, 27 Feb 2026 15:55:41 +0530
Subject: [PATCH 2/5] perf(memory): optimize addMessage session upsert and add
 coverage

---
 src/memory.test.ts |  64 +++++++++++++++++++++++++
 src/memory.ts      | 116 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 159 insertions(+), 21 deletions(-)
 create mode 100644 src/memory.test.ts

diff --git a/src/memory.test.ts b/src/memory.test.ts
new file mode 100644
index 00000000..75ad2c74
--- /dev/null
+++ b/src/memory.test.ts
@@ -0,0 +1,64 @@
+import { afterEach, beforeEach, expect, test } from "bun:test";
+import { Database } from "bun:sqlite";
+import { addMessage, initializeMemoryTables } from "./memory";
+
+let db: Database;
+
+beforeEach(() => {
+  db = new Database(":memory:");
+  db.exec("PRAGMA foreign_keys = ON");
+  initializeMemoryTables(db);
+});
+
+afterEach(() => {
+  db.close();
+});
+
+test("addMessage creates session once and does not overwrite existing title", async () => {
+  await addMessage(db, "s1", "user", "first", { title: "Initial Title" });
+  await addMessage(db, "s1", "assistant", "second", { title: "New Title" });
+
+  const session = db
+    .prepare(`SELECT id, title FROM memory_sessions WHERE id = ?`)
+    .get("s1") as { id: string; title: string } | null;
+  const sessionCount = (
+    db.prepare(`SELECT COUNT(*) AS count FROM memory_sessions WHERE id = ?`).get("s1") as {
+      count: number;
+    }
+  ).count;
+  const messageCount = (
+    db.prepare(`SELECT COUNT(*) AS count FROM memory_messages WHERE session_id = ?`).get("s1") as {
+      count: number;
+    }
+  ).count;
+
+  expect(session).not.toBeNull();
+  expect(session?.title).toBe("Initial Title");
+  expect(sessionCount).toBe(1);
+  expect(messageCount).toBe(2);
+});
+
+test("addMessage backfills empty title when provided later", async () => {
+  const now = new Date().toISOString();
+  db.prepare(
+    `INSERT INTO memory_sessions (id, title, created_at, updated_at, active) VALUES (?, ?, ?, ?, 1)`
+  ).run("s-empty", "", now, now);
+
+  await addMessage(db, "s-empty", "user", "hello", { title: "Backfilled Title" });
+
+  const session = db
+    .prepare(`SELECT title FROM memory_sessions WHERE id = ?`)
+    .get("s-empty") as { title: string } | null;
+  expect(session?.title).toBe("Backfilled Title");
+});
+
+test("addMessage with sessionId 'new' generates a concrete session id", async () => {
+  const message = await addMessage(db, "new", "user", "hello");
+  expect(message.session_id).not.toBe("new");
+  expect(message.session_id.length).toBe(8);
+
+  const session = db
+    .prepare(`SELECT id FROM memory_sessions WHERE id = ?`)
+    .get(message.session_id) as { id: string } | null;
+  expect(session?.id).toBe(message.session_id);
+});
diff --git a/src/memory.ts b/src/memory.ts
index 46a7ff2e..8559341d 100644
--- a/src/memory.ts
+++ b/src/memory.ts
@@ -64,6 +64,14 @@ export type MemorySearchResult = {
   source: "fts" | "vec";
 };
 
+type RecallTimings = {
+  ftsMs: number;
+  vecMs: number;
+  fuseMs: number;
+  dedupeMs: number;
+  totalMs: number;
+};
+
 // =============================================================================
 // Schema Initialization
 // =============================================================================
@@ -102,6 +110,7 @@ export function initializeMemoryTables(db: Database): void {
 
   db.exec(`CREATE INDEX IF NOT EXISTS idx_memory_messages_session ON memory_messages(session_id)`);
   db.exec(`CREATE INDEX IF NOT EXISTS idx_memory_messages_hash ON memory_messages(hash)`);
+  db.exec(`CREATE INDEX IF NOT EXISTS idx_memory_sessions_active ON memory_sessions(active, id)`);
 
   // FTS5 for memory search
   db.exec(`
@@ -263,19 +272,26 @@ export async function addMessage(
   const now = new Date().toISOString();
   const hash = await hashContent(content);
 
-  // Ensure session exists (createSession may generate a new ID for "new")
-  let session = getSession(db, sessionId);
-  if (!session) {
-    session = createSession(db, sessionId, options.title || "");
-  } else if (options.title && !session.title) {
-    db.prepare(`UPDATE memory_sessions SET title = ? WHERE id = ?`).run(
-      options.title,
-      sessionId
-    );
+  // Preserve "new" behavior, which generates an ID.
+  let resolvedSessionId = sessionId;
+  if (sessionId === "new") {
+    resolvedSessionId = crypto.randomUUID().slice(0, 8);
   }
 
-  // Use the resolved session ID (may differ from input if "new" was passed)
-  const resolvedSessionId = session.id;
+  // Ensure session exists without a pre-read on every message.
+  db.prepare(
+    `INSERT OR IGNORE INTO memory_sessions (id, title, created_at, updated_at, active)
+     VALUES (?, ?, ?, ?, 1)`
+  ).run(resolvedSessionId, options.title || "", now, now);
+
+  // If title is provided later, fill it only when current title is empty.
+  if (options.title) {
+    db.prepare(
+      `UPDATE memory_sessions
+       SET title = ?
+       WHERE id = ? AND (title = '' OR title IS NULL)`
+    ).run(options.title, resolvedSessionId);
+  }
 
   const metadataStr = options.metadata
     ? JSON.stringify(options.metadata)
@@ -347,24 +363,37 @@ export function searchMemoryFTS(
 ): MemorySearchResult[] {
   const ftsQuery = buildMemoryFTS5Query(query);
   if (!ftsQuery) return [];
+  const candidateLimit = Math.max(limit * 3, limit);
 
+  // Rank candidate rowids in FTS first, then join to payload tables.
+  // This keeps the expensive bm25 ordering on the smallest possible row shape.
   const sql = `
+    WITH ranked AS (
+      SELECT
+        rowid,
+        bm25(memory_fts, 5.0, 1.0, 1.0) as bm25_score
+      FROM memory_fts
+      WHERE memory_fts MATCH ?
+      ORDER BY bm25_score ASC
+      LIMIT ?
+    )
     SELECT
       m.session_id,
       s.title as session_title,
       m.id as message_id,
       m.role,
       m.content,
-      bm25(memory_fts, 5.0, 1.0, 1.0) as bm25_score
-    FROM memory_fts f
-    JOIN memory_messages m ON m.id = f.rowid
+      r.bm25_score
+    FROM ranked r
+    JOIN memory_messages m ON m.id = r.rowid
     JOIN memory_sessions s ON s.id = m.session_id
-    WHERE memory_fts MATCH ? AND s.active = 1
-    ORDER BY bm25_score ASC
+    WHERE s.active = 1
+    ORDER BY r.bm25_score ASC
     LIMIT ?
   `;
 
-  const rows = db.prepare(sql).all(ftsQuery, limit) as {
+  const stmt = getMemoryFtsStmt(db, sql);
+  const rows = stmt.all(ftsQuery, candidateLimit, limit) as {
     session_id: string;
     session_title: string;
     message_id: number;
@@ -385,6 +414,17 @@ export function searchMemoryFTS(
   }));
 }
 
+const memoryFtsStmtCache = new WeakMap<Database, ReturnType<Database["prepare"]>>();
+
+function getMemoryFtsStmt(db: Database, sql: string) {
+  let stmt = memoryFtsStmtCache.get(db);
+  if (!stmt) {
+    stmt = db.prepare(sql);
+    memoryFtsStmtCache.set(db, stmt);
+  }
+  return stmt;
+}
+
 /**
  * Search memory using vector similarity.
  * Two-step pattern: query vectors_vec first, then JOIN separately.
@@ -654,16 +694,22 @@ export async function recallMemories(
     maxTokens?: number;
   } = {}
 ): Promise<{ results: MemorySearchResult[]; synthesis?: string }> {
+  const startedAt = performance.now();
+  const shouldTraceRecall = process.env.SMRITI_BENCH_TRACE === "1";
   const limit = options.limit ?? 10;
 
   // Run FTS and vector search
+  const ftsStartedAt = performance.now();
   const ftsResults = searchMemoryFTS(db, query, limit);
+  const ftsMs = performance.now() - ftsStartedAt;
   let vecResults: MemorySearchResult[] = [];
+  const vecStartedAt = performance.now();
   try {
     vecResults = await searchMemoryVec(db, query, limit);
   } catch {
     // Vector search may fail if no embeddings exist
   }
+  const vecMs = performance.now() - vecStartedAt;
 
   // Convert to RankedResult format for RRF
   const toRanked = (results: MemorySearchResult[]): RankedResult[] =>
@@ -676,24 +722,33 @@ export async function recallMemories(
     }));
 
   // Fuse results with RRF
+  const fuseStartedAt = performance.now();
   const fused = reciprocalRankFusion(
     [toRanked(ftsResults), toRanked(vecResults)],
     [1.0, 1.0]
   );
+  const fuseMs = performance.now() - fuseStartedAt;
 
   // Deduplicate by session, keeping best score per session
+  const dedupeStartedAt = performance.now();
   const sessionSeen = new Map<string, boolean>();
   const dedupedResults: MemorySearchResult[] = [];
+  const originalByKey = new Map<string, MemorySearchResult>();
+  for (const result of ftsResults) {
+    originalByKey.set(`${result.session_id}:${result.message_id}`, result);
+  }
+  for (const result of vecResults) {
+    const key = `${result.session_id}:${result.message_id}`;
+    // Prefer vector entry if both are present because it typically carries the better semantic score.
+    originalByKey.set(key, result);
+  }
 
   for (const r of fused) {
     const [sessionId] = r.file.split(":");
     if (!sessionId) continue;
 
     // Find the original result to preserve all fields
-    const original =
-      [...ftsResults, ...vecResults].find(
-        (o) => `${o.session_id}:${o.message_id}` === r.file
-      ) || null;
+    const original = originalByKey.get(r.file) ?? null;
 
     if (original && !sessionSeen.has(sessionId)) {
       sessionSeen.set(sessionId, true);
@@ -711,6 +766,7 @@ export async function recallMemories(
       });
     }
   }
+  const dedupeMs = performance.now() - dedupeStartedAt;
 
   const results = dedupedResults.slice(0, limit);
 
@@ -730,6 +786,24 @@ export async function recallMemories(
     });
   }
 
+  if (shouldTraceRecall) {
+    const timings: RecallTimings = {
+      ftsMs,
+      vecMs,
+      fuseMs,
+      dedupeMs,
+      totalMs: performance.now() - startedAt,
+    };
+    console.error(
+      `[recall.trace] q="${query.slice(0, 64)}" ` +
+        `fts=${timings.ftsMs.toFixed(3)}ms ` +
+        `vec=${timings.vecMs.toFixed(3)}ms ` +
+        `fuse=${timings.fuseMs.toFixed(3)}ms ` +
+        `dedupe=${timings.dedupeMs.toFixed(3)}ms ` +
+        `total=${timings.totalMs.toFixed(3)}ms`
+    );
+  }
+
   return { results, synthesis };
 }
 

From 0274a2c9a3f3d42082ced1fd66f01254b116b723 Mon Sep 17 00:00:00 2001
From: Ashutosh Tripathi <hello@zero8.dev>
Date: Tue, 10 Mar 2026 08:07:50 +0530
Subject: [PATCH 3/5] refactor: move memory.ts + ollama.ts to smriti/src/

These modules are Smriti-specific additions (conversation memory layer
and Ollama client) that don't belong in the QMD source tree.

Moving them to smriti/src/ means:
- qmd/ submodule stays as pure upstream code
- Upstream syncs are conflict-free (no smriti code in qmd/src/)
- A thin adapter (smriti/src/qmd-internals.ts) is the sole coupling point
---
 src/memory.ts | 922 --------------------------------------------------
 src/ollama.ts | 169 ---------
 2 files changed, 1091 deletions(-)
 delete mode 100644 src/memory.ts
 delete mode 100644 src/ollama.ts

diff --git a/src/memory.ts b/src/memory.ts
deleted file mode 100644
index 8559341d..00000000
--- a/src/memory.ts
+++ /dev/null
@@ -1,922 +0,0 @@
-/**
- * memory.ts - Conversation memory storage & retrieval for QMD
- *
- * Stores conversation messages in sessions, provides FTS5 + vector search,
- * summarization via Ollama, and memory recall for LLM context.
- *
- * Reuses QMD's existing infrastructure:
- * - content_vectors + vectors_vec tables for embeddings
- * - hashContent() for content-addressable storage
- * - chunkDocumentByTokens() for chunking
- * - insertEmbedding() for vector storage
- * - BM25 normalization pattern from searchFTS
- * - Two-step vector search pattern from searchVec
- * - reciprocalRankFusion() for combining results
- */
-
-import { Database } from "bun:sqlite";
-import {
-  hashContent,
-  chunkDocumentByTokens,
-  insertEmbedding,
-  reciprocalRankFusion,
-  getDocid,
-  type RankedResult,
-} from "./store.js";
-import {
-  getDefaultLlamaCpp,
-  formatQueryForEmbedding,
-  formatDocForEmbedding,
-} from "./llm.js";
-import { ollamaSummarize, ollamaRecall as ollamaRecallSynthesize } from "./ollama.js";
-
-// =============================================================================
-// Types
-// =============================================================================
-
-export type MemorySession = {
-  id: string;
-  title: string;
-  created_at: string;
-  updated_at: string;
-  summary: string | null;
-  summary_at: string | null;
-  active: number;
-};
-
-export type MemoryMessage = {
-  id: number;
-  session_id: string;
-  role: string;
-  content: string;
-  hash: string;
-  created_at: string;
-  metadata: Record<string, unknown> | null;
-};
-
-export type MemorySearchResult = {
-  session_id: string;
-  session_title: string;
-  message_id: number;
-  role: string;
-  content: string;
-  score: number;
-  source: "fts" | "vec";
-};
-
-type RecallTimings = {
-  ftsMs: number;
-  vecMs: number;
-  fuseMs: number;
-  dedupeMs: number;
-  totalMs: number;
-};
-
-// =============================================================================
-// Schema Initialization
-// =============================================================================
-
-/**
- * Create memory tables, indexes, triggers in the QMD database.
- * Safe to call multiple times (uses IF NOT EXISTS).
- */
-export function initializeMemoryTables(db: Database): void {
-  // Sessions table
-  db.exec(`
-    CREATE TABLE IF NOT EXISTS memory_sessions (
-      id TEXT PRIMARY KEY,
-      title TEXT NOT NULL DEFAULT '',
-      created_at TEXT NOT NULL,
-      updated_at TEXT NOT NULL,
-      summary TEXT,
-      summary_at TEXT,
-      active INTEGER NOT NULL DEFAULT 1
-    )
-  `);
-
-  // Messages table
-  db.exec(`
-    CREATE TABLE IF NOT EXISTS memory_messages (
-      id INTEGER PRIMARY KEY AUTOINCREMENT,
-      session_id TEXT NOT NULL,
-      role TEXT NOT NULL,
-      content TEXT NOT NULL,
-      hash TEXT NOT NULL,
-      created_at TEXT NOT NULL,
-      metadata TEXT,
-      FOREIGN KEY (session_id) REFERENCES memory_sessions(id) ON DELETE CASCADE
-    )
-  `);
-
-  db.exec(`CREATE INDEX IF NOT EXISTS idx_memory_messages_session ON memory_messages(session_id)`);
-  db.exec(`CREATE INDEX IF NOT EXISTS idx_memory_messages_hash ON memory_messages(hash)`);
-  db.exec(`CREATE INDEX IF NOT EXISTS idx_memory_sessions_active ON memory_sessions(active, id)`);
-
-  // FTS5 for memory search
-  db.exec(`
-    CREATE VIRTUAL TABLE IF NOT EXISTS memory_fts USING fts5(
-      session_title, role, content,
-      tokenize='porter unicode61'
-    )
-  `);
-
-  // Triggers to sync memory_fts
-  db.exec(`
-    CREATE TRIGGER IF NOT EXISTS memory_messages_ai AFTER INSERT ON memory_messages
-    BEGIN
-      INSERT INTO memory_fts(rowid, session_title, role, content)
-      SELECT
-        new.id,
-        (SELECT title FROM memory_sessions WHERE id = new.session_id),
-        new.role,
-        new.content;
-    END
-  `);
-
-  db.exec(`
-    CREATE TRIGGER IF NOT EXISTS memory_messages_ad AFTER DELETE ON memory_messages
-    BEGIN
-      DELETE FROM memory_fts WHERE rowid = old.id;
-    END
-  `);
-}
-
-// =============================================================================
-// FTS5 Query Building (same pattern as store.ts buildFTS5Query)
-// =============================================================================
-
-function sanitizeMemoryFTSTerm(term: string): string {
-  return term.replace(/[^\p{L}\p{N}']/gu, "").toLowerCase();
-}
-
-function buildMemoryFTS5Query(query: string): string | null {
-  const terms = query
-    .split(/\s+/)
-    .map((t) => sanitizeMemoryFTSTerm(t))
-    .filter((t) => t.length > 0);
-  if (terms.length === 0) return null;
-  if (terms.length === 1) return `"${terms[0]}"*`;
-  return terms.map((t) => `"${t}"*`).join(" AND ");
-}
-
-// =============================================================================
-// Session CRUD
-// =============================================================================
-
-/**
- * Create a new memory session. If id is "new", generates a random ID.
- */
-export function createSession(
-  db: Database,
-  id: string,
-  title: string = ""
-): MemorySession {
-  const now = new Date().toISOString();
-  const sessionId = id === "new" ? crypto.randomUUID().slice(0, 8) : id;
-
-  db.prepare(
-    `INSERT INTO memory_sessions (id, title, created_at, updated_at, active) VALUES (?, ?, ?, ?, 1)`
-  ).run(sessionId, title, now, now);
-
-  return {
-    id: sessionId,
-    title,
-    created_at: now,
-    updated_at: now,
-    summary: null,
-    summary_at: null,
-    active: 1,
-  };
-}
-
-/**
- * Get a session by ID.
- */
-export function getSession(db: Database, id: string): MemorySession | null {
-  return (
-    (db
-      .prepare(`SELECT * FROM memory_sessions WHERE id = ?`)
-      .get(id) as MemorySession | null) || null
-  );
-}
-
-/**
- * List sessions, most recent first.
- */
-export function listSessions(
-  db: Database,
-  options: { limit?: number; includeInactive?: boolean } = {}
-): MemorySession[] {
-  const limit = options.limit ?? 20;
-  const where = options.includeInactive ? "" : "WHERE active = 1";
-  return db
-    .prepare(
-      `SELECT * FROM memory_sessions ${where} ORDER BY updated_at DESC LIMIT ?`
-    )
-    .all(limit) as MemorySession[];
-}
-
-/**
- * Soft-delete a session (set active = 0). If hard = true, permanently delete.
- */
-export function deleteSession(
-  db: Database,
-  id: string,
-  hard: boolean = false
-): void {
-  if (hard) {
-    // Delete messages first (CASCADE should handle, but be explicit)
-    db.prepare(`DELETE FROM memory_messages WHERE session_id = ?`).run(id);
-    db.prepare(`DELETE FROM memory_sessions WHERE id = ?`).run(id);
-  } else {
-    db.prepare(`UPDATE memory_sessions SET active = 0 WHERE id = ?`).run(id);
-  }
-}
-
-/**
- * Clear all sessions (soft or hard delete).
- */
-export function clearAllSessions(db: Database, hard: boolean = false): number {
-  if (hard) {
-    const count = (
-      db.prepare(`SELECT COUNT(*) as count FROM memory_sessions`).get() as {
-        count: number;
-      }
-    ).count;
-    db.exec(`DELETE FROM memory_messages`);
-    db.exec(`DELETE FROM memory_sessions`);
-    db.exec(`DELETE FROM memory_fts`);
-    return count;
-  } else {
-    const result = db.prepare(
-      `UPDATE memory_sessions SET active = 0 WHERE active = 1`
-    );
-    return result.run().changes;
-  }
-}
-
-// =============================================================================
-// Message CRUD
-// =============================================================================
-
-/**
- * Add a message to a session. Creates session if it doesn't exist.
- */
-export async function addMessage(
-  db: Database,
-  sessionId: string,
-  role: string,
-  content: string,
-  options: { title?: string; metadata?: Record<string, unknown> } = {}
-): Promise<MemoryMessage> {
-  const now = new Date().toISOString();
-  const hash = await hashContent(content);
-
-  // Preserve "new" behavior, which generates an ID.
-  let resolvedSessionId = sessionId;
-  if (sessionId === "new") {
-    resolvedSessionId = crypto.randomUUID().slice(0, 8);
-  }
-
-  // Ensure session exists without a pre-read on every message.
-  db.prepare(
-    `INSERT OR IGNORE INTO memory_sessions (id, title, created_at, updated_at, active)
-     VALUES (?, ?, ?, ?, 1)`
-  ).run(resolvedSessionId, options.title || "", now, now);
-
-  // If title is provided later, fill it only when current title is empty.
-  if (options.title) {
-    db.prepare(
-      `UPDATE memory_sessions
-       SET title = ?
-       WHERE id = ? AND (title = '' OR title IS NULL)`
-    ).run(options.title, resolvedSessionId);
-  }
-
-  const metadataStr = options.metadata
-    ? JSON.stringify(options.metadata)
-    : null;
-
-  const result = db
-    .prepare(
-      `INSERT INTO memory_messages (session_id, role, content, hash, created_at, metadata)
-     VALUES (?, ?, ?, ?, ?, ?)`
-    )
-    .run(resolvedSessionId, role, content, hash, now, metadataStr);
-
-  // Update session timestamp
-  db.prepare(`UPDATE memory_sessions SET updated_at = ? WHERE id = ?`).run(
-    now,
-    resolvedSessionId
-  );
-
-  return {
-    id: Number(result.lastInsertRowid),
-    session_id: resolvedSessionId,
-    role,
-    content,
-    hash,
-    created_at: now,
-    metadata: options.metadata || null,
-  };
-}
-
-/**
- * Get messages for a session, ordered by creation time.
- */
-export function getMessages(
-  db: Database,
-  sessionId: string,
-  options: { limit?: number } = {}
-): MemoryMessage[] {
-  let sql = `SELECT * FROM memory_messages WHERE session_id = ? ORDER BY created_at ASC`;
-  const params: (string | number)[] = [sessionId];
-  if (options.limit) {
-    sql += ` LIMIT ?`;
-    params.push(options.limit);
-  }
-  return db.prepare(sql).all(...params) as MemoryMessage[];
-}
-
-/**
- * Get a formatted transcript for a session.
- */
-export function getSessionTranscript(
-  db: Database,
-  sessionId: string
-): string {
-  const messages = getMessages(db, sessionId);
-  return messages.map((m) => `${m.role}: ${m.content}`).join("\n\n");
-}
-
-// =============================================================================
-// Search
-// =============================================================================
-
-/**
- * Search memory using FTS5 (BM25). Same normalization as store.ts searchFTS.
- */
-export function searchMemoryFTS(
-  db: Database,
-  query: string,
-  limit: number = 20
-): MemorySearchResult[] {
-  const ftsQuery = buildMemoryFTS5Query(query);
-  if (!ftsQuery) return [];
-  const candidateLimit = Math.max(limit * 3, limit);
-
-  // Rank candidate rowids in FTS first, then join to payload tables.
-  // This keeps the expensive bm25 ordering on the smallest possible row shape.
-  const sql = `
-    WITH ranked AS (
-      SELECT
-        rowid,
-        bm25(memory_fts, 5.0, 1.0, 1.0) as bm25_score
-      FROM memory_fts
-      WHERE memory_fts MATCH ?
-      ORDER BY bm25_score ASC
-      LIMIT ?
-    )
-    SELECT
-      m.session_id,
-      s.title as session_title,
-      m.id as message_id,
-      m.role,
-      m.content,
-      r.bm25_score
-    FROM ranked r
-    JOIN memory_messages m ON m.id = r.rowid
-    JOIN memory_sessions s ON s.id = m.session_id
-    WHERE s.active = 1
-    ORDER BY r.bm25_score ASC
-    LIMIT ?
-  `;
-
-  const stmt = getMemoryFtsStmt(db, sql);
-  const rows = stmt.all(ftsQuery, candidateLimit, limit) as {
-    session_id: string;
-    session_title: string;
-    message_id: number;
-    role: string;
-    content: string;
-    bm25_score: number;
-  }[];
-
-  return rows.map((row) => ({
-    session_id: row.session_id,
-    session_title: row.session_title,
-    message_id: row.message_id,
-    role: row.role,
-    content: row.content,
-    // Same BM25 normalization as store.ts: 1 / (1 + |score|)
-    score: 1 / (1 + Math.abs(row.bm25_score)),
-    source: "fts" as const,
-  }));
-}
-
-const memoryFtsStmtCache = new WeakMap<Database, ReturnType<Database["prepare"]>>();
-
-function getMemoryFtsStmt(db: Database, sql: string) {
-  let stmt = memoryFtsStmtCache.get(db);
-  if (!stmt) {
-    stmt = db.prepare(sql);
-    memoryFtsStmtCache.set(db, stmt);
-  }
-  return stmt;
-}
-
-/**
- * Search memory using vector similarity.
- * Two-step pattern: query vectors_vec first, then JOIN separately.
- */
-export async function searchMemoryVec(
-  db: Database,
-  query: string,
-  limit: number = 20
-): Promise<MemorySearchResult[]> {
-  // Check if vectors_vec table exists
-  const tableExists = db
-    .prepare(
-      `SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`
-    )
-    .get();
-  if (!tableExists) return [];
-
-  // Get query embedding
-  const llm = getDefaultLlamaCpp();
-  const formattedQuery = formatQueryForEmbedding(query);
-  const result = await llm.embed(formattedQuery, { isQuery: true });
-  if (!result) return [];
-
-  // Step 1: Get vector matches (no JOINs - sqlite-vec hangs with JOINs)
-  const vecResults = db
-    .prepare(
-      `SELECT hash_seq, distance FROM vectors_vec WHERE embedding MATCH ? AND k = ?`
-    )
-    .all(new Float32Array(result.embedding), limit * 3) as {
-    hash_seq: string;
-    distance: number;
-  }[];
-
-  if (vecResults.length === 0) return [];
-
-  // Step 2: Match against memory_messages by hash
-  const hashSeqs = vecResults.map((r) => r.hash_seq);
-  const distanceMap = new Map(vecResults.map((r) => [r.hash_seq, r.distance]));
-
-  // Extract unique hashes from hash_seq (format: "hash_seq")
-  const hashes = [
-    ...new Set(hashSeqs.map((hs) => hs.split("_").slice(0, -1).join("_"))),
-  ];
-  const hashPlaceholders = hashes.map(() => "?").join(",");
-
-  const docSql = `
-    SELECT
-      m.id as message_id,
-      m.session_id,
-      s.title as session_title,
-      m.role,
-      m.content,
-      m.hash,
-      cv.hash || '_' || cv.seq as hash_seq
-    FROM memory_messages m
-    JOIN memory_sessions s ON s.id = m.session_id
-    JOIN content_vectors cv ON cv.hash = m.hash
-    WHERE m.hash IN (${hashPlaceholders}) AND s.active = 1
-  `;
-
-  const docRows = db.prepare(docSql).all(...hashes) as {
-    message_id: number;
-    session_id: string;
-    session_title: string;
-    role: string;
-    content: string;
-    hash: string;
-    hash_seq: string;
-  }[];
-
-  // Combine with distances, dedupe by message_id
-  const seen = new Map<
-    number,
-    { row: (typeof docRows)[0]; bestDist: number }
-  >();
-  for (const row of docRows) {
-    const distance = distanceMap.get(row.hash_seq) ?? 1;
-    const existing = seen.get(row.message_id);
-    if (!existing || distance < existing.bestDist) {
-      seen.set(row.message_id, { row, bestDist: distance });
-    }
-  }
-
-  return Array.from(seen.values())
-    .sort((a, b) => a.bestDist - b.bestDist)
-    .slice(0, limit)
-    .map(({ row, bestDist }) => ({
-      session_id: row.session_id,
-      session_title: row.session_title,
-      message_id: row.message_id,
-      role: row.role,
-      content: row.content,
-      score: 1 - bestDist, // cosine similarity
-      source: "vec" as const,
-    }));
-}
-
-// =============================================================================
-// Embedding
-// =============================================================================
-
-/**
- * Embed unembedded memory messages.
- * Reuses existing content_vectors + vectors_vec tables.
- * Returns count of newly embedded messages.
- */
-export async function embedMemoryMessages(
-  db: Database,
-  options: { onProgress?: (done: number, total: number) => void } = {}
-): Promise<number> {
-  // Find messages without embeddings
-  const unembedded = db
-    .prepare(
-      `
-    SELECT m.hash, m.content, m.session_id
-    FROM memory_messages m
-    LEFT JOIN content_vectors cv ON cv.hash = m.hash AND cv.seq = 0
-    WHERE cv.hash IS NULL
-    GROUP BY m.hash
-  `
-    )
-    .all() as { hash: string; content: string; session_id: string }[];
-
-  if (unembedded.length === 0) return 0;
-
-  const llm = getDefaultLlamaCpp();
-  let embedded = 0;
-
-  for (const msg of unembedded) {
-    // Chunk the message content
-    const chunks = await chunkDocumentByTokens(msg.content);
-
-    // Ensure vec table exists with correct dimensions
-    // Get dimension from first embedding
-    const firstText = formatDocForEmbedding(chunks[0]!.text);
-    const firstEmbed = await llm.embed(firstText);
-    if (!firstEmbed) continue;
-
-    const dimensions = firstEmbed.embedding.length;
-
-    // Ensure vectors_vec table exists with correct dimensions
-    const tableInfo = db
-      .prepare(
-        `SELECT sql FROM sqlite_master WHERE type='table' AND name='vectors_vec'`
-      )
-      .get() as { sql: string } | null;
-    if (!tableInfo) {
-      db.exec(
-        `CREATE VIRTUAL TABLE vectors_vec USING vec0(hash_seq TEXT PRIMARY KEY, embedding float[${dimensions}] distance_metric=cosine)`
-      );
-    }
-
-    const now = new Date().toISOString();
-
-    // Insert first chunk embedding
-    insertEmbedding(
-      db,
-      msg.hash,
-      0,
-      chunks[0]!.pos,
-      new Float32Array(firstEmbed.embedding),
-      firstEmbed.model,
-      now
-    );
-
-    // Embed remaining chunks
-    for (let i = 1; i < chunks.length; i++) {
-      const chunk = chunks[i]!;
-      const text = formatDocForEmbedding(chunk.text);
-      const embedResult = await llm.embed(text);
-      if (embedResult) {
-        insertEmbedding(
-          db,
-          msg.hash,
-          i,
-          chunk.pos,
-          new Float32Array(embedResult.embedding),
-          embedResult.model,
-          now
-        );
-      }
-    }
-
-    embedded++;
-    options.onProgress?.(embedded, unembedded.length);
-  }
-
-  return embedded;
-}
-
-// =============================================================================
-// Summarization
-// =============================================================================
-
-/**
- * Summarize a session via Ollama and store the summary.
- */
-export async function summarizeSession(
-  db: Database,
-  sessionId: string,
-  options: { model?: string; force?: boolean } = {}
-): Promise<string> {
-  const session = getSession(db, sessionId);
-  if (!session) throw new Error(`Session not found: ${sessionId}`);
-
-  // Check if already summarized (unless force)
-  if (session.summary && !options.force) {
-    return session.summary;
-  }
-
-  const transcript = getSessionTranscript(db, sessionId);
-  if (!transcript.trim()) throw new Error(`Session ${sessionId} has no messages`);
-
-  const summary = await ollamaSummarize(transcript, { model: options.model });
-  const now = new Date().toISOString();
-
-  db.prepare(
-    `UPDATE memory_sessions SET summary = ?, summary_at = ? WHERE id = ?`
-  ).run(summary, now, sessionId);
-
-  return summary;
-}
-
-/**
- * Summarize recent sessions that don't have summaries yet.
- * Returns count of sessions summarized.
- */
-export async function summarizeRecentSessions(
-  db: Database,
-  options: { limit?: number; model?: string } = {}
-): Promise<number> {
-  const limit = options.limit ?? 10;
-  const sessions = db
-    .prepare(
-      `SELECT id FROM memory_sessions WHERE active = 1 AND summary IS NULL ORDER BY updated_at DESC LIMIT ?`
-    )
-    .all(limit) as { id: string }[];
-
-  let count = 0;
-  for (const s of sessions) {
-    try {
-      await summarizeSession(db, s.id, { model: options.model });
-      count++;
-    } catch {
-      // Skip sessions that fail to summarize
-    }
-  }
-  return count;
-}
-
-// =============================================================================
-// Recall
-// =============================================================================
-
-/**
- * Recall relevant memories for a query.
- * Combines FTS + vector search using RRF, deduplicates by session,
- * and optionally synthesizes via Ollama.
- */
-export async function recallMemories(
-  db: Database,
-  query: string,
-  options: {
-    limit?: number;
-    synthesize?: boolean;
-    model?: string;
-    maxTokens?: number;
-  } = {}
-): Promise<{ results: MemorySearchResult[]; synthesis?: string }> {
-  const startedAt = performance.now();
-  const shouldTraceRecall = process.env.SMRITI_BENCH_TRACE === "1";
-  const limit = options.limit ?? 10;
-
-  // Run FTS and vector search
-  const ftsStartedAt = performance.now();
-  const ftsResults = searchMemoryFTS(db, query, limit);
-  const ftsMs = performance.now() - ftsStartedAt;
-  let vecResults: MemorySearchResult[] = [];
-  const vecStartedAt = performance.now();
-  try {
-    vecResults = await searchMemoryVec(db, query, limit);
-  } catch {
-    // Vector search may fail if no embeddings exist
-  }
-  const vecMs = performance.now() - vecStartedAt;
-
-  // Convert to RankedResult format for RRF
-  const toRanked = (results: MemorySearchResult[]): RankedResult[] =>
-    results.map((r) => ({
-      file: `${r.session_id}:${r.message_id}`,
-      displayPath: r.session_title,
-      title: r.role,
-      body: r.content,
-      score: r.score,
-    }));
-
-  // Fuse results with RRF
-  const fuseStartedAt = performance.now();
-  const fused = reciprocalRankFusion(
-    [toRanked(ftsResults), toRanked(vecResults)],
-    [1.0, 1.0]
-  );
-  const fuseMs = performance.now() - fuseStartedAt;
-
-  // Deduplicate by session, keeping best score per session
-  const dedupeStartedAt = performance.now();
-  const sessionSeen = new Map<string, boolean>();
-  const dedupedResults: MemorySearchResult[] = [];
-  const originalByKey = new Map<string, MemorySearchResult>();
-  for (const result of ftsResults) {
-    originalByKey.set(`${result.session_id}:${result.message_id}`, result);
-  }
-  for (const result of vecResults) {
-    const key = `${result.session_id}:${result.message_id}`;
-    // Prefer vector entry if both are present because it typically carries the better semantic score.
-    originalByKey.set(key, result);
-  }
-
-  for (const r of fused) {
-    const [sessionId] = r.file.split(":");
-    if (!sessionId) continue;
-
-    // Find the original result to preserve all fields
-    const original = originalByKey.get(r.file) ?? null;
-
-    if (original && !sessionSeen.has(sessionId)) {
-      sessionSeen.set(sessionId, true);
-      dedupedResults.push({ ...original, score: r.score });
-    } else if (!original && !sessionSeen.has(sessionId)) {
-      sessionSeen.set(sessionId, true);
-      dedupedResults.push({
-        session_id: sessionId!,
-        session_title: r.displayPath,
-        message_id: parseInt(r.file.split(":")[1] || "0"),
-        role: r.title,
-        content: r.body,
-        score: r.score,
-        source: "fts",
-      });
-    }
-  }
-  const dedupeMs = performance.now() - dedupeStartedAt;
-
-  const results = dedupedResults.slice(0, limit);
-
-  // Optionally synthesize via Ollama
-  let synthesis: string | undefined;
-  if (options.synthesize && results.length > 0) {
-    const memoriesText = results
-      .map(
-        (r) =>
-          `[Session: ${r.session_title || r.session_id}]\n${r.role}: ${r.content}`
-      )
-      .join("\n\n---\n\n");
-
-    synthesis = await ollamaRecallSynthesize(query, memoriesText, {
-      model: options.model,
-      maxTokens: options.maxTokens,
-    });
-  }
-
-  if (shouldTraceRecall) {
-    const timings: RecallTimings = {
-      ftsMs,
-      vecMs,
-      fuseMs,
-      dedupeMs,
-      totalMs: performance.now() - startedAt,
-    };
-    console.error(
-      `[recall.trace] q="${query.slice(0, 64)}" ` +
-        `fts=${timings.ftsMs.toFixed(3)}ms ` +
-        `vec=${timings.vecMs.toFixed(3)}ms ` +
-        `fuse=${timings.fuseMs.toFixed(3)}ms ` +
-        `dedupe=${timings.dedupeMs.toFixed(3)}ms ` +
-        `total=${timings.totalMs.toFixed(3)}ms`
-    );
-  }
-
-  return { results, synthesis };
-}
-
-// =============================================================================
-// Import
-// =============================================================================
-
-/**
- * Import a conversation transcript from a file.
- * Supports 'chat' format (role: content) and 'jsonl' format.
- */
-export async function importTranscript(
-  db: Database,
-  content: string,
-  options: { title?: string; format?: "chat" | "jsonl"; sessionId?: string } = {}
-): Promise<{ sessionId: string; messageCount: number }> {
-  const format = options.format ?? "chat";
-  const sessionId = options.sessionId || crypto.randomUUID().slice(0, 8);
-
-  let messages: { role: string; content: string }[] = [];
-
-  if (format === "jsonl") {
-    messages = content
-      .split("\n")
-      .filter((line) => line.trim())
-      .map((line) => {
-        const parsed = JSON.parse(line);
-        return { role: parsed.role || "user", content: parsed.content || "" };
-      });
-  } else {
-    // Chat format: "role: content" separated by blank lines
-    const blocks = content.split(/\n\n+/);
-    for (const block of blocks) {
-      const trimmed = block.trim();
-      if (!trimmed) continue;
-      const colonIdx = trimmed.indexOf(":");
-      if (colonIdx > 0 && colonIdx < 20) {
-        const role = trimmed.slice(0, colonIdx).trim().toLowerCase();
-        const msgContent = trimmed.slice(colonIdx + 1).trim();
-        if (msgContent) {
-          messages.push({ role, content: msgContent });
-        }
-      } else {
-        messages.push({ role: "user", content: trimmed });
-      }
-    }
-  }
-
-  for (const msg of messages) {
-    await addMessage(db, sessionId, msg.role, msg.content, {
-      title: options.title,
-    });
-  }
-
-  return { sessionId, messageCount: messages.length };
-}
-
-// =============================================================================
-// Status
-// =============================================================================
-
-/**
- * Get memory statistics.
- */
-export function getMemoryStatus(db: Database): {
-  sessions: number;
-  activeSessions: number;
-  messages: number;
-  embeddedMessages: number;
-  summarizedSessions: number;
-} {
-  const sessions = (
-    db
-      .prepare(`SELECT COUNT(*) as count FROM memory_sessions`)
-      .get() as { count: number }
-  ).count;
-
-  const activeSessions = (
-    db
-      .prepare(
-        `SELECT COUNT(*) as count FROM memory_sessions WHERE active = 1`
-      )
-      .get() as { count: number }
-  ).count;
-
-  const messages = (
-    db
-      .prepare(`SELECT COUNT(*) as count FROM memory_messages`)
-      .get() as { count: number }
-  ).count;
-
-  const embeddedMessages = (
-    db
-      .prepare(
-        `SELECT COUNT(DISTINCT m.hash) as count FROM memory_messages m
-       JOIN content_vectors cv ON cv.hash = m.hash`
-      )
-      .get() as { count: number }
-  ).count;
-
-  const summarizedSessions = (
-    db
-      .prepare(
-        `SELECT COUNT(*) as count FROM memory_sessions WHERE summary IS NOT NULL`
-      )
-      .get() as { count: number }
-  ).count;
-
-  return {
-    sessions,
-    activeSessions,
-    messages,
-    embeddedMessages,
-    summarizedSessions,
-  };
-}
diff --git a/src/ollama.ts b/src/ollama.ts
deleted file mode 100644
index 0bbbde98..00000000
--- a/src/ollama.ts
+++ /dev/null
@@ -1,169 +0,0 @@
-/**
- * ollama.ts - Ollama API client for QMD memory summarization and synthesis
- *
- * Uses Bun's fetch() to call Ollama's HTTP API. Separate from llm.ts which
- * uses node-llama-cpp for embeddings/reranking.
- *
- * Config via env:
- *   OLLAMA_HOST     - Ollama server URL (default: http://127.0.0.1:11434)
- *   QMD_MEMORY_MODEL - Model for summarization/synthesis (default: qwen3:8b-tuned)
- */
-
-// =============================================================================
-// Configuration
-// =============================================================================
-
-const OLLAMA_HOST = Bun.env.OLLAMA_HOST || "http://127.0.0.1:11434";
-const DEFAULT_MEMORY_MODEL = Bun.env.QMD_MEMORY_MODEL || "qwen3:8b-tuned";
-
-// =============================================================================
-// Types
-// =============================================================================
-
-export type OllamaChatMessage = {
-  role: "system" | "user" | "assistant";
-  content: string;
-};
-
-export type OllamaChatOptions = {
-  model?: string;
-  temperature?: number;
-  maxTokens?: number;
-};
-
-export type OllamaChatResponse = {
-  model: string;
-  message: OllamaChatMessage;
-  done: boolean;
-  total_duration?: number;
-  eval_count?: number;
-};
-
-// =============================================================================
-// Core API
-// =============================================================================
-
-/**
- * Send a chat completion request to Ollama.
- * Uses stream: false for simple request/response.
- */
-export async function ollamaChat(
-  messages: OllamaChatMessage[],
-  options: OllamaChatOptions = {}
-): Promise<OllamaChatResponse> {
-  const model = options.model || DEFAULT_MEMORY_MODEL;
-  const resp = await fetch(`${OLLAMA_HOST}/api/chat`, {
-    method: "POST",
-    headers: { "Content-Type": "application/json" },
-    body: JSON.stringify({
-      model,
-      messages,
-      stream: false,
-      options: {
-        ...(options.temperature !== undefined && { temperature: options.temperature }),
-        ...(options.maxTokens !== undefined && { num_predict: options.maxTokens }),
-      },
-    }),
-  });
-
-  if (!resp.ok) {
-    const body = await resp.text();
-    throw new Error(`Ollama chat failed (${resp.status}): ${body}`);
-  }
-
-  return resp.json() as Promise<OllamaChatResponse>;
-}
-
-/**
- * Summarize a conversation transcript via Ollama.
- * Returns a concise summary of the conversation.
- */
-export async function ollamaSummarize(
-  transcript: string,
-  options: OllamaChatOptions = {}
-): Promise<string> {
-  const messages: OllamaChatMessage[] = [
-    {
-      role: "system",
-      content:
-        "You are a conversation summarizer. Produce a concise summary of the following conversation. " +
-        "Focus on key topics discussed, decisions made, questions asked, and solutions provided. " +
-        "Keep it under 200 words. Output only the summary, no preamble.",
-    },
-    {
-      role: "user",
-      content: transcript,
-    },
-  ];
-
-  const resp = await ollamaChat(messages, {
-    ...options,
-    temperature: options.temperature ?? 0.3,
-    maxTokens: options.maxTokens ?? 512,
-  });
-
-  return resp.message.content.trim();
-}
-
-/**
- * Synthesize recalled memories into coherent context via Ollama.
- * Takes a query and memory fragments, returns a synthesized response.
- */
-export async function ollamaRecall(
-  query: string,
-  memories: string,
-  options: OllamaChatOptions = {}
-): Promise<string> {
-  const messages: OllamaChatMessage[] = [
-    {
-      role: "system",
-      content:
-        "You are a memory recall assistant. Given a query and relevant past conversation memories, " +
-        "synthesize the memories into useful context for answering the query. " +
-        "Be concise and focus on information directly relevant to the query. " +
-        "If memories contain contradictory information, note the most recent. " +
-        "Output only the synthesized context, no preamble.",
-    },
-    {
-      role: "user",
-      content: `Query: ${query}\n\nRelevant memories:\n${memories}`,
-    },
-  ];
-
-  const resp = await ollamaChat(messages, {
-    ...options,
-    temperature: options.temperature ?? 0.3,
-    maxTokens: options.maxTokens ?? 1024,
-  });
-
-  return resp.message.content.trim();
-}
-
-/**
- * Check if Ollama is running and accessible.
- * Pings the /api/tags endpoint.
- */
-export async function ollamaHealthCheck(): Promise<{
-  ok: boolean;
-  models?: string[];
-  error?: string;
-}> {
-  try {
-    const resp = await fetch(`${OLLAMA_HOST}/api/tags`, {
-      signal: AbortSignal.timeout(5000),
-    });
-    if (!resp.ok) {
-      return { ok: false, error: `HTTP ${resp.status}` };
-    }
-    const data = (await resp.json()) as { models?: { name: string }[] };
-    const models = data.models?.map((m) => m.name) || [];
-    return { ok: true, models };
-  } catch (err) {
-    return {
-      ok: false,
-      error: err instanceof Error ? err.message : String(err),
-    };
-  }
-}
-
-export { DEFAULT_MEMORY_MODEL, OLLAMA_HOST };

From 14b1ce06ae1d59df249e7687dc8d71932befe298 Mon Sep 17 00:00:00 2001
From: Ashutosh Tripathi <hello@zero8.dev>
Date: Tue, 10 Mar 2026 08:19:57 +0530
Subject: [PATCH 4/5] refactor: remove memory.test.ts (tests moved to
 smriti/test/)

---
 src/memory.test.ts | 64 ----------------------------------------------
 1 file changed, 64 deletions(-)
 delete mode 100644 src/memory.test.ts

diff --git a/src/memory.test.ts b/src/memory.test.ts
deleted file mode 100644
index 75ad2c74..00000000
--- a/src/memory.test.ts
+++ /dev/null
@@ -1,64 +0,0 @@
-import { afterEach, beforeEach, expect, test } from "bun:test";
-import { Database } from "bun:sqlite";
-import { addMessage, initializeMemoryTables } from "./memory";
-
-let db: Database;
-
-beforeEach(() => {
-  db = new Database(":memory:");
-  db.exec("PRAGMA foreign_keys = ON");
-  initializeMemoryTables(db);
-});
-
-afterEach(() => {
-  db.close();
-});
-
-test("addMessage creates session once and does not overwrite existing title", async () => {
-  await addMessage(db, "s1", "user", "first", { title: "Initial Title" });
-  await addMessage(db, "s1", "assistant", "second", { title: "New Title" });
-
-  const session = db
-    .prepare(`SELECT id, title FROM memory_sessions WHERE id = ?`)
-    .get("s1") as { id: string; title: string } | null;
-  const sessionCount = (
-    db.prepare(`SELECT COUNT(*) AS count FROM memory_sessions WHERE id = ?`).get("s1") as {
-      count: number;
-    }
-  ).count;
-  const messageCount = (
-    db.prepare(`SELECT COUNT(*) AS count FROM memory_messages WHERE session_id = ?`).get("s1") as {
-      count: number;
-    }
-  ).count;
-
-  expect(session).not.toBeNull();
-  expect(session?.title).toBe("Initial Title");
-  expect(sessionCount).toBe(1);
-  expect(messageCount).toBe(2);
-});
-
-test("addMessage backfills empty title when provided later", async () => {
-  const now = new Date().toISOString();
-  db.prepare(
-    `INSERT INTO memory_sessions (id, title, created_at, updated_at, active) VALUES (?, ?, ?, ?, 1)`
-  ).run("s-empty", "", now, now);
-
-  await addMessage(db, "s-empty", "user", "hello", { title: "Backfilled Title" });
-
-  const session = db
-    .prepare(`SELECT title FROM memory_sessions WHERE id = ?`)
-    .get("s-empty") as { title: string } | null;
-  expect(session?.title).toBe("Backfilled Title");
-});
-
-test("addMessage with sessionId 'new' generates a concrete session id", async () => {
-  const message = await addMessage(db, "new", "user", "hello");
-  expect(message.session_id).not.toBe("new");
-  expect(message.session_id.length).toBe(8);
-
-  const session = db
-    .prepare(`SELECT id FROM memory_sessions WHERE id = ?`)
-    .get(message.session_id) as { id: string } | null;
-  expect(session?.id).toBe(message.session_id);
-});

From f951f9966ceafb4cd2046f4dce641c78ab10af8a Mon Sep 17 00:00:00 2001
From: Ashutosh Tripathi <hello@zero8.dev>
Date: Tue, 10 Mar 2026 15:16:44 +0530
Subject: [PATCH 5/5] chore: regenerate bun.lock after upstream merge

---
 bun.lock | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bun.lock b/bun.lock
index 9cb44a60..da8f9dc6 100644
--- a/bun.lock
+++ b/bun.lock
@@ -22,8 +22,9 @@
       "optionalDependencies": {
         "sqlite-vec-darwin-arm64": "^0.1.7-alpha.2",
         "sqlite-vec-darwin-x64": "^0.1.7-alpha.2",
+        "sqlite-vec-linux-arm64": "^0.1.7-alpha.2",
         "sqlite-vec-linux-x64": "^0.1.7-alpha.2",
-        "sqlite-vec-win32-x64": "^0.1.7-alpha.2",
+        "sqlite-vec-windows-x64": "^0.1.7-alpha.2",
       },
       "peerDependencies": {
         "typescript": "^5.9.3",