From 7ec50b8fce3c372b5adebadb2dd8deec34548427 Mon Sep 17 00:00:00 2001 From: Ashutosh Tripathi Date: Tue, 10 Feb 2026 15:12:54 +0530 Subject: [PATCH 1/5] Add memory and ollama modules Add memory.ts (conversation storage, search, embedding, recall) and ollama.ts (LLM client for summarization and synthesis) to enable Smriti to import QMD as a proper package dependency. Co-Authored-By: Claude Opus 4.6 --- src/memory.ts | 848 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/ollama.ts | 169 ++++++++++ 2 files changed, 1017 insertions(+) create mode 100644 src/memory.ts create mode 100644 src/ollama.ts diff --git a/src/memory.ts b/src/memory.ts new file mode 100644 index 00000000..46a7ff2e --- /dev/null +++ b/src/memory.ts @@ -0,0 +1,848 @@ +/** + * memory.ts - Conversation memory storage & retrieval for QMD + * + * Stores conversation messages in sessions, provides FTS5 + vector search, + * summarization via Ollama, and memory recall for LLM context. + * + * Reuses QMD's existing infrastructure: + * - content_vectors + vectors_vec tables for embeddings + * - hashContent() for content-addressable storage + * - chunkDocumentByTokens() for chunking + * - insertEmbedding() for vector storage + * - BM25 normalization pattern from searchFTS + * - Two-step vector search pattern from searchVec + * - reciprocalRankFusion() for combining results + */ + +import { Database } from "bun:sqlite"; +import { + hashContent, + chunkDocumentByTokens, + insertEmbedding, + reciprocalRankFusion, + getDocid, + type RankedResult, +} from "./store.js"; +import { + getDefaultLlamaCpp, + formatQueryForEmbedding, + formatDocForEmbedding, +} from "./llm.js"; +import { ollamaSummarize, ollamaRecall as ollamaRecallSynthesize } from "./ollama.js"; + +// ============================================================================= +// Types +// ============================================================================= + +export type MemorySession = { + id: string; + title: string; + created_at: string; + updated_at: string; + summary: string | null; + summary_at: string | null; + active: number; +}; + +export type MemoryMessage = { + id: number; + session_id: string; + role: string; + content: string; + hash: string; + created_at: string; + metadata: Record | null; +}; + +export type MemorySearchResult = { + session_id: string; + session_title: string; + message_id: number; + role: string; + content: string; + score: number; + source: "fts" | "vec"; +}; + +// ============================================================================= +// Schema Initialization +// ============================================================================= + +/** + * Create memory tables, indexes, triggers in the QMD database. + * Safe to call multiple times (uses IF NOT EXISTS). + */ +export function initializeMemoryTables(db: Database): void { + // Sessions table + db.exec(` + CREATE TABLE IF NOT EXISTS memory_sessions ( + id TEXT PRIMARY KEY, + title TEXT NOT NULL DEFAULT '', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + summary TEXT, + summary_at TEXT, + active INTEGER NOT NULL DEFAULT 1 + ) + `); + + // Messages table + db.exec(` + CREATE TABLE IF NOT EXISTS memory_messages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + role TEXT NOT NULL, + content TEXT NOT NULL, + hash TEXT NOT NULL, + created_at TEXT NOT NULL, + metadata TEXT, + FOREIGN KEY (session_id) REFERENCES memory_sessions(id) ON DELETE CASCADE + ) + `); + + db.exec(`CREATE INDEX IF NOT EXISTS idx_memory_messages_session ON memory_messages(session_id)`); + db.exec(`CREATE INDEX IF NOT EXISTS idx_memory_messages_hash ON memory_messages(hash)`); + + // FTS5 for memory search + db.exec(` + CREATE VIRTUAL TABLE IF NOT EXISTS memory_fts USING fts5( + session_title, role, content, + tokenize='porter unicode61' + ) + `); + + // Triggers to sync memory_fts + db.exec(` + CREATE TRIGGER IF NOT EXISTS memory_messages_ai AFTER INSERT ON memory_messages + BEGIN + INSERT INTO memory_fts(rowid, session_title, role, content) + SELECT + new.id, + (SELECT title FROM memory_sessions WHERE id = new.session_id), + new.role, + new.content; + END + `); + + db.exec(` + CREATE TRIGGER IF NOT EXISTS memory_messages_ad AFTER DELETE ON memory_messages + BEGIN + DELETE FROM memory_fts WHERE rowid = old.id; + END + `); +} + +// ============================================================================= +// FTS5 Query Building (same pattern as store.ts buildFTS5Query) +// ============================================================================= + +function sanitizeMemoryFTSTerm(term: string): string { + return term.replace(/[^\p{L}\p{N}']/gu, "").toLowerCase(); +} + +function buildMemoryFTS5Query(query: string): string | null { + const terms = query + .split(/\s+/) + .map((t) => sanitizeMemoryFTSTerm(t)) + .filter((t) => t.length > 0); + if (terms.length === 0) return null; + if (terms.length === 1) return `"${terms[0]}"*`; + return terms.map((t) => `"${t}"*`).join(" AND "); +} + +// ============================================================================= +// Session CRUD +// ============================================================================= + +/** + * Create a new memory session. If id is "new", generates a random ID. + */ +export function createSession( + db: Database, + id: string, + title: string = "" +): MemorySession { + const now = new Date().toISOString(); + const sessionId = id === "new" ? crypto.randomUUID().slice(0, 8) : id; + + db.prepare( + `INSERT INTO memory_sessions (id, title, created_at, updated_at, active) VALUES (?, ?, ?, ?, 1)` + ).run(sessionId, title, now, now); + + return { + id: sessionId, + title, + created_at: now, + updated_at: now, + summary: null, + summary_at: null, + active: 1, + }; +} + +/** + * Get a session by ID. + */ +export function getSession(db: Database, id: string): MemorySession | null { + return ( + (db + .prepare(`SELECT * FROM memory_sessions WHERE id = ?`) + .get(id) as MemorySession | null) || null + ); +} + +/** + * List sessions, most recent first. + */ +export function listSessions( + db: Database, + options: { limit?: number; includeInactive?: boolean } = {} +): MemorySession[] { + const limit = options.limit ?? 20; + const where = options.includeInactive ? "" : "WHERE active = 1"; + return db + .prepare( + `SELECT * FROM memory_sessions ${where} ORDER BY updated_at DESC LIMIT ?` + ) + .all(limit) as MemorySession[]; +} + +/** + * Soft-delete a session (set active = 0). If hard = true, permanently delete. + */ +export function deleteSession( + db: Database, + id: string, + hard: boolean = false +): void { + if (hard) { + // Delete messages first (CASCADE should handle, but be explicit) + db.prepare(`DELETE FROM memory_messages WHERE session_id = ?`).run(id); + db.prepare(`DELETE FROM memory_sessions WHERE id = ?`).run(id); + } else { + db.prepare(`UPDATE memory_sessions SET active = 0 WHERE id = ?`).run(id); + } +} + +/** + * Clear all sessions (soft or hard delete). + */ +export function clearAllSessions(db: Database, hard: boolean = false): number { + if (hard) { + const count = ( + db.prepare(`SELECT COUNT(*) as count FROM memory_sessions`).get() as { + count: number; + } + ).count; + db.exec(`DELETE FROM memory_messages`); + db.exec(`DELETE FROM memory_sessions`); + db.exec(`DELETE FROM memory_fts`); + return count; + } else { + const result = db.prepare( + `UPDATE memory_sessions SET active = 0 WHERE active = 1` + ); + return result.run().changes; + } +} + +// ============================================================================= +// Message CRUD +// ============================================================================= + +/** + * Add a message to a session. Creates session if it doesn't exist. + */ +export async function addMessage( + db: Database, + sessionId: string, + role: string, + content: string, + options: { title?: string; metadata?: Record } = {} +): Promise { + const now = new Date().toISOString(); + const hash = await hashContent(content); + + // Ensure session exists (createSession may generate a new ID for "new") + let session = getSession(db, sessionId); + if (!session) { + session = createSession(db, sessionId, options.title || ""); + } else if (options.title && !session.title) { + db.prepare(`UPDATE memory_sessions SET title = ? WHERE id = ?`).run( + options.title, + sessionId + ); + } + + // Use the resolved session ID (may differ from input if "new" was passed) + const resolvedSessionId = session.id; + + const metadataStr = options.metadata + ? JSON.stringify(options.metadata) + : null; + + const result = db + .prepare( + `INSERT INTO memory_messages (session_id, role, content, hash, created_at, metadata) + VALUES (?, ?, ?, ?, ?, ?)` + ) + .run(resolvedSessionId, role, content, hash, now, metadataStr); + + // Update session timestamp + db.prepare(`UPDATE memory_sessions SET updated_at = ? WHERE id = ?`).run( + now, + resolvedSessionId + ); + + return { + id: Number(result.lastInsertRowid), + session_id: resolvedSessionId, + role, + content, + hash, + created_at: now, + metadata: options.metadata || null, + }; +} + +/** + * Get messages for a session, ordered by creation time. + */ +export function getMessages( + db: Database, + sessionId: string, + options: { limit?: number } = {} +): MemoryMessage[] { + let sql = `SELECT * FROM memory_messages WHERE session_id = ? ORDER BY created_at ASC`; + const params: (string | number)[] = [sessionId]; + if (options.limit) { + sql += ` LIMIT ?`; + params.push(options.limit); + } + return db.prepare(sql).all(...params) as MemoryMessage[]; +} + +/** + * Get a formatted transcript for a session. + */ +export function getSessionTranscript( + db: Database, + sessionId: string +): string { + const messages = getMessages(db, sessionId); + return messages.map((m) => `${m.role}: ${m.content}`).join("\n\n"); +} + +// ============================================================================= +// Search +// ============================================================================= + +/** + * Search memory using FTS5 (BM25). Same normalization as store.ts searchFTS. + */ +export function searchMemoryFTS( + db: Database, + query: string, + limit: number = 20 +): MemorySearchResult[] { + const ftsQuery = buildMemoryFTS5Query(query); + if (!ftsQuery) return []; + + const sql = ` + SELECT + m.session_id, + s.title as session_title, + m.id as message_id, + m.role, + m.content, + bm25(memory_fts, 5.0, 1.0, 1.0) as bm25_score + FROM memory_fts f + JOIN memory_messages m ON m.id = f.rowid + JOIN memory_sessions s ON s.id = m.session_id + WHERE memory_fts MATCH ? AND s.active = 1 + ORDER BY bm25_score ASC + LIMIT ? + `; + + const rows = db.prepare(sql).all(ftsQuery, limit) as { + session_id: string; + session_title: string; + message_id: number; + role: string; + content: string; + bm25_score: number; + }[]; + + return rows.map((row) => ({ + session_id: row.session_id, + session_title: row.session_title, + message_id: row.message_id, + role: row.role, + content: row.content, + // Same BM25 normalization as store.ts: 1 / (1 + |score|) + score: 1 / (1 + Math.abs(row.bm25_score)), + source: "fts" as const, + })); +} + +/** + * Search memory using vector similarity. + * Two-step pattern: query vectors_vec first, then JOIN separately. + */ +export async function searchMemoryVec( + db: Database, + query: string, + limit: number = 20 +): Promise { + // Check if vectors_vec table exists + const tableExists = db + .prepare( + `SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'` + ) + .get(); + if (!tableExists) return []; + + // Get query embedding + const llm = getDefaultLlamaCpp(); + const formattedQuery = formatQueryForEmbedding(query); + const result = await llm.embed(formattedQuery, { isQuery: true }); + if (!result) return []; + + // Step 1: Get vector matches (no JOINs - sqlite-vec hangs with JOINs) + const vecResults = db + .prepare( + `SELECT hash_seq, distance FROM vectors_vec WHERE embedding MATCH ? AND k = ?` + ) + .all(new Float32Array(result.embedding), limit * 3) as { + hash_seq: string; + distance: number; + }[]; + + if (vecResults.length === 0) return []; + + // Step 2: Match against memory_messages by hash + const hashSeqs = vecResults.map((r) => r.hash_seq); + const distanceMap = new Map(vecResults.map((r) => [r.hash_seq, r.distance])); + + // Extract unique hashes from hash_seq (format: "hash_seq") + const hashes = [ + ...new Set(hashSeqs.map((hs) => hs.split("_").slice(0, -1).join("_"))), + ]; + const hashPlaceholders = hashes.map(() => "?").join(","); + + const docSql = ` + SELECT + m.id as message_id, + m.session_id, + s.title as session_title, + m.role, + m.content, + m.hash, + cv.hash || '_' || cv.seq as hash_seq + FROM memory_messages m + JOIN memory_sessions s ON s.id = m.session_id + JOIN content_vectors cv ON cv.hash = m.hash + WHERE m.hash IN (${hashPlaceholders}) AND s.active = 1 + `; + + const docRows = db.prepare(docSql).all(...hashes) as { + message_id: number; + session_id: string; + session_title: string; + role: string; + content: string; + hash: string; + hash_seq: string; + }[]; + + // Combine with distances, dedupe by message_id + const seen = new Map< + number, + { row: (typeof docRows)[0]; bestDist: number } + >(); + for (const row of docRows) { + const distance = distanceMap.get(row.hash_seq) ?? 1; + const existing = seen.get(row.message_id); + if (!existing || distance < existing.bestDist) { + seen.set(row.message_id, { row, bestDist: distance }); + } + } + + return Array.from(seen.values()) + .sort((a, b) => a.bestDist - b.bestDist) + .slice(0, limit) + .map(({ row, bestDist }) => ({ + session_id: row.session_id, + session_title: row.session_title, + message_id: row.message_id, + role: row.role, + content: row.content, + score: 1 - bestDist, // cosine similarity + source: "vec" as const, + })); +} + +// ============================================================================= +// Embedding +// ============================================================================= + +/** + * Embed unembedded memory messages. + * Reuses existing content_vectors + vectors_vec tables. + * Returns count of newly embedded messages. + */ +export async function embedMemoryMessages( + db: Database, + options: { onProgress?: (done: number, total: number) => void } = {} +): Promise { + // Find messages without embeddings + const unembedded = db + .prepare( + ` + SELECT m.hash, m.content, m.session_id + FROM memory_messages m + LEFT JOIN content_vectors cv ON cv.hash = m.hash AND cv.seq = 0 + WHERE cv.hash IS NULL + GROUP BY m.hash + ` + ) + .all() as { hash: string; content: string; session_id: string }[]; + + if (unembedded.length === 0) return 0; + + const llm = getDefaultLlamaCpp(); + let embedded = 0; + + for (const msg of unembedded) { + // Chunk the message content + const chunks = await chunkDocumentByTokens(msg.content); + + // Ensure vec table exists with correct dimensions + // Get dimension from first embedding + const firstText = formatDocForEmbedding(chunks[0]!.text); + const firstEmbed = await llm.embed(firstText); + if (!firstEmbed) continue; + + const dimensions = firstEmbed.embedding.length; + + // Ensure vectors_vec table exists with correct dimensions + const tableInfo = db + .prepare( + `SELECT sql FROM sqlite_master WHERE type='table' AND name='vectors_vec'` + ) + .get() as { sql: string } | null; + if (!tableInfo) { + db.exec( + `CREATE VIRTUAL TABLE vectors_vec USING vec0(hash_seq TEXT PRIMARY KEY, embedding float[${dimensions}] distance_metric=cosine)` + ); + } + + const now = new Date().toISOString(); + + // Insert first chunk embedding + insertEmbedding( + db, + msg.hash, + 0, + chunks[0]!.pos, + new Float32Array(firstEmbed.embedding), + firstEmbed.model, + now + ); + + // Embed remaining chunks + for (let i = 1; i < chunks.length; i++) { + const chunk = chunks[i]!; + const text = formatDocForEmbedding(chunk.text); + const embedResult = await llm.embed(text); + if (embedResult) { + insertEmbedding( + db, + msg.hash, + i, + chunk.pos, + new Float32Array(embedResult.embedding), + embedResult.model, + now + ); + } + } + + embedded++; + options.onProgress?.(embedded, unembedded.length); + } + + return embedded; +} + +// ============================================================================= +// Summarization +// ============================================================================= + +/** + * Summarize a session via Ollama and store the summary. + */ +export async function summarizeSession( + db: Database, + sessionId: string, + options: { model?: string; force?: boolean } = {} +): Promise { + const session = getSession(db, sessionId); + if (!session) throw new Error(`Session not found: ${sessionId}`); + + // Check if already summarized (unless force) + if (session.summary && !options.force) { + return session.summary; + } + + const transcript = getSessionTranscript(db, sessionId); + if (!transcript.trim()) throw new Error(`Session ${sessionId} has no messages`); + + const summary = await ollamaSummarize(transcript, { model: options.model }); + const now = new Date().toISOString(); + + db.prepare( + `UPDATE memory_sessions SET summary = ?, summary_at = ? WHERE id = ?` + ).run(summary, now, sessionId); + + return summary; +} + +/** + * Summarize recent sessions that don't have summaries yet. + * Returns count of sessions summarized. + */ +export async function summarizeRecentSessions( + db: Database, + options: { limit?: number; model?: string } = {} +): Promise { + const limit = options.limit ?? 10; + const sessions = db + .prepare( + `SELECT id FROM memory_sessions WHERE active = 1 AND summary IS NULL ORDER BY updated_at DESC LIMIT ?` + ) + .all(limit) as { id: string }[]; + + let count = 0; + for (const s of sessions) { + try { + await summarizeSession(db, s.id, { model: options.model }); + count++; + } catch { + // Skip sessions that fail to summarize + } + } + return count; +} + +// ============================================================================= +// Recall +// ============================================================================= + +/** + * Recall relevant memories for a query. + * Combines FTS + vector search using RRF, deduplicates by session, + * and optionally synthesizes via Ollama. + */ +export async function recallMemories( + db: Database, + query: string, + options: { + limit?: number; + synthesize?: boolean; + model?: string; + maxTokens?: number; + } = {} +): Promise<{ results: MemorySearchResult[]; synthesis?: string }> { + const limit = options.limit ?? 10; + + // Run FTS and vector search + const ftsResults = searchMemoryFTS(db, query, limit); + let vecResults: MemorySearchResult[] = []; + try { + vecResults = await searchMemoryVec(db, query, limit); + } catch { + // Vector search may fail if no embeddings exist + } + + // Convert to RankedResult format for RRF + const toRanked = (results: MemorySearchResult[]): RankedResult[] => + results.map((r) => ({ + file: `${r.session_id}:${r.message_id}`, + displayPath: r.session_title, + title: r.role, + body: r.content, + score: r.score, + })); + + // Fuse results with RRF + const fused = reciprocalRankFusion( + [toRanked(ftsResults), toRanked(vecResults)], + [1.0, 1.0] + ); + + // Deduplicate by session, keeping best score per session + const sessionSeen = new Map(); + const dedupedResults: MemorySearchResult[] = []; + + for (const r of fused) { + const [sessionId] = r.file.split(":"); + if (!sessionId) continue; + + // Find the original result to preserve all fields + const original = + [...ftsResults, ...vecResults].find( + (o) => `${o.session_id}:${o.message_id}` === r.file + ) || null; + + if (original && !sessionSeen.has(sessionId)) { + sessionSeen.set(sessionId, true); + dedupedResults.push({ ...original, score: r.score }); + } else if (!original && !sessionSeen.has(sessionId)) { + sessionSeen.set(sessionId, true); + dedupedResults.push({ + session_id: sessionId!, + session_title: r.displayPath, + message_id: parseInt(r.file.split(":")[1] || "0"), + role: r.title, + content: r.body, + score: r.score, + source: "fts", + }); + } + } + + const results = dedupedResults.slice(0, limit); + + // Optionally synthesize via Ollama + let synthesis: string | undefined; + if (options.synthesize && results.length > 0) { + const memoriesText = results + .map( + (r) => + `[Session: ${r.session_title || r.session_id}]\n${r.role}: ${r.content}` + ) + .join("\n\n---\n\n"); + + synthesis = await ollamaRecallSynthesize(query, memoriesText, { + model: options.model, + maxTokens: options.maxTokens, + }); + } + + return { results, synthesis }; +} + +// ============================================================================= +// Import +// ============================================================================= + +/** + * Import a conversation transcript from a file. + * Supports 'chat' format (role: content) and 'jsonl' format. + */ +export async function importTranscript( + db: Database, + content: string, + options: { title?: string; format?: "chat" | "jsonl"; sessionId?: string } = {} +): Promise<{ sessionId: string; messageCount: number }> { + const format = options.format ?? "chat"; + const sessionId = options.sessionId || crypto.randomUUID().slice(0, 8); + + let messages: { role: string; content: string }[] = []; + + if (format === "jsonl") { + messages = content + .split("\n") + .filter((line) => line.trim()) + .map((line) => { + const parsed = JSON.parse(line); + return { role: parsed.role || "user", content: parsed.content || "" }; + }); + } else { + // Chat format: "role: content" separated by blank lines + const blocks = content.split(/\n\n+/); + for (const block of blocks) { + const trimmed = block.trim(); + if (!trimmed) continue; + const colonIdx = trimmed.indexOf(":"); + if (colonIdx > 0 && colonIdx < 20) { + const role = trimmed.slice(0, colonIdx).trim().toLowerCase(); + const msgContent = trimmed.slice(colonIdx + 1).trim(); + if (msgContent) { + messages.push({ role, content: msgContent }); + } + } else { + messages.push({ role: "user", content: trimmed }); + } + } + } + + for (const msg of messages) { + await addMessage(db, sessionId, msg.role, msg.content, { + title: options.title, + }); + } + + return { sessionId, messageCount: messages.length }; +} + +// ============================================================================= +// Status +// ============================================================================= + +/** + * Get memory statistics. + */ +export function getMemoryStatus(db: Database): { + sessions: number; + activeSessions: number; + messages: number; + embeddedMessages: number; + summarizedSessions: number; +} { + const sessions = ( + db + .prepare(`SELECT COUNT(*) as count FROM memory_sessions`) + .get() as { count: number } + ).count; + + const activeSessions = ( + db + .prepare( + `SELECT COUNT(*) as count FROM memory_sessions WHERE active = 1` + ) + .get() as { count: number } + ).count; + + const messages = ( + db + .prepare(`SELECT COUNT(*) as count FROM memory_messages`) + .get() as { count: number } + ).count; + + const embeddedMessages = ( + db + .prepare( + `SELECT COUNT(DISTINCT m.hash) as count FROM memory_messages m + JOIN content_vectors cv ON cv.hash = m.hash` + ) + .get() as { count: number } + ).count; + + const summarizedSessions = ( + db + .prepare( + `SELECT COUNT(*) as count FROM memory_sessions WHERE summary IS NOT NULL` + ) + .get() as { count: number } + ).count; + + return { + sessions, + activeSessions, + messages, + embeddedMessages, + summarizedSessions, + }; +} diff --git a/src/ollama.ts b/src/ollama.ts new file mode 100644 index 00000000..0bbbde98 --- /dev/null +++ b/src/ollama.ts @@ -0,0 +1,169 @@ +/** + * ollama.ts - Ollama API client for QMD memory summarization and synthesis + * + * Uses Bun's fetch() to call Ollama's HTTP API. Separate from llm.ts which + * uses node-llama-cpp for embeddings/reranking. + * + * Config via env: + * OLLAMA_HOST - Ollama server URL (default: http://127.0.0.1:11434) + * QMD_MEMORY_MODEL - Model for summarization/synthesis (default: qwen3:8b-tuned) + */ + +// ============================================================================= +// Configuration +// ============================================================================= + +const OLLAMA_HOST = Bun.env.OLLAMA_HOST || "http://127.0.0.1:11434"; +const DEFAULT_MEMORY_MODEL = Bun.env.QMD_MEMORY_MODEL || "qwen3:8b-tuned"; + +// ============================================================================= +// Types +// ============================================================================= + +export type OllamaChatMessage = { + role: "system" | "user" | "assistant"; + content: string; +}; + +export type OllamaChatOptions = { + model?: string; + temperature?: number; + maxTokens?: number; +}; + +export type OllamaChatResponse = { + model: string; + message: OllamaChatMessage; + done: boolean; + total_duration?: number; + eval_count?: number; +}; + +// ============================================================================= +// Core API +// ============================================================================= + +/** + * Send a chat completion request to Ollama. + * Uses stream: false for simple request/response. + */ +export async function ollamaChat( + messages: OllamaChatMessage[], + options: OllamaChatOptions = {} +): Promise { + const model = options.model || DEFAULT_MEMORY_MODEL; + const resp = await fetch(`${OLLAMA_HOST}/api/chat`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model, + messages, + stream: false, + options: { + ...(options.temperature !== undefined && { temperature: options.temperature }), + ...(options.maxTokens !== undefined && { num_predict: options.maxTokens }), + }, + }), + }); + + if (!resp.ok) { + const body = await resp.text(); + throw new Error(`Ollama chat failed (${resp.status}): ${body}`); + } + + return resp.json() as Promise; +} + +/** + * Summarize a conversation transcript via Ollama. + * Returns a concise summary of the conversation. + */ +export async function ollamaSummarize( + transcript: string, + options: OllamaChatOptions = {} +): Promise { + const messages: OllamaChatMessage[] = [ + { + role: "system", + content: + "You are a conversation summarizer. Produce a concise summary of the following conversation. " + + "Focus on key topics discussed, decisions made, questions asked, and solutions provided. " + + "Keep it under 200 words. Output only the summary, no preamble.", + }, + { + role: "user", + content: transcript, + }, + ]; + + const resp = await ollamaChat(messages, { + ...options, + temperature: options.temperature ?? 0.3, + maxTokens: options.maxTokens ?? 512, + }); + + return resp.message.content.trim(); +} + +/** + * Synthesize recalled memories into coherent context via Ollama. + * Takes a query and memory fragments, returns a synthesized response. + */ +export async function ollamaRecall( + query: string, + memories: string, + options: OllamaChatOptions = {} +): Promise { + const messages: OllamaChatMessage[] = [ + { + role: "system", + content: + "You are a memory recall assistant. Given a query and relevant past conversation memories, " + + "synthesize the memories into useful context for answering the query. " + + "Be concise and focus on information directly relevant to the query. " + + "If memories contain contradictory information, note the most recent. " + + "Output only the synthesized context, no preamble.", + }, + { + role: "user", + content: `Query: ${query}\n\nRelevant memories:\n${memories}`, + }, + ]; + + const resp = await ollamaChat(messages, { + ...options, + temperature: options.temperature ?? 0.3, + maxTokens: options.maxTokens ?? 1024, + }); + + return resp.message.content.trim(); +} + +/** + * Check if Ollama is running and accessible. + * Pings the /api/tags endpoint. + */ +export async function ollamaHealthCheck(): Promise<{ + ok: boolean; + models?: string[]; + error?: string; +}> { + try { + const resp = await fetch(`${OLLAMA_HOST}/api/tags`, { + signal: AbortSignal.timeout(5000), + }); + if (!resp.ok) { + return { ok: false, error: `HTTP ${resp.status}` }; + } + const data = (await resp.json()) as { models?: { name: string }[] }; + const models = data.models?.map((m) => m.name) || []; + return { ok: true, models }; + } catch (err) { + return { + ok: false, + error: err instanceof Error ? err.message : String(err), + }; + } +} + +export { DEFAULT_MEMORY_MODEL, OLLAMA_HOST }; From e257bb7b4eeca81b268b091d5ad8e8842f31af5d Mon Sep 17 00:00:00 2001 From: Ashutosh Tripathi Date: Fri, 27 Feb 2026 15:55:41 +0530 Subject: [PATCH 2/5] perf(memory): optimize addMessage session upsert and add coverage --- src/memory.test.ts | 64 +++++++++++++++++++++++++ src/memory.ts | 116 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 159 insertions(+), 21 deletions(-) create mode 100644 src/memory.test.ts diff --git a/src/memory.test.ts b/src/memory.test.ts new file mode 100644 index 00000000..75ad2c74 --- /dev/null +++ b/src/memory.test.ts @@ -0,0 +1,64 @@ +import { afterEach, beforeEach, expect, test } from "bun:test"; +import { Database } from "bun:sqlite"; +import { addMessage, initializeMemoryTables } from "./memory"; + +let db: Database; + +beforeEach(() => { + db = new Database(":memory:"); + db.exec("PRAGMA foreign_keys = ON"); + initializeMemoryTables(db); +}); + +afterEach(() => { + db.close(); +}); + +test("addMessage creates session once and does not overwrite existing title", async () => { + await addMessage(db, "s1", "user", "first", { title: "Initial Title" }); + await addMessage(db, "s1", "assistant", "second", { title: "New Title" }); + + const session = db + .prepare(`SELECT id, title FROM memory_sessions WHERE id = ?`) + .get("s1") as { id: string; title: string } | null; + const sessionCount = ( + db.prepare(`SELECT COUNT(*) AS count FROM memory_sessions WHERE id = ?`).get("s1") as { + count: number; + } + ).count; + const messageCount = ( + db.prepare(`SELECT COUNT(*) AS count FROM memory_messages WHERE session_id = ?`).get("s1") as { + count: number; + } + ).count; + + expect(session).not.toBeNull(); + expect(session?.title).toBe("Initial Title"); + expect(sessionCount).toBe(1); + expect(messageCount).toBe(2); +}); + +test("addMessage backfills empty title when provided later", async () => { + const now = new Date().toISOString(); + db.prepare( + `INSERT INTO memory_sessions (id, title, created_at, updated_at, active) VALUES (?, ?, ?, ?, 1)` + ).run("s-empty", "", now, now); + + await addMessage(db, "s-empty", "user", "hello", { title: "Backfilled Title" }); + + const session = db + .prepare(`SELECT title FROM memory_sessions WHERE id = ?`) + .get("s-empty") as { title: string } | null; + expect(session?.title).toBe("Backfilled Title"); +}); + +test("addMessage with sessionId 'new' generates a concrete session id", async () => { + const message = await addMessage(db, "new", "user", "hello"); + expect(message.session_id).not.toBe("new"); + expect(message.session_id.length).toBe(8); + + const session = db + .prepare(`SELECT id FROM memory_sessions WHERE id = ?`) + .get(message.session_id) as { id: string } | null; + expect(session?.id).toBe(message.session_id); +}); diff --git a/src/memory.ts b/src/memory.ts index 46a7ff2e..8559341d 100644 --- a/src/memory.ts +++ b/src/memory.ts @@ -64,6 +64,14 @@ export type MemorySearchResult = { source: "fts" | "vec"; }; +type RecallTimings = { + ftsMs: number; + vecMs: number; + fuseMs: number; + dedupeMs: number; + totalMs: number; +}; + // ============================================================================= // Schema Initialization // ============================================================================= @@ -102,6 +110,7 @@ export function initializeMemoryTables(db: Database): void { db.exec(`CREATE INDEX IF NOT EXISTS idx_memory_messages_session ON memory_messages(session_id)`); db.exec(`CREATE INDEX IF NOT EXISTS idx_memory_messages_hash ON memory_messages(hash)`); + db.exec(`CREATE INDEX IF NOT EXISTS idx_memory_sessions_active ON memory_sessions(active, id)`); // FTS5 for memory search db.exec(` @@ -263,19 +272,26 @@ export async function addMessage( const now = new Date().toISOString(); const hash = await hashContent(content); - // Ensure session exists (createSession may generate a new ID for "new") - let session = getSession(db, sessionId); - if (!session) { - session = createSession(db, sessionId, options.title || ""); - } else if (options.title && !session.title) { - db.prepare(`UPDATE memory_sessions SET title = ? WHERE id = ?`).run( - options.title, - sessionId - ); + // Preserve "new" behavior, which generates an ID. + let resolvedSessionId = sessionId; + if (sessionId === "new") { + resolvedSessionId = crypto.randomUUID().slice(0, 8); } - // Use the resolved session ID (may differ from input if "new" was passed) - const resolvedSessionId = session.id; + // Ensure session exists without a pre-read on every message. + db.prepare( + `INSERT OR IGNORE INTO memory_sessions (id, title, created_at, updated_at, active) + VALUES (?, ?, ?, ?, 1)` + ).run(resolvedSessionId, options.title || "", now, now); + + // If title is provided later, fill it only when current title is empty. + if (options.title) { + db.prepare( + `UPDATE memory_sessions + SET title = ? + WHERE id = ? AND (title = '' OR title IS NULL)` + ).run(options.title, resolvedSessionId); + } const metadataStr = options.metadata ? JSON.stringify(options.metadata) @@ -347,24 +363,37 @@ export function searchMemoryFTS( ): MemorySearchResult[] { const ftsQuery = buildMemoryFTS5Query(query); if (!ftsQuery) return []; + const candidateLimit = Math.max(limit * 3, limit); + // Rank candidate rowids in FTS first, then join to payload tables. + // This keeps the expensive bm25 ordering on the smallest possible row shape. const sql = ` + WITH ranked AS ( + SELECT + rowid, + bm25(memory_fts, 5.0, 1.0, 1.0) as bm25_score + FROM memory_fts + WHERE memory_fts MATCH ? + ORDER BY bm25_score ASC + LIMIT ? + ) SELECT m.session_id, s.title as session_title, m.id as message_id, m.role, m.content, - bm25(memory_fts, 5.0, 1.0, 1.0) as bm25_score - FROM memory_fts f - JOIN memory_messages m ON m.id = f.rowid + r.bm25_score + FROM ranked r + JOIN memory_messages m ON m.id = r.rowid JOIN memory_sessions s ON s.id = m.session_id - WHERE memory_fts MATCH ? AND s.active = 1 - ORDER BY bm25_score ASC + WHERE s.active = 1 + ORDER BY r.bm25_score ASC LIMIT ? `; - const rows = db.prepare(sql).all(ftsQuery, limit) as { + const stmt = getMemoryFtsStmt(db, sql); + const rows = stmt.all(ftsQuery, candidateLimit, limit) as { session_id: string; session_title: string; message_id: number; @@ -385,6 +414,17 @@ export function searchMemoryFTS( })); } +const memoryFtsStmtCache = new WeakMap>(); + +function getMemoryFtsStmt(db: Database, sql: string) { + let stmt = memoryFtsStmtCache.get(db); + if (!stmt) { + stmt = db.prepare(sql); + memoryFtsStmtCache.set(db, stmt); + } + return stmt; +} + /** * Search memory using vector similarity. * Two-step pattern: query vectors_vec first, then JOIN separately. @@ -654,16 +694,22 @@ export async function recallMemories( maxTokens?: number; } = {} ): Promise<{ results: MemorySearchResult[]; synthesis?: string }> { + const startedAt = performance.now(); + const shouldTraceRecall = process.env.SMRITI_BENCH_TRACE === "1"; const limit = options.limit ?? 10; // Run FTS and vector search + const ftsStartedAt = performance.now(); const ftsResults = searchMemoryFTS(db, query, limit); + const ftsMs = performance.now() - ftsStartedAt; let vecResults: MemorySearchResult[] = []; + const vecStartedAt = performance.now(); try { vecResults = await searchMemoryVec(db, query, limit); } catch { // Vector search may fail if no embeddings exist } + const vecMs = performance.now() - vecStartedAt; // Convert to RankedResult format for RRF const toRanked = (results: MemorySearchResult[]): RankedResult[] => @@ -676,24 +722,33 @@ export async function recallMemories( })); // Fuse results with RRF + const fuseStartedAt = performance.now(); const fused = reciprocalRankFusion( [toRanked(ftsResults), toRanked(vecResults)], [1.0, 1.0] ); + const fuseMs = performance.now() - fuseStartedAt; // Deduplicate by session, keeping best score per session + const dedupeStartedAt = performance.now(); const sessionSeen = new Map(); const dedupedResults: MemorySearchResult[] = []; + const originalByKey = new Map(); + for (const result of ftsResults) { + originalByKey.set(`${result.session_id}:${result.message_id}`, result); + } + for (const result of vecResults) { + const key = `${result.session_id}:${result.message_id}`; + // Prefer vector entry if both are present because it typically carries the better semantic score. + originalByKey.set(key, result); + } for (const r of fused) { const [sessionId] = r.file.split(":"); if (!sessionId) continue; // Find the original result to preserve all fields - const original = - [...ftsResults, ...vecResults].find( - (o) => `${o.session_id}:${o.message_id}` === r.file - ) || null; + const original = originalByKey.get(r.file) ?? null; if (original && !sessionSeen.has(sessionId)) { sessionSeen.set(sessionId, true); @@ -711,6 +766,7 @@ export async function recallMemories( }); } } + const dedupeMs = performance.now() - dedupeStartedAt; const results = dedupedResults.slice(0, limit); @@ -730,6 +786,24 @@ export async function recallMemories( }); } + if (shouldTraceRecall) { + const timings: RecallTimings = { + ftsMs, + vecMs, + fuseMs, + dedupeMs, + totalMs: performance.now() - startedAt, + }; + console.error( + `[recall.trace] q="${query.slice(0, 64)}" ` + + `fts=${timings.ftsMs.toFixed(3)}ms ` + + `vec=${timings.vecMs.toFixed(3)}ms ` + + `fuse=${timings.fuseMs.toFixed(3)}ms ` + + `dedupe=${timings.dedupeMs.toFixed(3)}ms ` + + `total=${timings.totalMs.toFixed(3)}ms` + ); + } + return { results, synthesis }; } From 0274a2c9a3f3d42082ced1fd66f01254b116b723 Mon Sep 17 00:00:00 2001 From: Ashutosh Tripathi Date: Tue, 10 Mar 2026 08:07:50 +0530 Subject: [PATCH 3/5] refactor: move memory.ts + ollama.ts to smriti/src/ These modules are Smriti-specific additions (conversation memory layer and Ollama client) that don't belong in the QMD source tree. Moving them to smriti/src/ means: - qmd/ submodule stays as pure upstream code - Upstream syncs are conflict-free (no smriti code in qmd/src/) - A thin adapter (smriti/src/qmd-internals.ts) is the sole coupling point --- src/memory.ts | 922 -------------------------------------------------- src/ollama.ts | 169 --------- 2 files changed, 1091 deletions(-) delete mode 100644 src/memory.ts delete mode 100644 src/ollama.ts diff --git a/src/memory.ts b/src/memory.ts deleted file mode 100644 index 8559341d..00000000 --- a/src/memory.ts +++ /dev/null @@ -1,922 +0,0 @@ -/** - * memory.ts - Conversation memory storage & retrieval for QMD - * - * Stores conversation messages in sessions, provides FTS5 + vector search, - * summarization via Ollama, and memory recall for LLM context. - * - * Reuses QMD's existing infrastructure: - * - content_vectors + vectors_vec tables for embeddings - * - hashContent() for content-addressable storage - * - chunkDocumentByTokens() for chunking - * - insertEmbedding() for vector storage - * - BM25 normalization pattern from searchFTS - * - Two-step vector search pattern from searchVec - * - reciprocalRankFusion() for combining results - */ - -import { Database } from "bun:sqlite"; -import { - hashContent, - chunkDocumentByTokens, - insertEmbedding, - reciprocalRankFusion, - getDocid, - type RankedResult, -} from "./store.js"; -import { - getDefaultLlamaCpp, - formatQueryForEmbedding, - formatDocForEmbedding, -} from "./llm.js"; -import { ollamaSummarize, ollamaRecall as ollamaRecallSynthesize } from "./ollama.js"; - -// ============================================================================= -// Types -// ============================================================================= - -export type MemorySession = { - id: string; - title: string; - created_at: string; - updated_at: string; - summary: string | null; - summary_at: string | null; - active: number; -}; - -export type MemoryMessage = { - id: number; - session_id: string; - role: string; - content: string; - hash: string; - created_at: string; - metadata: Record | null; -}; - -export type MemorySearchResult = { - session_id: string; - session_title: string; - message_id: number; - role: string; - content: string; - score: number; - source: "fts" | "vec"; -}; - -type RecallTimings = { - ftsMs: number; - vecMs: number; - fuseMs: number; - dedupeMs: number; - totalMs: number; -}; - -// ============================================================================= -// Schema Initialization -// ============================================================================= - -/** - * Create memory tables, indexes, triggers in the QMD database. - * Safe to call multiple times (uses IF NOT EXISTS). - */ -export function initializeMemoryTables(db: Database): void { - // Sessions table - db.exec(` - CREATE TABLE IF NOT EXISTS memory_sessions ( - id TEXT PRIMARY KEY, - title TEXT NOT NULL DEFAULT '', - created_at TEXT NOT NULL, - updated_at TEXT NOT NULL, - summary TEXT, - summary_at TEXT, - active INTEGER NOT NULL DEFAULT 1 - ) - `); - - // Messages table - db.exec(` - CREATE TABLE IF NOT EXISTS memory_messages ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - session_id TEXT NOT NULL, - role TEXT NOT NULL, - content TEXT NOT NULL, - hash TEXT NOT NULL, - created_at TEXT NOT NULL, - metadata TEXT, - FOREIGN KEY (session_id) REFERENCES memory_sessions(id) ON DELETE CASCADE - ) - `); - - db.exec(`CREATE INDEX IF NOT EXISTS idx_memory_messages_session ON memory_messages(session_id)`); - db.exec(`CREATE INDEX IF NOT EXISTS idx_memory_messages_hash ON memory_messages(hash)`); - db.exec(`CREATE INDEX IF NOT EXISTS idx_memory_sessions_active ON memory_sessions(active, id)`); - - // FTS5 for memory search - db.exec(` - CREATE VIRTUAL TABLE IF NOT EXISTS memory_fts USING fts5( - session_title, role, content, - tokenize='porter unicode61' - ) - `); - - // Triggers to sync memory_fts - db.exec(` - CREATE TRIGGER IF NOT EXISTS memory_messages_ai AFTER INSERT ON memory_messages - BEGIN - INSERT INTO memory_fts(rowid, session_title, role, content) - SELECT - new.id, - (SELECT title FROM memory_sessions WHERE id = new.session_id), - new.role, - new.content; - END - `); - - db.exec(` - CREATE TRIGGER IF NOT EXISTS memory_messages_ad AFTER DELETE ON memory_messages - BEGIN - DELETE FROM memory_fts WHERE rowid = old.id; - END - `); -} - -// ============================================================================= -// FTS5 Query Building (same pattern as store.ts buildFTS5Query) -// ============================================================================= - -function sanitizeMemoryFTSTerm(term: string): string { - return term.replace(/[^\p{L}\p{N}']/gu, "").toLowerCase(); -} - -function buildMemoryFTS5Query(query: string): string | null { - const terms = query - .split(/\s+/) - .map((t) => sanitizeMemoryFTSTerm(t)) - .filter((t) => t.length > 0); - if (terms.length === 0) return null; - if (terms.length === 1) return `"${terms[0]}"*`; - return terms.map((t) => `"${t}"*`).join(" AND "); -} - -// ============================================================================= -// Session CRUD -// ============================================================================= - -/** - * Create a new memory session. If id is "new", generates a random ID. - */ -export function createSession( - db: Database, - id: string, - title: string = "" -): MemorySession { - const now = new Date().toISOString(); - const sessionId = id === "new" ? crypto.randomUUID().slice(0, 8) : id; - - db.prepare( - `INSERT INTO memory_sessions (id, title, created_at, updated_at, active) VALUES (?, ?, ?, ?, 1)` - ).run(sessionId, title, now, now); - - return { - id: sessionId, - title, - created_at: now, - updated_at: now, - summary: null, - summary_at: null, - active: 1, - }; -} - -/** - * Get a session by ID. - */ -export function getSession(db: Database, id: string): MemorySession | null { - return ( - (db - .prepare(`SELECT * FROM memory_sessions WHERE id = ?`) - .get(id) as MemorySession | null) || null - ); -} - -/** - * List sessions, most recent first. - */ -export function listSessions( - db: Database, - options: { limit?: number; includeInactive?: boolean } = {} -): MemorySession[] { - const limit = options.limit ?? 20; - const where = options.includeInactive ? "" : "WHERE active = 1"; - return db - .prepare( - `SELECT * FROM memory_sessions ${where} ORDER BY updated_at DESC LIMIT ?` - ) - .all(limit) as MemorySession[]; -} - -/** - * Soft-delete a session (set active = 0). If hard = true, permanently delete. - */ -export function deleteSession( - db: Database, - id: string, - hard: boolean = false -): void { - if (hard) { - // Delete messages first (CASCADE should handle, but be explicit) - db.prepare(`DELETE FROM memory_messages WHERE session_id = ?`).run(id); - db.prepare(`DELETE FROM memory_sessions WHERE id = ?`).run(id); - } else { - db.prepare(`UPDATE memory_sessions SET active = 0 WHERE id = ?`).run(id); - } -} - -/** - * Clear all sessions (soft or hard delete). - */ -export function clearAllSessions(db: Database, hard: boolean = false): number { - if (hard) { - const count = ( - db.prepare(`SELECT COUNT(*) as count FROM memory_sessions`).get() as { - count: number; - } - ).count; - db.exec(`DELETE FROM memory_messages`); - db.exec(`DELETE FROM memory_sessions`); - db.exec(`DELETE FROM memory_fts`); - return count; - } else { - const result = db.prepare( - `UPDATE memory_sessions SET active = 0 WHERE active = 1` - ); - return result.run().changes; - } -} - -// ============================================================================= -// Message CRUD -// ============================================================================= - -/** - * Add a message to a session. Creates session if it doesn't exist. - */ -export async function addMessage( - db: Database, - sessionId: string, - role: string, - content: string, - options: { title?: string; metadata?: Record } = {} -): Promise { - const now = new Date().toISOString(); - const hash = await hashContent(content); - - // Preserve "new" behavior, which generates an ID. - let resolvedSessionId = sessionId; - if (sessionId === "new") { - resolvedSessionId = crypto.randomUUID().slice(0, 8); - } - - // Ensure session exists without a pre-read on every message. - db.prepare( - `INSERT OR IGNORE INTO memory_sessions (id, title, created_at, updated_at, active) - VALUES (?, ?, ?, ?, 1)` - ).run(resolvedSessionId, options.title || "", now, now); - - // If title is provided later, fill it only when current title is empty. - if (options.title) { - db.prepare( - `UPDATE memory_sessions - SET title = ? - WHERE id = ? AND (title = '' OR title IS NULL)` - ).run(options.title, resolvedSessionId); - } - - const metadataStr = options.metadata - ? JSON.stringify(options.metadata) - : null; - - const result = db - .prepare( - `INSERT INTO memory_messages (session_id, role, content, hash, created_at, metadata) - VALUES (?, ?, ?, ?, ?, ?)` - ) - .run(resolvedSessionId, role, content, hash, now, metadataStr); - - // Update session timestamp - db.prepare(`UPDATE memory_sessions SET updated_at = ? WHERE id = ?`).run( - now, - resolvedSessionId - ); - - return { - id: Number(result.lastInsertRowid), - session_id: resolvedSessionId, - role, - content, - hash, - created_at: now, - metadata: options.metadata || null, - }; -} - -/** - * Get messages for a session, ordered by creation time. - */ -export function getMessages( - db: Database, - sessionId: string, - options: { limit?: number } = {} -): MemoryMessage[] { - let sql = `SELECT * FROM memory_messages WHERE session_id = ? ORDER BY created_at ASC`; - const params: (string | number)[] = [sessionId]; - if (options.limit) { - sql += ` LIMIT ?`; - params.push(options.limit); - } - return db.prepare(sql).all(...params) as MemoryMessage[]; -} - -/** - * Get a formatted transcript for a session. - */ -export function getSessionTranscript( - db: Database, - sessionId: string -): string { - const messages = getMessages(db, sessionId); - return messages.map((m) => `${m.role}: ${m.content}`).join("\n\n"); -} - -// ============================================================================= -// Search -// ============================================================================= - -/** - * Search memory using FTS5 (BM25). Same normalization as store.ts searchFTS. - */ -export function searchMemoryFTS( - db: Database, - query: string, - limit: number = 20 -): MemorySearchResult[] { - const ftsQuery = buildMemoryFTS5Query(query); - if (!ftsQuery) return []; - const candidateLimit = Math.max(limit * 3, limit); - - // Rank candidate rowids in FTS first, then join to payload tables. - // This keeps the expensive bm25 ordering on the smallest possible row shape. - const sql = ` - WITH ranked AS ( - SELECT - rowid, - bm25(memory_fts, 5.0, 1.0, 1.0) as bm25_score - FROM memory_fts - WHERE memory_fts MATCH ? - ORDER BY bm25_score ASC - LIMIT ? - ) - SELECT - m.session_id, - s.title as session_title, - m.id as message_id, - m.role, - m.content, - r.bm25_score - FROM ranked r - JOIN memory_messages m ON m.id = r.rowid - JOIN memory_sessions s ON s.id = m.session_id - WHERE s.active = 1 - ORDER BY r.bm25_score ASC - LIMIT ? - `; - - const stmt = getMemoryFtsStmt(db, sql); - const rows = stmt.all(ftsQuery, candidateLimit, limit) as { - session_id: string; - session_title: string; - message_id: number; - role: string; - content: string; - bm25_score: number; - }[]; - - return rows.map((row) => ({ - session_id: row.session_id, - session_title: row.session_title, - message_id: row.message_id, - role: row.role, - content: row.content, - // Same BM25 normalization as store.ts: 1 / (1 + |score|) - score: 1 / (1 + Math.abs(row.bm25_score)), - source: "fts" as const, - })); -} - -const memoryFtsStmtCache = new WeakMap>(); - -function getMemoryFtsStmt(db: Database, sql: string) { - let stmt = memoryFtsStmtCache.get(db); - if (!stmt) { - stmt = db.prepare(sql); - memoryFtsStmtCache.set(db, stmt); - } - return stmt; -} - -/** - * Search memory using vector similarity. - * Two-step pattern: query vectors_vec first, then JOIN separately. - */ -export async function searchMemoryVec( - db: Database, - query: string, - limit: number = 20 -): Promise { - // Check if vectors_vec table exists - const tableExists = db - .prepare( - `SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'` - ) - .get(); - if (!tableExists) return []; - - // Get query embedding - const llm = getDefaultLlamaCpp(); - const formattedQuery = formatQueryForEmbedding(query); - const result = await llm.embed(formattedQuery, { isQuery: true }); - if (!result) return []; - - // Step 1: Get vector matches (no JOINs - sqlite-vec hangs with JOINs) - const vecResults = db - .prepare( - `SELECT hash_seq, distance FROM vectors_vec WHERE embedding MATCH ? AND k = ?` - ) - .all(new Float32Array(result.embedding), limit * 3) as { - hash_seq: string; - distance: number; - }[]; - - if (vecResults.length === 0) return []; - - // Step 2: Match against memory_messages by hash - const hashSeqs = vecResults.map((r) => r.hash_seq); - const distanceMap = new Map(vecResults.map((r) => [r.hash_seq, r.distance])); - - // Extract unique hashes from hash_seq (format: "hash_seq") - const hashes = [ - ...new Set(hashSeqs.map((hs) => hs.split("_").slice(0, -1).join("_"))), - ]; - const hashPlaceholders = hashes.map(() => "?").join(","); - - const docSql = ` - SELECT - m.id as message_id, - m.session_id, - s.title as session_title, - m.role, - m.content, - m.hash, - cv.hash || '_' || cv.seq as hash_seq - FROM memory_messages m - JOIN memory_sessions s ON s.id = m.session_id - JOIN content_vectors cv ON cv.hash = m.hash - WHERE m.hash IN (${hashPlaceholders}) AND s.active = 1 - `; - - const docRows = db.prepare(docSql).all(...hashes) as { - message_id: number; - session_id: string; - session_title: string; - role: string; - content: string; - hash: string; - hash_seq: string; - }[]; - - // Combine with distances, dedupe by message_id - const seen = new Map< - number, - { row: (typeof docRows)[0]; bestDist: number } - >(); - for (const row of docRows) { - const distance = distanceMap.get(row.hash_seq) ?? 1; - const existing = seen.get(row.message_id); - if (!existing || distance < existing.bestDist) { - seen.set(row.message_id, { row, bestDist: distance }); - } - } - - return Array.from(seen.values()) - .sort((a, b) => a.bestDist - b.bestDist) - .slice(0, limit) - .map(({ row, bestDist }) => ({ - session_id: row.session_id, - session_title: row.session_title, - message_id: row.message_id, - role: row.role, - content: row.content, - score: 1 - bestDist, // cosine similarity - source: "vec" as const, - })); -} - -// ============================================================================= -// Embedding -// ============================================================================= - -/** - * Embed unembedded memory messages. - * Reuses existing content_vectors + vectors_vec tables. - * Returns count of newly embedded messages. - */ -export async function embedMemoryMessages( - db: Database, - options: { onProgress?: (done: number, total: number) => void } = {} -): Promise { - // Find messages without embeddings - const unembedded = db - .prepare( - ` - SELECT m.hash, m.content, m.session_id - FROM memory_messages m - LEFT JOIN content_vectors cv ON cv.hash = m.hash AND cv.seq = 0 - WHERE cv.hash IS NULL - GROUP BY m.hash - ` - ) - .all() as { hash: string; content: string; session_id: string }[]; - - if (unembedded.length === 0) return 0; - - const llm = getDefaultLlamaCpp(); - let embedded = 0; - - for (const msg of unembedded) { - // Chunk the message content - const chunks = await chunkDocumentByTokens(msg.content); - - // Ensure vec table exists with correct dimensions - // Get dimension from first embedding - const firstText = formatDocForEmbedding(chunks[0]!.text); - const firstEmbed = await llm.embed(firstText); - if (!firstEmbed) continue; - - const dimensions = firstEmbed.embedding.length; - - // Ensure vectors_vec table exists with correct dimensions - const tableInfo = db - .prepare( - `SELECT sql FROM sqlite_master WHERE type='table' AND name='vectors_vec'` - ) - .get() as { sql: string } | null; - if (!tableInfo) { - db.exec( - `CREATE VIRTUAL TABLE vectors_vec USING vec0(hash_seq TEXT PRIMARY KEY, embedding float[${dimensions}] distance_metric=cosine)` - ); - } - - const now = new Date().toISOString(); - - // Insert first chunk embedding - insertEmbedding( - db, - msg.hash, - 0, - chunks[0]!.pos, - new Float32Array(firstEmbed.embedding), - firstEmbed.model, - now - ); - - // Embed remaining chunks - for (let i = 1; i < chunks.length; i++) { - const chunk = chunks[i]!; - const text = formatDocForEmbedding(chunk.text); - const embedResult = await llm.embed(text); - if (embedResult) { - insertEmbedding( - db, - msg.hash, - i, - chunk.pos, - new Float32Array(embedResult.embedding), - embedResult.model, - now - ); - } - } - - embedded++; - options.onProgress?.(embedded, unembedded.length); - } - - return embedded; -} - -// ============================================================================= -// Summarization -// ============================================================================= - -/** - * Summarize a session via Ollama and store the summary. - */ -export async function summarizeSession( - db: Database, - sessionId: string, - options: { model?: string; force?: boolean } = {} -): Promise { - const session = getSession(db, sessionId); - if (!session) throw new Error(`Session not found: ${sessionId}`); - - // Check if already summarized (unless force) - if (session.summary && !options.force) { - return session.summary; - } - - const transcript = getSessionTranscript(db, sessionId); - if (!transcript.trim()) throw new Error(`Session ${sessionId} has no messages`); - - const summary = await ollamaSummarize(transcript, { model: options.model }); - const now = new Date().toISOString(); - - db.prepare( - `UPDATE memory_sessions SET summary = ?, summary_at = ? WHERE id = ?` - ).run(summary, now, sessionId); - - return summary; -} - -/** - * Summarize recent sessions that don't have summaries yet. - * Returns count of sessions summarized. - */ -export async function summarizeRecentSessions( - db: Database, - options: { limit?: number; model?: string } = {} -): Promise { - const limit = options.limit ?? 10; - const sessions = db - .prepare( - `SELECT id FROM memory_sessions WHERE active = 1 AND summary IS NULL ORDER BY updated_at DESC LIMIT ?` - ) - .all(limit) as { id: string }[]; - - let count = 0; - for (const s of sessions) { - try { - await summarizeSession(db, s.id, { model: options.model }); - count++; - } catch { - // Skip sessions that fail to summarize - } - } - return count; -} - -// ============================================================================= -// Recall -// ============================================================================= - -/** - * Recall relevant memories for a query. - * Combines FTS + vector search using RRF, deduplicates by session, - * and optionally synthesizes via Ollama. - */ -export async function recallMemories( - db: Database, - query: string, - options: { - limit?: number; - synthesize?: boolean; - model?: string; - maxTokens?: number; - } = {} -): Promise<{ results: MemorySearchResult[]; synthesis?: string }> { - const startedAt = performance.now(); - const shouldTraceRecall = process.env.SMRITI_BENCH_TRACE === "1"; - const limit = options.limit ?? 10; - - // Run FTS and vector search - const ftsStartedAt = performance.now(); - const ftsResults = searchMemoryFTS(db, query, limit); - const ftsMs = performance.now() - ftsStartedAt; - let vecResults: MemorySearchResult[] = []; - const vecStartedAt = performance.now(); - try { - vecResults = await searchMemoryVec(db, query, limit); - } catch { - // Vector search may fail if no embeddings exist - } - const vecMs = performance.now() - vecStartedAt; - - // Convert to RankedResult format for RRF - const toRanked = (results: MemorySearchResult[]): RankedResult[] => - results.map((r) => ({ - file: `${r.session_id}:${r.message_id}`, - displayPath: r.session_title, - title: r.role, - body: r.content, - score: r.score, - })); - - // Fuse results with RRF - const fuseStartedAt = performance.now(); - const fused = reciprocalRankFusion( - [toRanked(ftsResults), toRanked(vecResults)], - [1.0, 1.0] - ); - const fuseMs = performance.now() - fuseStartedAt; - - // Deduplicate by session, keeping best score per session - const dedupeStartedAt = performance.now(); - const sessionSeen = new Map(); - const dedupedResults: MemorySearchResult[] = []; - const originalByKey = new Map(); - for (const result of ftsResults) { - originalByKey.set(`${result.session_id}:${result.message_id}`, result); - } - for (const result of vecResults) { - const key = `${result.session_id}:${result.message_id}`; - // Prefer vector entry if both are present because it typically carries the better semantic score. - originalByKey.set(key, result); - } - - for (const r of fused) { - const [sessionId] = r.file.split(":"); - if (!sessionId) continue; - - // Find the original result to preserve all fields - const original = originalByKey.get(r.file) ?? null; - - if (original && !sessionSeen.has(sessionId)) { - sessionSeen.set(sessionId, true); - dedupedResults.push({ ...original, score: r.score }); - } else if (!original && !sessionSeen.has(sessionId)) { - sessionSeen.set(sessionId, true); - dedupedResults.push({ - session_id: sessionId!, - session_title: r.displayPath, - message_id: parseInt(r.file.split(":")[1] || "0"), - role: r.title, - content: r.body, - score: r.score, - source: "fts", - }); - } - } - const dedupeMs = performance.now() - dedupeStartedAt; - - const results = dedupedResults.slice(0, limit); - - // Optionally synthesize via Ollama - let synthesis: string | undefined; - if (options.synthesize && results.length > 0) { - const memoriesText = results - .map( - (r) => - `[Session: ${r.session_title || r.session_id}]\n${r.role}: ${r.content}` - ) - .join("\n\n---\n\n"); - - synthesis = await ollamaRecallSynthesize(query, memoriesText, { - model: options.model, - maxTokens: options.maxTokens, - }); - } - - if (shouldTraceRecall) { - const timings: RecallTimings = { - ftsMs, - vecMs, - fuseMs, - dedupeMs, - totalMs: performance.now() - startedAt, - }; - console.error( - `[recall.trace] q="${query.slice(0, 64)}" ` + - `fts=${timings.ftsMs.toFixed(3)}ms ` + - `vec=${timings.vecMs.toFixed(3)}ms ` + - `fuse=${timings.fuseMs.toFixed(3)}ms ` + - `dedupe=${timings.dedupeMs.toFixed(3)}ms ` + - `total=${timings.totalMs.toFixed(3)}ms` - ); - } - - return { results, synthesis }; -} - -// ============================================================================= -// Import -// ============================================================================= - -/** - * Import a conversation transcript from a file. - * Supports 'chat' format (role: content) and 'jsonl' format. - */ -export async function importTranscript( - db: Database, - content: string, - options: { title?: string; format?: "chat" | "jsonl"; sessionId?: string } = {} -): Promise<{ sessionId: string; messageCount: number }> { - const format = options.format ?? "chat"; - const sessionId = options.sessionId || crypto.randomUUID().slice(0, 8); - - let messages: { role: string; content: string }[] = []; - - if (format === "jsonl") { - messages = content - .split("\n") - .filter((line) => line.trim()) - .map((line) => { - const parsed = JSON.parse(line); - return { role: parsed.role || "user", content: parsed.content || "" }; - }); - } else { - // Chat format: "role: content" separated by blank lines - const blocks = content.split(/\n\n+/); - for (const block of blocks) { - const trimmed = block.trim(); - if (!trimmed) continue; - const colonIdx = trimmed.indexOf(":"); - if (colonIdx > 0 && colonIdx < 20) { - const role = trimmed.slice(0, colonIdx).trim().toLowerCase(); - const msgContent = trimmed.slice(colonIdx + 1).trim(); - if (msgContent) { - messages.push({ role, content: msgContent }); - } - } else { - messages.push({ role: "user", content: trimmed }); - } - } - } - - for (const msg of messages) { - await addMessage(db, sessionId, msg.role, msg.content, { - title: options.title, - }); - } - - return { sessionId, messageCount: messages.length }; -} - -// ============================================================================= -// Status -// ============================================================================= - -/** - * Get memory statistics. - */ -export function getMemoryStatus(db: Database): { - sessions: number; - activeSessions: number; - messages: number; - embeddedMessages: number; - summarizedSessions: number; -} { - const sessions = ( - db - .prepare(`SELECT COUNT(*) as count FROM memory_sessions`) - .get() as { count: number } - ).count; - - const activeSessions = ( - db - .prepare( - `SELECT COUNT(*) as count FROM memory_sessions WHERE active = 1` - ) - .get() as { count: number } - ).count; - - const messages = ( - db - .prepare(`SELECT COUNT(*) as count FROM memory_messages`) - .get() as { count: number } - ).count; - - const embeddedMessages = ( - db - .prepare( - `SELECT COUNT(DISTINCT m.hash) as count FROM memory_messages m - JOIN content_vectors cv ON cv.hash = m.hash` - ) - .get() as { count: number } - ).count; - - const summarizedSessions = ( - db - .prepare( - `SELECT COUNT(*) as count FROM memory_sessions WHERE summary IS NOT NULL` - ) - .get() as { count: number } - ).count; - - return { - sessions, - activeSessions, - messages, - embeddedMessages, - summarizedSessions, - }; -} diff --git a/src/ollama.ts b/src/ollama.ts deleted file mode 100644 index 0bbbde98..00000000 --- a/src/ollama.ts +++ /dev/null @@ -1,169 +0,0 @@ -/** - * ollama.ts - Ollama API client for QMD memory summarization and synthesis - * - * Uses Bun's fetch() to call Ollama's HTTP API. Separate from llm.ts which - * uses node-llama-cpp for embeddings/reranking. - * - * Config via env: - * OLLAMA_HOST - Ollama server URL (default: http://127.0.0.1:11434) - * QMD_MEMORY_MODEL - Model for summarization/synthesis (default: qwen3:8b-tuned) - */ - -// ============================================================================= -// Configuration -// ============================================================================= - -const OLLAMA_HOST = Bun.env.OLLAMA_HOST || "http://127.0.0.1:11434"; -const DEFAULT_MEMORY_MODEL = Bun.env.QMD_MEMORY_MODEL || "qwen3:8b-tuned"; - -// ============================================================================= -// Types -// ============================================================================= - -export type OllamaChatMessage = { - role: "system" | "user" | "assistant"; - content: string; -}; - -export type OllamaChatOptions = { - model?: string; - temperature?: number; - maxTokens?: number; -}; - -export type OllamaChatResponse = { - model: string; - message: OllamaChatMessage; - done: boolean; - total_duration?: number; - eval_count?: number; -}; - -// ============================================================================= -// Core API -// ============================================================================= - -/** - * Send a chat completion request to Ollama. - * Uses stream: false for simple request/response. - */ -export async function ollamaChat( - messages: OllamaChatMessage[], - options: OllamaChatOptions = {} -): Promise { - const model = options.model || DEFAULT_MEMORY_MODEL; - const resp = await fetch(`${OLLAMA_HOST}/api/chat`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - model, - messages, - stream: false, - options: { - ...(options.temperature !== undefined && { temperature: options.temperature }), - ...(options.maxTokens !== undefined && { num_predict: options.maxTokens }), - }, - }), - }); - - if (!resp.ok) { - const body = await resp.text(); - throw new Error(`Ollama chat failed (${resp.status}): ${body}`); - } - - return resp.json() as Promise; -} - -/** - * Summarize a conversation transcript via Ollama. - * Returns a concise summary of the conversation. - */ -export async function ollamaSummarize( - transcript: string, - options: OllamaChatOptions = {} -): Promise { - const messages: OllamaChatMessage[] = [ - { - role: "system", - content: - "You are a conversation summarizer. Produce a concise summary of the following conversation. " + - "Focus on key topics discussed, decisions made, questions asked, and solutions provided. " + - "Keep it under 200 words. Output only the summary, no preamble.", - }, - { - role: "user", - content: transcript, - }, - ]; - - const resp = await ollamaChat(messages, { - ...options, - temperature: options.temperature ?? 0.3, - maxTokens: options.maxTokens ?? 512, - }); - - return resp.message.content.trim(); -} - -/** - * Synthesize recalled memories into coherent context via Ollama. - * Takes a query and memory fragments, returns a synthesized response. - */ -export async function ollamaRecall( - query: string, - memories: string, - options: OllamaChatOptions = {} -): Promise { - const messages: OllamaChatMessage[] = [ - { - role: "system", - content: - "You are a memory recall assistant. Given a query and relevant past conversation memories, " + - "synthesize the memories into useful context for answering the query. " + - "Be concise and focus on information directly relevant to the query. " + - "If memories contain contradictory information, note the most recent. " + - "Output only the synthesized context, no preamble.", - }, - { - role: "user", - content: `Query: ${query}\n\nRelevant memories:\n${memories}`, - }, - ]; - - const resp = await ollamaChat(messages, { - ...options, - temperature: options.temperature ?? 0.3, - maxTokens: options.maxTokens ?? 1024, - }); - - return resp.message.content.trim(); -} - -/** - * Check if Ollama is running and accessible. - * Pings the /api/tags endpoint. - */ -export async function ollamaHealthCheck(): Promise<{ - ok: boolean; - models?: string[]; - error?: string; -}> { - try { - const resp = await fetch(`${OLLAMA_HOST}/api/tags`, { - signal: AbortSignal.timeout(5000), - }); - if (!resp.ok) { - return { ok: false, error: `HTTP ${resp.status}` }; - } - const data = (await resp.json()) as { models?: { name: string }[] }; - const models = data.models?.map((m) => m.name) || []; - return { ok: true, models }; - } catch (err) { - return { - ok: false, - error: err instanceof Error ? err.message : String(err), - }; - } -} - -export { DEFAULT_MEMORY_MODEL, OLLAMA_HOST }; From 14b1ce06ae1d59df249e7687dc8d71932befe298 Mon Sep 17 00:00:00 2001 From: Ashutosh Tripathi Date: Tue, 10 Mar 2026 08:19:57 +0530 Subject: [PATCH 4/5] refactor: remove memory.test.ts (tests moved to smriti/test/) --- src/memory.test.ts | 64 ---------------------------------------------- 1 file changed, 64 deletions(-) delete mode 100644 src/memory.test.ts diff --git a/src/memory.test.ts b/src/memory.test.ts deleted file mode 100644 index 75ad2c74..00000000 --- a/src/memory.test.ts +++ /dev/null @@ -1,64 +0,0 @@ -import { afterEach, beforeEach, expect, test } from "bun:test"; -import { Database } from "bun:sqlite"; -import { addMessage, initializeMemoryTables } from "./memory"; - -let db: Database; - -beforeEach(() => { - db = new Database(":memory:"); - db.exec("PRAGMA foreign_keys = ON"); - initializeMemoryTables(db); -}); - -afterEach(() => { - db.close(); -}); - -test("addMessage creates session once and does not overwrite existing title", async () => { - await addMessage(db, "s1", "user", "first", { title: "Initial Title" }); - await addMessage(db, "s1", "assistant", "second", { title: "New Title" }); - - const session = db - .prepare(`SELECT id, title FROM memory_sessions WHERE id = ?`) - .get("s1") as { id: string; title: string } | null; - const sessionCount = ( - db.prepare(`SELECT COUNT(*) AS count FROM memory_sessions WHERE id = ?`).get("s1") as { - count: number; - } - ).count; - const messageCount = ( - db.prepare(`SELECT COUNT(*) AS count FROM memory_messages WHERE session_id = ?`).get("s1") as { - count: number; - } - ).count; - - expect(session).not.toBeNull(); - expect(session?.title).toBe("Initial Title"); - expect(sessionCount).toBe(1); - expect(messageCount).toBe(2); -}); - -test("addMessage backfills empty title when provided later", async () => { - const now = new Date().toISOString(); - db.prepare( - `INSERT INTO memory_sessions (id, title, created_at, updated_at, active) VALUES (?, ?, ?, ?, 1)` - ).run("s-empty", "", now, now); - - await addMessage(db, "s-empty", "user", "hello", { title: "Backfilled Title" }); - - const session = db - .prepare(`SELECT title FROM memory_sessions WHERE id = ?`) - .get("s-empty") as { title: string } | null; - expect(session?.title).toBe("Backfilled Title"); -}); - -test("addMessage with sessionId 'new' generates a concrete session id", async () => { - const message = await addMessage(db, "new", "user", "hello"); - expect(message.session_id).not.toBe("new"); - expect(message.session_id.length).toBe(8); - - const session = db - .prepare(`SELECT id FROM memory_sessions WHERE id = ?`) - .get(message.session_id) as { id: string } | null; - expect(session?.id).toBe(message.session_id); -}); From f951f9966ceafb4cd2046f4dce641c78ab10af8a Mon Sep 17 00:00:00 2001 From: Ashutosh Tripathi Date: Tue, 10 Mar 2026 15:16:44 +0530 Subject: [PATCH 5/5] chore: regenerate bun.lock after upstream merge --- bun.lock | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bun.lock b/bun.lock index 9cb44a60..da8f9dc6 100644 --- a/bun.lock +++ b/bun.lock @@ -22,8 +22,9 @@ "optionalDependencies": { "sqlite-vec-darwin-arm64": "^0.1.7-alpha.2", "sqlite-vec-darwin-x64": "^0.1.7-alpha.2", + "sqlite-vec-linux-arm64": "^0.1.7-alpha.2", "sqlite-vec-linux-x64": "^0.1.7-alpha.2", - "sqlite-vec-win32-x64": "^0.1.7-alpha.2", + "sqlite-vec-windows-x64": "^0.1.7-alpha.2", }, "peerDependencies": { "typescript": "^5.9.3",