diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b74542..12a3280 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,15 @@ `Shell`, `ReadFile`, and `WriteFile`, and maps hidden managed Kimi Code model aliases to priced Kimi K2 entries. +### Fixed (CLI) +- **Cursor agentKv timestamps no longer use database mtime.** Cursor agentKv + rows now require an internal timestamp (`createdAt`, `timestamp`, or `time`) + before CodeBurn reports usage for that session. Rows without an internal + timestamp are skipped instead of being attributed to the mutable SQLite file + modification time, preventing historical Cursor usage from appearing under + today's date. Cursor result cache version bumped to recompute older cached + entries. Closes #325. + ## 0.9.9 - 2026-05-15 ### Added (CLI) diff --git a/src/cursor-cache.ts b/src/cursor-cache.ts index 390dcfa..ebc187f 100644 --- a/src/cursor-cache.ts +++ b/src/cursor-cache.ts @@ -5,13 +5,11 @@ import { randomBytes } from 'crypto' import type { ParsedProviderCall } from './providers/types.js' -// Bumped to 3 for the workspace-aware breakdown change: the cursor parser -// now derives `sessionId` from the bubble row key (the real composer id) -// rather than the empty `conversationId` JSON field, and the workspace -// router relies on those composer ids to bucket calls per project. -// Version 2 caches contain `sessionId: 'unknown'` for every call and would -// route everything to the orphan project, so we invalidate them. -const CURSOR_CACHE_VERSION = 3 +// Bumped to 4 for the Cursor timestamp hardening: agentKv calls now require +// an internal row timestamp instead of using the mutable SQLite database mtime. +// Version 3 caches can contain historical agentKv calls bucketed under the +// database modification day, so they must be invalidated. 
/**
 * Normalizes a raw Cursor row timestamp into an ISO-8601 UTC string.
 *
 * Accepted inputs:
 * - epoch numbers, in seconds (values below 1e12) or milliseconds;
 * - numeric strings (optionally fractional, optionally padded with
 *   whitespace), interpreted with the same seconds/milliseconds rule;
 * - any date string understood by the `Date` constructor (e.g. ISO-8601).
 *
 * @param raw - Value read from the row (`createdAt`, `timestamp`, or `time`).
 * @returns The ISO string, or `null` when the value is absent, blank,
 *   non-finite, non-positive, or not parseable as a date. Callers treat
 *   `null` as "no internal timestamp".
 */
function parseCursorTimestamp(raw: string | number | null | undefined): string | null {
  if (raw === null || raw === undefined) return null

  let epoch: number | null = null
  let text = ''
  if (typeof raw === 'number') {
    epoch = raw
  } else {
    text = raw.trim()
    if (text === '') return null
    // Plain decimal digits (with an optional fraction) are an epoch value,
    // not a calendar string; `new Date('1735787045')` would not parse it.
    if (/^\d+(\.\d+)?$/.test(text)) epoch = Number(text)
  }

  let date: Date
  if (epoch !== null) {
    // A zero or negative epoch is an "unset" sentinel, not a real event time;
    // reporting it would bucket usage under 1970 instead of skipping the row.
    if (!Number.isFinite(epoch) || epoch <= 0) return null
    // Values below 1e12 cannot be millisecond timestamps from this century,
    // so they are interpreted as seconds.
    date = new Date(epoch < 1_000_000_000_000 ? epoch * 1000 : epoch)
  } else {
    date = new Date(text)
  }

  // Guard before toISOString(), which throws RangeError on an invalid date.
  return Number.isNaN(date.getTime()) ? null : date.toISOString()
}
contentText const queryMatch = text.match(/([\s\S]*?)<\/user_query>/) @@ -611,19 +618,25 @@ function parseAgentKv(db: SqliteDatabase, seenKeys: Set, dbPath: string) } sessions.set(requestId, existing) } else if (row.role === 'assistant') { - const existing = sessions.get(requestId) ?? { inputChars: 0, outputChars: 0, model: null, userText: '' } + const existing = sessions.get(requestId) ?? { inputChars: 0, outputChars: 0, model: null, userText: '', timestamp: null } existing.outputChars += textLength + if (!existing.timestamp && timestamp) existing.timestamp = timestamp if (model) existing.model = model sessions.set(requestId, existing) } else if (row.role === 'tool' || row.role === 'system') { - const existing = sessions.get(requestId) ?? { inputChars: 0, outputChars: 0, model: null, userText: '' } + const existing = sessions.get(requestId) ?? { inputChars: 0, outputChars: 0, model: null, userText: '', timestamp: null } existing.inputChars += textLength + if (!existing.timestamp && timestamp) existing.timestamp = timestamp sessions.set(requestId, existing) } } for (const [requestId, session] of sessions) { if (session.inputChars === 0 && session.outputChars === 0) continue + if (!session.timestamp) { + skippedMissingTimestamp += 1 + continue + } const inputTokens = Math.ceil(session.inputChars / CHARS_PER_TOKEN) const outputTokens = Math.ceil(session.outputChars / CHARS_PER_TOKEN) @@ -649,7 +662,7 @@ function parseAgentKv(db: SqliteDatabase, seenKeys: Set, dbPath: string) costUSD, tools: [], bashCommands: [], - timestamp: agentKvTimestamp, + timestamp: session.timestamp, speed: 'standard', deduplicationKey: dedupKey, userMessage: session.userText, @@ -657,6 +670,10 @@ function parseAgentKv(db: SqliteDatabase, seenKeys: Set, dbPath: string) }) } + if (skippedMissingTimestamp > 0) { + process.stderr.write(`codeburn: skipped ${skippedMissingTimestamp} Cursor agentKv sessions without internal timestamps\n`) + } + return { calls: results } } @@ -720,7 +737,7 @@ 
/**
 * Builds the JSON payload of a synthetic Cursor `agentKv:blob:*` row for tests.
 *
 * The resulting object has `role`, an optional `createdAt`, the request id
 * under `providerOptions.cursor.requestId`, and a single content part holding
 * `text` plus an optional `providerOptions.cursor.modelName`.
 *
 * @param opts - Fixture fields. `createdAt` is emitted whenever it is provided,
 *   including falsy values such as `0` or `''`, so edge-case timestamps can be
 *   exercised; it is omitted only when left `undefined`.
 * @returns The JSON-serialized row value.
 */
function agentKvValue(opts: {
  role: 'user' | 'assistant'
  text: string
  requestId: string
  createdAt?: string | number
  modelName?: string
}): string {
  return JSON.stringify({
    role: opts.role,
    // Presence check rather than truthiness: `createdAt: 0` must reach the row.
    ...(opts.createdAt !== undefined ? { createdAt: opts.createdAt } : {}),
    providerOptions: { cursor: { requestId: opts.requestId } },
    content: [{
      text: opts.text,
      ...(opts.modelName ? { providerOptions: { cursor: { modelName: opts.modelName } } } : {}),
    }],
  })
}
expect(calls).toHaveLength(0) + expect(String(stderrSpy.mock.calls.at(-1)?.[0] ?? '')).toContain('without internal timestamps') + } finally { + stderrSpy.mockRestore() + } + }) + + it('uses agentKv internal createdAt when present', async () => { + const createdAt = '2025-01-02T03:04:05.000Z' + const dbPath = await createAgentKvDb([ + { + key: 'agentKv:blob:req-2:user', + value: agentKvValue({ role: 'user', requestId: 'req-2', text: 'old task', createdAt }), + }, + { + key: 'agentKv:blob:req-2:assistant', + value: agentKvValue({ role: 'assistant', requestId: 'req-2', text: 'old answer', modelName: 'gpt-5', createdAt }), + }, + ]) + await utimes(dbPath, new Date('2099-01-01T00:00:00.000Z'), new Date('2099-01-01T00:00:00.000Z')) + + const calls = await collectCursorCalls(dbPath) + + expect(calls).toHaveLength(1) + expect(calls[0]!.timestamp).toBe(createdAt) + expect(calls[0]!.deduplicationKey).toBe('cursor:agentKv:req-2') + expect(calls[0]!.model).toBe('gpt-5') + }) + + it('accepts numeric agentKv timestamps stored as JSON strings', async () => { + const dbPath = await createAgentKvDb([ + { + key: 'agentKv:blob:req-3:user', + value: agentKvValue({ + role: 'user', + requestId: 'req-3', + text: 'old task', + createdAt: '1735787045', + }), + }, + { + key: 'agentKv:blob:req-3:assistant', + value: agentKvValue({ + role: 'assistant', + requestId: 'req-3', + text: 'old answer', + modelName: 'gpt-5', + createdAt: '1735787045', + }), + }, + ]) + + const calls = await collectCursorCalls(dbPath) + + expect(calls).toHaveLength(1) + expect(calls[0]!.timestamp).toBe('2025-01-02T03:04:05.000Z') + }) +})