diff --git a/.changeset/savings-storage-at-rest-detach-from-savings-report.md b/.changeset/savings-storage-at-rest-detach-from-savings-report.md new file mode 100644 index 0000000..a3ae843 --- /dev/null +++ b/.changeset/savings-storage-at-rest-detach-from-savings-report.md @@ -0,0 +1,9 @@ +--- +'@colony/core': patch +--- + +Stop attributing the storage-at-rest compression claim to live `savings_report` calls + +The `Storage at rest (per observation)` reference row used to map to `['savings_report']`. Live `savings_report` output is structured JSON (~3.5k tokens per call) where the caveman compressor preserves technical tokens byte-for-byte, so the live comparison projected the row's 1k-token baseline against ~3.5k actual tokens and reported negative savings (e.g. `-155%`). + +The row stays in the static reference — caveman compression really does shrink prose observations on disk — but it is now a structural claim about the storage layer rather than a per-call cost, so `mcp_operations` is empty. `savings_report` calls now show up under `unmatched_operations` instead of inflating the row. diff --git a/apps/mcp-server/test/task-threads.test.ts b/apps/mcp-server/test/task-threads.test.ts index ce54598..987ce54 100644 --- a/apps/mcp-server/test/task-threads.test.ts +++ b/apps/mcp-server/test/task-threads.test.ts @@ -1554,7 +1554,9 @@ describe('task_claim_file — protected-branch guard', () => { arguments: { task_id: thread.task_id, session_id: 'S1', file_path: '/repo/src/index.ts' }, }); expect(res.isError).toBe(true); - const body = JSON.parse((res.content as Array<{ type: string; text: string }>)[0]?.text ?? '{}'); + const body = JSON.parse( + (res.content as Array<{ type: string; text: string }>)[0]?.text ?? '{}', + ); expect(body.code).toBe(TASK_THREAD_ERROR_CODES.PROTECTED_BRANCH_CLAIM_REJECTED); // No claim row written. expect(guardedStore.storage.getClaim(thread.task_id, '/repo/src/index.ts')).toBeFalsy(); @@ -1572,7 +1574,9 @@ describe('task_claim_file — protected-branch guard', () => { arguments: { task_id: thread.task_id, session_id: 'S2', file_path: '/repo/src/index.ts' }, }); expect(res.isError).toBeFalsy(); - const body = JSON.parse((res.content as Array<{ type: string; text: string }>)[0]?.text ?? '{}'); + const body = JSON.parse( + (res.content as Array<{ type: string; text: string }>)[0]?.text ?? '{}', + ); expect(body.claim_status).toBe('claimed'); }); }); diff --git a/packages/core/src/savings-reference.ts b/packages/core/src/savings-reference.ts index 73f7594..32f474b 100644 --- a/packages/core/src/savings-reference.ts +++ b/packages/core/src/savings-reference.ts @@ -228,8 +228,8 @@ export const SAVINGS_REFERENCE_ROWS: ReadonlyArray = [ 1, 1_000, 300, - 'caveman compression preserves technical tokens byte-for-byte', - ['savings_report'], + 'caveman compression keeps prose observations small on disk; this is a structural claim about the storage layer, not the cost of any single MCP call, so no live operation is mapped (savings_report itself emits structured JSON that is mostly preserved-as-is technical tokens, which made it a misleading proxy)', + [], ), row( 'Plan publication & goal anchoring', diff --git a/packages/core/test/savings-reference.test.ts b/packages/core/test/savings-reference.test.ts index 2841874..e95e784 100644 --- a/packages/core/test/savings-reference.test.ts +++ b/packages/core/test/savings-reference.test.ts @@ -1,5 +1,9 @@ import { describe, expect, it } from 'vitest'; -import { savingsLiveComparison, savingsLiveComparisonCost } from '../src/savings-reference.js'; +import { + SAVINGS_REFERENCE_ROWS, + savingsLiveComparison, + savingsLiveComparisonCost, +} from '../src/savings-reference.js'; describe('savings reference receipts', () => { it('estimates USD saved from matched live mcp_metrics costs', () => { @@ -49,4 +53,33 @@ describe('savings reference receipts', () => { saved_cost_usd: 0.097, }); }); + + it('does not attribute structured-output savings_report calls to the at-rest compression row', () => { + // Regression: the "Storage at rest (per observation)" row used to map to + // ['savings_report'], which produced negative live savings because + // savings_report emits ~3.5k tokens of structured JSON per call against a + // 1k baseline. The row stays in the static reference (compression is real + // for prose observations) but no live MCP operation should be mapped to + // it. + const storageAtRestRow = SAVINGS_REFERENCE_ROWS.find( + (r) => r.operation === 'Storage at rest (per observation)', + ); + expect(storageAtRestRow).toBeDefined(); + expect(storageAtRestRow?.mcp_operations).toEqual([]); + + const comparison = savingsLiveComparison([ + { + operation: 'savings_report', + calls: 2, + total_tokens: 5100, + last_ts: 1, + }, + ]); + expect( + comparison.rows.find((r) => r.operation === 'Storage at rest (per observation)'), + ).toBeUndefined(); + expect( + comparison.unmatched_operations.find((u) => u.operation === 'savings_report'), + ).toMatchObject({ operation: 'savings_report', calls: 2, colony_tokens: 5100 }); + }); });