From 55df71975007a7914405c20a6e7d6e9eb359374e Mon Sep 17 00:00:00 2001 From: Suresh Date: Sat, 16 May 2026 09:48:25 +0530 Subject: [PATCH] Stream large database exports --- src/export/csv.test.ts | 161 ++++++++++++++--------- src/export/csv.ts | 62 ++++----- src/export/dump.test.ts | 126 +++++++++++++----- src/export/dump.ts | 97 +++++++------- src/export/json.test.ts | 117 ++++++++++------- src/export/json.ts | 34 +++-- src/export/streaming.test.ts | 175 +++++++++++++++++++++++++ src/export/streaming.ts | 246 +++++++++++++++++++++++++++++++++++ 8 files changed, 789 insertions(+), 229 deletions(-) create mode 100644 src/export/streaming.test.ts create mode 100644 src/export/streaming.ts diff --git a/src/export/csv.test.ts b/src/export/csv.test.ts index b186aeb..7e1989d 100644 --- a/src/export/csv.test.ts +++ b/src/export/csv.test.ts @@ -1,13 +1,12 @@ import { describe, it, expect, vi, beforeEach } from 'vitest' import { exportTableToCsvRoute } from './csv' -import { getTableData, createExportResponse } from './index' +import { executeOperation } from '.' 
import { createResponse } from '../utils' import type { DataSource } from '../types' import type { StarbaseDBConfiguration } from '../handler' -vi.mock('./index', () => ({ - getTableData: vi.fn(), - createExportResponse: vi.fn(), +vi.mock('.', () => ({ + executeOperation: vi.fn(), })) vi.mock('../utils', () => ({ @@ -42,17 +41,18 @@ beforeEach(() => { }) describe('CSV Export Module', () => { - it('should return a CSV file when table data exists', async () => { - vi.mocked(getTableData).mockResolvedValue([ - { id: 1, name: 'Alice', age: 30 }, - { id: 2, name: 'Bob', age: 25 }, - ]) - - vi.mocked(createExportResponse).mockReturnValue( - new Response('mocked-csv-content', { - headers: { 'Content-Type': 'text/csv' }, - }) - ) + it('should stream a CSV file when table data exists', async () => { + vi.mocked(executeOperation) + .mockResolvedValueOnce([{ name: 'users' }]) + .mockResolvedValueOnce([ + { name: 'id', pk: 1 }, + { name: 'name', pk: 0 }, + { name: 'age', pk: 0 }, + ]) + .mockResolvedValueOnce([ + { id: 1, name: 'Alice', age: 30 }, + { id: 2, name: 'Bob', age: 25 }, + ]) const response = await exportTableToCsvRoute( 'users', @@ -60,21 +60,17 @@ describe('CSV Export Module', () => { mockConfig ) - expect(getTableData).toHaveBeenCalledWith( - 'users', - mockDataSource, - mockConfig + expect(response.headers.get('Content-Type')).toBe('text/csv') + expect(response.headers.get('Content-Disposition')).toBe( + 'attachment; filename="users_export.csv"' ) - expect(createExportResponse).toHaveBeenCalledWith( - 'id,name,age\n1,Alice,30\n2,Bob,25\n', - 'users_export.csv', - 'text/csv' + await expect(response.text()).resolves.toBe( + 'id,name,age\n1,Alice,30\n2,Bob,25\n' ) - expect(response.headers.get('Content-Type')).toBe('text/csv') }) it('should return 404 if table does not exist', async () => { - vi.mocked(getTableData).mockResolvedValue(null) + vi.mocked(executeOperation).mockResolvedValueOnce([]) const response = await exportTableToCsvRoute( 'non_existent_table', @@ 
-82,11 +78,6 @@ describe('CSV Export Module', () => { mockConfig ) - expect(getTableData).toHaveBeenCalledWith( - 'non_existent_table', - mockDataSource, - mockConfig - ) expect(response.status).toBe(404) const jsonResponse: { error: string } = await response.json() @@ -95,14 +86,14 @@ describe('CSV Export Module', () => { ) }) - it('should handle empty table (return only headers)', async () => { - vi.mocked(getTableData).mockResolvedValue([]) - - vi.mocked(createExportResponse).mockReturnValue( - new Response('mocked-csv-content', { - headers: { 'Content-Type': 'text/csv' }, - }) - ) + it('should include headers for empty tables', async () => { + vi.mocked(executeOperation) + .mockResolvedValueOnce([{ name: 'empty_table' }]) + .mockResolvedValueOnce([ + { name: 'id', pk: 1 }, + { name: 'name', pk: 0 }, + ]) + .mockResolvedValueOnce([]) const response = await exportTableToCsvRoute( 'empty_table', @@ -110,49 +101,90 @@ describe('CSV Export Module', () => { mockConfig ) - expect(getTableData).toHaveBeenCalledWith( - 'empty_table', + await expect(response.text()).resolves.toBe('id,name\n') + }) + + it('should escape commas, quotes, and newlines in CSV values', async () => { + vi.mocked(executeOperation) + .mockResolvedValueOnce([{ name: 'special_chars' }]) + .mockResolvedValueOnce([ + { name: 'id', pk: 1 }, + { name: 'name', pk: 0 }, + { name: 'bio', pk: 0 }, + ]) + .mockResolvedValueOnce([ + { + id: 1, + name: 'Sahithi, is', + bio: 'my forever "penguin"\nline', + }, + ]) + + const response = await exportTableToCsvRoute( + 'special_chars', mockDataSource, mockConfig ) - expect(createExportResponse).toHaveBeenCalledWith( - '', - 'empty_table_export.csv', - 'text/csv' + + await expect(response.text()).resolves.toBe( + 'id,name,bio\n1,"Sahithi, is","my forever ""penguin""\nline"\n' ) - expect(response.headers.get('Content-Type')).toBe('text/csv') }) - it('should escape commas and quotes in CSV values', async () => { - vi.mocked(getTableData).mockResolvedValue([ - { id: 
1, name: 'Sahithi, is', bio: 'my forever "penguin"' }, - ]) - - vi.mocked(createExportResponse).mockReturnValue( - new Response('mocked-csv-content', { - headers: { 'Content-Type': 'text/csv' }, - }) - ) + it('should page table data with a stable order instead of loading the full table', async () => { + const firstPage = Array.from({ length: 500 }, (_, index) => ({ + id: index + 1, + name: `User ${index + 1}`, + })) + + vi.mocked(executeOperation) + .mockResolvedValueOnce([{ name: 'users' }]) + .mockResolvedValueOnce([ + { name: 'id', pk: 1 }, + { name: 'name', pk: 0 }, + ]) + .mockResolvedValueOnce(firstPage) + .mockResolvedValueOnce([{ id: 501, name: 'Last User' }]) const response = await exportTableToCsvRoute( - 'special_chars', + 'users', mockDataSource, mockConfig ) - - expect(createExportResponse).toHaveBeenCalledWith( - 'id,name,bio\n1,"Sahithi, is","my forever ""penguin"""\n', - 'special_chars_export.csv', - 'text/csv' + const csv = await response.text() + + expect(csv).toContain('501,Last User\n') + expect(executeOperation).toHaveBeenNthCalledWith( + 3, + [ + { + sql: 'SELECT * FROM "users" ORDER BY "id" LIMIT ? OFFSET ?;', + params: [500, 0], + }, + ], + mockDataSource, + mockConfig + ) + expect(executeOperation).toHaveBeenNthCalledWith( + 4, + [ + { + sql: 'SELECT * FROM "users" ORDER BY "id" LIMIT ? 
OFFSET ?;', + params: [500, 500], + }, + ], + mockDataSource, + mockConfig ) - expect(response.headers.get('Content-Type')).toBe('text/csv') }) - it('should return 500 on an unexpected error', async () => { + it('should return 500 on an unexpected error before streaming starts', async () => { const consoleErrorMock = vi .spyOn(console, 'error') .mockImplementation(() => {}) - vi.mocked(getTableData).mockRejectedValue(new Error('Database Error')) + vi.mocked(executeOperation).mockRejectedValue( + new Error('Database Error') + ) const response = await exportTableToCsvRoute( 'users', @@ -163,5 +195,6 @@ describe('CSV Export Module', () => { expect(response.status).toBe(500) const jsonResponse: { error: string } = await response.json() expect(jsonResponse.error).toBe('Failed to export table to CSV') + consoleErrorMock.mockRestore() }) }) diff --git a/src/export/csv.ts b/src/export/csv.ts index 22a4591..ce27141 100644 --- a/src/export/csv.ts +++ b/src/export/csv.ts @@ -1,7 +1,34 @@ -import { getTableData, createExportResponse } from './index' import { createResponse } from '../utils' import { DataSource } from '../types' import { StarbaseDBConfiguration } from '../handler' +import { + createStreamingExportResponse, + formatCsvValue, + getTablePagePlan, + iterateTableRows, + tableExists, +} from './streaming' + +async function* csvTableChunks( + tableName: string, + dataSource: DataSource, + config: StarbaseDBConfiguration +): AsyncGenerator { + const pagePlan = await getTablePagePlan(tableName, dataSource, config) + + if (pagePlan.columns.length) { + yield `${pagePlan.columns.map(formatCsvValue).join(',')}\n` + } + + for await (const row of iterateTableRows( + tableName, + dataSource, + config, + pagePlan + )) { + yield `${pagePlan.columns.map((column) => formatCsvValue(row[column])).join(',')}\n` + } +} export async function exportTableToCsvRoute( tableName: string, @@ -9,9 +36,7 @@ export async function exportTableToCsvRoute( config: StarbaseDBConfiguration ): Promise 
{ try { - const data = await getTableData(tableName, dataSource, config) - - if (data === null) { + if (!(await tableExists(tableName, dataSource, config))) { return createResponse( undefined, `Table '${tableName}' does not exist.`, @@ -19,33 +44,8 @@ export async function exportTableToCsvRoute( ) } - // Convert the result to CSV - let csvContent = '' - if (data.length > 0) { - // Add headers - csvContent += Object.keys(data[0]).join(',') + '\n' - - // Add data rows - data.forEach((row: any) => { - csvContent += - Object.values(row) - .map((value) => { - if ( - typeof value === 'string' && - (value.includes(',') || - value.includes('"') || - value.includes('\n')) - ) { - return `"${value.replace(/"/g, '""')}"` - } - return value - }) - .join(',') + '\n' - }) - } - - return createExportResponse( - csvContent, + return createStreamingExportResponse( + csvTableChunks(tableName, dataSource, config), `${tableName}_export.csv`, 'text/csv' ) diff --git a/src/export/dump.test.ts b/src/export/dump.test.ts index ca65b43..3d0455e 100644 --- a/src/export/dump.test.ts +++ b/src/export/dump.test.ts @@ -39,23 +39,31 @@ beforeEach(() => { }) describe('Database Dump Module', () => { - it('should return a database dump when tables exist', async () => { + it('should stream a database dump when tables exist', async () => { vi.mocked(executeOperation) - .mockResolvedValueOnce([{ name: 'users' }, { name: 'orders' }]) + .mockResolvedValueOnce([{ name: 'orders' }, { name: 'users' }]) .mockResolvedValueOnce([ - { sql: 'CREATE TABLE users (id INTEGER, name TEXT);' }, - ]) - .mockResolvedValueOnce([ - { id: 1, name: 'Alice' }, - { id: 2, name: 'Bob' }, + { sql: 'CREATE TABLE orders (id INTEGER, total REAL);' }, ]) .mockResolvedValueOnce([ - { sql: 'CREATE TABLE orders (id INTEGER, total REAL);' }, + { name: 'id', pk: 1 }, + { name: 'total', pk: 0 }, ]) .mockResolvedValueOnce([ { id: 1, total: 99.99 }, { id: 2, total: 49.5 }, ]) + .mockResolvedValueOnce([ + { sql: 'CREATE TABLE users (id 
INTEGER, name TEXT);' }, + ]) + .mockResolvedValueOnce([ + { name: 'id', pk: 1 }, + { name: 'name', pk: 0 }, + ]) + .mockResolvedValueOnce([ + { id: 1, name: 'Alice' }, + { id: 2, name: 'Bob' }, + ]) const response = await dumpDatabaseRoute(mockDataSource, mockConfig) @@ -71,64 +79,123 @@ describe('Database Dump Module', () => { expect(dumpText).toContain( 'CREATE TABLE users (id INTEGER, name TEXT);' ) - expect(dumpText).toContain("INSERT INTO users VALUES (1, 'Alice');") - expect(dumpText).toContain("INSERT INTO users VALUES (2, 'Bob');") + expect(dumpText).toContain('INSERT INTO "users" VALUES (1, \'Alice\');') + expect(dumpText).toContain('INSERT INTO "users" VALUES (2, \'Bob\');') expect(dumpText).toContain( 'CREATE TABLE orders (id INTEGER, total REAL);' ) - expect(dumpText).toContain('INSERT INTO orders VALUES (1, 99.99);') - expect(dumpText).toContain('INSERT INTO orders VALUES (2, 49.5);') + expect(dumpText).toContain('INSERT INTO "orders" VALUES (1, 99.99);') + expect(dumpText).toContain('INSERT INTO "orders" VALUES (2, 49.5);') }) - it('should handle empty databases (no tables)', async () => { + it('should handle empty databases', async () => { vi.mocked(executeOperation).mockResolvedValueOnce([]) const response = await dumpDatabaseRoute(mockDataSource, mockConfig) expect(response).toBeInstanceOf(Response) - expect(response.headers.get('Content-Type')).toBe( - 'application/x-sqlite3' - ) const dumpText = await response.text() expect(dumpText).toBe('SQLite format 3\0') }) - it('should handle databases with tables but no data', async () => { + it('should escape SQL values and emit NULL for nullish values', async () => { vi.mocked(executeOperation) .mockResolvedValueOnce([{ name: 'users' }]) .mockResolvedValueOnce([ - { sql: 'CREATE TABLE users (id INTEGER, name TEXT);' }, + { sql: 'CREATE TABLE users (id INTEGER, bio TEXT);' }, + ]) + .mockResolvedValueOnce([ + { name: 'id', pk: 1 }, + { name: 'bio', pk: 0 }, + { name: 'empty', pk: 0 }, + ]) + 
.mockResolvedValueOnce([ + { id: 1, bio: "Alice's adventure", empty: null }, ]) - .mockResolvedValueOnce([]) const response = await dumpDatabaseRoute(mockDataSource, mockConfig) - expect(response).toBeInstanceOf(Response) const dumpText = await response.text() expect(dumpText).toContain( - 'CREATE TABLE users (id INTEGER, name TEXT);' + "INSERT INTO \"users\" VALUES (1, 'Alice''s adventure', NULL);" ) - expect(dumpText).not.toContain('INSERT INTO users VALUES') }) - it('should escape single quotes properly in string values', async () => { + it('should page table data with a stable order instead of loading the full table', async () => { + const firstPage = Array.from({ length: 500 }, (_, index) => ({ + id: index + 1, + name: `User${index + 1}`, + })) + const partialPage = [{ id: 501, name: 'User501' }] + vi.mocked(executeOperation) .mockResolvedValueOnce([{ name: 'users' }]) .mockResolvedValueOnce([ - { sql: 'CREATE TABLE users (id INTEGER, bio TEXT);' }, + { sql: 'CREATE TABLE users (id INTEGER, name TEXT);' }, + ]) + .mockResolvedValueOnce([ + { name: 'id', pk: 1 }, + { name: 'name', pk: 0 }, ]) - .mockResolvedValueOnce([{ id: 1, bio: "Alice's adventure" }]) + .mockResolvedValueOnce(firstPage) + .mockResolvedValueOnce(partialPage) const response = await dumpDatabaseRoute(mockDataSource, mockConfig) - - expect(response).toBeInstanceOf(Response) const dumpText = await response.text() + expect(dumpText).toContain( - "INSERT INTO users VALUES (1, 'Alice''s adventure');" + 'INSERT INTO "users" VALUES (501, \'User501\');' + ) + expect(executeOperation).toHaveBeenNthCalledWith( + 4, + [ + { + sql: 'SELECT * FROM "users" ORDER BY "id" LIMIT ? OFFSET ?;', + params: [500, 0], + }, + ], + mockDataSource, + mockConfig + ) + expect(executeOperation).toHaveBeenNthCalledWith( + 5, + [ + { + sql: 'SELECT * FROM "users" ORDER BY "id" LIMIT ? 
OFFSET ?;', + params: [500, 500], + }, + ], + mockDataSource, + mockConfig + ) + }) + + it('should fall back to rowid ordering when a table has no primary key', async () => { + vi.mocked(executeOperation) + .mockResolvedValueOnce([{ name: 'logs' }]) + .mockResolvedValueOnce([ + { sql: 'CREATE TABLE logs (message TEXT);' }, + ]) + .mockResolvedValueOnce([{ name: 'message', pk: 0 }]) + .mockResolvedValueOnce([{ message: 'ready' }]) + + const response = await dumpDatabaseRoute(mockDataSource, mockConfig) + await response.text() + + expect(executeOperation).toHaveBeenNthCalledWith( + 4, + [ + { + sql: 'SELECT * FROM "logs" ORDER BY rowid LIMIT ? OFFSET ?;', + params: [500, 0], + }, + ], + mockDataSource, + mockConfig ) }) - it('should return a 500 response when an error occurs', async () => { + it('should return a 500 response when an error occurs before streaming starts', async () => { const consoleErrorMock = vi .spyOn(console, 'error') .mockImplementation(() => {}) @@ -141,5 +208,6 @@ describe('Database Dump Module', () => { expect(response.status).toBe(500) const jsonResponse: { error: string } = await response.json() expect(jsonResponse.error).toBe('Failed to create database dump') + consoleErrorMock.mockRestore() }) }) diff --git a/src/export/dump.ts b/src/export/dump.ts index 91a2e89..63ca86f 100644 --- a/src/export/dump.ts +++ b/src/export/dump.ts @@ -2,68 +2,69 @@ import { executeOperation } from '.' 
import { StarbaseDBConfiguration } from '../handler' import { DataSource } from '../types' import { createResponse } from '../utils' +import { + createStreamingExportResponse, + formatSqlValue, + getTablePagePlan, + iterateTableRows, + listExportableTables, + quoteSqlIdentifier, +} from './streaming' -export async function dumpDatabaseRoute( +async function* dumpDatabaseChunks( + tables: string[], dataSource: DataSource, config: StarbaseDBConfiguration -): Promise { - try { - // Get all table names - const tablesResult = await executeOperation( - [{ sql: "SELECT name FROM sqlite_master WHERE type='table';" }], +): AsyncGenerator { + yield 'SQLite format 3\0' + + for (const table of tables) { + const schemaResult = await executeOperation( + [ + { + sql: "SELECT sql FROM sqlite_master WHERE type='table' AND name=?;", + params: [table], + }, + ], dataSource, config ) - const tables = tablesResult.map((row: any) => row.name) - let dumpContent = 'SQLite format 3\0' // SQLite file header - - // Iterate through all tables - for (const table of tables) { - // Get table schema - const schemaResult = await executeOperation( - [ - { - sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name='${table}';`, - }, - ], - dataSource, - config - ) + if (schemaResult.length) { + yield `\n-- Table: ${table}\n${schemaResult[0].sql};\n\n` + } - if (schemaResult.length) { - const schema = schemaResult[0].sql - dumpContent += `\n-- Table: ${table}\n${schema};\n\n` - } + const pagePlan = await getTablePagePlan(table, dataSource, config) + const quotedTableName = quoteSqlIdentifier(table) - // Get table data - const dataResult = await executeOperation( - [{ sql: `SELECT * FROM ${table};` }], - dataSource, - config + for await (const row of iterateTableRows( + table, + dataSource, + config, + pagePlan + )) { + const values = pagePlan.columns.map((column) => + formatSqlValue(row[column]) ) - - for (const row of dataResult) { - const values = Object.values(row).map((value) => - typeof 
value === 'string' - ? `'${value.replace(/'/g, "''")}'` - : value - ) - dumpContent += `INSERT INTO ${table} VALUES (${values.join(', ')});\n` - } - - dumpContent += '\n' + yield `INSERT INTO ${quotedTableName} VALUES (${values.join(', ')});\n` } - // Create a Blob from the dump content - const blob = new Blob([dumpContent], { type: 'application/x-sqlite3' }) + yield '\n' + } +} - const headers = new Headers({ - 'Content-Type': 'application/x-sqlite3', - 'Content-Disposition': 'attachment; filename="database_dump.sql"', - }) +export async function dumpDatabaseRoute( + dataSource: DataSource, + config: StarbaseDBConfiguration +): Promise { + try { + const tables = await listExportableTables(dataSource, config) - return new Response(blob, { headers }) + return createStreamingExportResponse( + dumpDatabaseChunks(tables, dataSource, config), + 'database_dump.sql', + 'application/x-sqlite3' + ) } catch (error: any) { console.error('Database Dump Error:', error) return createResponse(undefined, 'Failed to create database dump', 500) diff --git a/src/export/json.test.ts b/src/export/json.test.ts index 3fe4a8c..290a0cf 100644 --- a/src/export/json.test.ts +++ b/src/export/json.test.ts @@ -1,13 +1,12 @@ import { describe, it, expect, vi, beforeEach } from 'vitest' import { exportTableToJsonRoute } from './json' -import { getTableData, createExportResponse } from './index' +import { executeOperation } from '.' 
import { createResponse } from '../utils' import type { DataSource } from '../types' import type { StarbaseDBConfiguration } from '../handler' -vi.mock('./index', () => ({ - getTableData: vi.fn(), - createExportResponse: vi.fn(), +vi.mock('.', () => ({ + executeOperation: vi.fn(), })) vi.mock('../utils', () => ({ @@ -41,7 +40,7 @@ beforeEach(() => { describe('JSON Export Module', () => { it('should return a 404 response if table does not exist', async () => { - vi.mocked(getTableData).mockResolvedValue(null) + vi.mocked(executeOperation).mockResolvedValueOnce([]) const response = await exportTableToJsonRoute( 'missing_table', @@ -54,18 +53,16 @@ describe('JSON Export Module', () => { expect(jsonResponse.error).toBe("Table 'missing_table' does not exist.") }) - it('should return a JSON file when table data exists', async () => { + it('should stream a JSON file when table data exists', async () => { const mockData = [ { id: 1, name: 'Alice' }, { id: 2, name: 'Bob' }, ] - vi.mocked(getTableData).mockResolvedValue(mockData) - vi.mocked(createExportResponse).mockReturnValue( - new Response('mocked-json-content', { - headers: { 'Content-Type': 'application/json' }, - }) - ) + vi.mocked(executeOperation) + .mockResolvedValueOnce([{ name: 'users' }]) + .mockResolvedValueOnce([{ name: 'id', pk: 1 }]) + .mockResolvedValueOnce(mockData) const response = await exportTableToJsonRoute( 'users', @@ -73,27 +70,18 @@ describe('JSON Export Module', () => { mockConfig ) - expect(getTableData).toHaveBeenCalledWith( - 'users', - mockDataSource, - mockConfig - ) - expect(createExportResponse).toHaveBeenCalledWith( - JSON.stringify(mockData, null, 4), - 'users_export.json', - 'application/json' - ) expect(response.headers.get('Content-Type')).toBe('application/json') + expect(response.headers.get('Content-Disposition')).toBe( + 'attachment; filename="users_export.json"' + ) + await expect(response.json()).resolves.toEqual(mockData) }) it('should return an empty JSON array when table has 
no data', async () => { - vi.mocked(getTableData).mockResolvedValue([]) - - vi.mocked(createExportResponse).mockReturnValue( - new Response('mocked-json-content', { - headers: { 'Content-Type': 'application/json' }, - }) - ) + vi.mocked(executeOperation) + .mockResolvedValueOnce([{ name: 'empty_table' }]) + .mockResolvedValueOnce([{ name: 'id', pk: 1 }]) + .mockResolvedValueOnce([]) const response = await exportTableToJsonRoute( 'empty_table', @@ -101,12 +89,7 @@ describe('JSON Export Module', () => { mockConfig ) - expect(createExportResponse).toHaveBeenCalledWith( - '[]', - 'empty_table_export.json', - 'application/json' - ) - expect(response.headers.get('Content-Type')).toBe('application/json') + await expect(response.json()).resolves.toEqual([]) }) it('should escape special characters in JSON properly', async () => { @@ -114,13 +97,11 @@ describe('JSON Export Module', () => { { id: 1, name: 'Sahithi "The Best"' }, { id: 2, description: 'New\nLine' }, ] - vi.mocked(getTableData).mockResolvedValue(specialCharsData) - vi.mocked(createExportResponse).mockReturnValue( - new Response('mocked-json-content', { - headers: { 'Content-Type': 'application/json' }, - }) - ) + vi.mocked(executeOperation) + .mockResolvedValueOnce([{ name: 'special_chars' }]) + .mockResolvedValueOnce([{ name: 'id', pk: 1 }]) + .mockResolvedValueOnce(specialCharsData) const response = await exportTableToJsonRoute( 'special_chars', @@ -128,19 +109,58 @@ describe('JSON Export Module', () => { mockConfig ) - expect(createExportResponse).toHaveBeenCalledWith( - JSON.stringify(specialCharsData, null, 4), - 'special_chars_export.json', - 'application/json' + await expect(response.json()).resolves.toEqual(specialCharsData) + }) + + it('should page table data with a stable order instead of loading the full table', async () => { + const firstPage = Array.from({ length: 500 }, (_, index) => ({ + id: index + 1, + })) + + vi.mocked(executeOperation) + .mockResolvedValueOnce([{ name: 'users' }]) + 
.mockResolvedValueOnce([{ name: 'id', pk: 1 }]) + .mockResolvedValueOnce(firstPage) + .mockResolvedValueOnce([{ id: 501 }]) + + const response = await exportTableToJsonRoute( + 'users', + mockDataSource, + mockConfig + ) + + await expect(response.json()).resolves.toHaveLength(501) + expect(executeOperation).toHaveBeenNthCalledWith( + 3, + [ + { + sql: 'SELECT * FROM "users" ORDER BY "id" LIMIT ? OFFSET ?;', + params: [500, 0], + }, + ], + mockDataSource, + mockConfig + ) + expect(executeOperation).toHaveBeenNthCalledWith( + 4, + [ + { + sql: 'SELECT * FROM "users" ORDER BY "id" LIMIT ? OFFSET ?;', + params: [500, 500], + }, + ], + mockDataSource, + mockConfig ) - expect(response.headers.get('Content-Type')).toBe('application/json') }) - it('should return a 500 response when an error occurs', async () => { + it('should return a 500 response when an error occurs before streaming starts', async () => { const consoleErrorMock = vi .spyOn(console, 'error') .mockImplementation(() => {}) - vi.mocked(getTableData).mockRejectedValue(new Error('Database Error')) + vi.mocked(executeOperation).mockRejectedValue( + new Error('Database Error') + ) const response = await exportTableToJsonRoute( 'users', @@ -151,5 +171,6 @@ describe('JSON Export Module', () => { expect(response.status).toBe(500) const jsonResponse = (await response.json()) as { error: string } expect(jsonResponse.error).toBe('Failed to export table to JSON') + consoleErrorMock.mockRestore() }) }) diff --git a/src/export/json.ts b/src/export/json.ts index c0ab811..b5a831c 100644 --- a/src/export/json.ts +++ b/src/export/json.ts @@ -1,7 +1,28 @@ -import { getTableData, createExportResponse } from './index' import { createResponse } from '../utils' import { DataSource } from '../types' import { StarbaseDBConfiguration } from '../handler' +import { + createStreamingExportResponse, + iterateTableRows, + tableExists, +} from './streaming' + +async function* jsonTableChunks( + tableName: string, + dataSource: DataSource, 
+ config: StarbaseDBConfiguration +): AsyncGenerator { + let isFirstRow = true + + yield '[' + + for await (const row of iterateTableRows(tableName, dataSource, config)) { + yield `${isFirstRow ? '' : ','}\n${JSON.stringify(row, null, 4)}` + isFirstRow = false + } + + yield isFirstRow ? ']' : '\n]' +} export async function exportTableToJsonRoute( tableName: string, @@ -9,9 +30,7 @@ export async function exportTableToJsonRoute( config: StarbaseDBConfiguration ): Promise { try { - const data = await getTableData(tableName, dataSource, config) - - if (data === null) { + if (!(await tableExists(tableName, dataSource, config))) { return createResponse( undefined, `Table '${tableName}' does not exist.`, @@ -19,11 +38,8 @@ export async function exportTableToJsonRoute( ) } - // Convert the result to JSON - const jsonData = JSON.stringify(data, null, 4) - - return createExportResponse( - jsonData, + return createStreamingExportResponse( + jsonTableChunks(tableName, dataSource, config), `${tableName}_export.json`, 'application/json' ) diff --git a/src/export/streaming.test.ts b/src/export/streaming.test.ts new file mode 100644 index 0000000..128c947 --- /dev/null +++ b/src/export/streaming.test.ts @@ -0,0 +1,175 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest' +import { executeOperation } from '.' 
+import type { DataSource } from '../types' +import type { StarbaseDBConfiguration } from '../handler' +import { + createStreamingExportResponse, + formatCsvValue, + formatSqlValue, + getTablePagePlan, + iterateTableRows, + quoteSqlIdentifier, + sanitizeExportFileName, + tableExists, +} from './streaming' + +vi.mock('.', () => ({ + executeOperation: vi.fn(), +})) + +let mockDataSource: DataSource +let mockConfig: StarbaseDBConfiguration + +beforeEach(() => { + vi.clearAllMocks() + + mockDataSource = { + source: 'external', + external: { dialect: 'sqlite' }, + rpc: { executeQuery: vi.fn() }, + } as any + + mockConfig = { + outerbaseApiKey: 'mock-api-key', + role: 'admin', + features: { allowlist: true, rls: true, rest: true }, + } +}) + +afterEach(() => { + vi.unstubAllGlobals() +}) + +describe('Export Streaming Helpers', () => { + it('should quote SQL identifiers safely', () => { + expect(quoteSqlIdentifier('users')).toBe('"users"') + expect(quoteSqlIdentifier('weird"name')).toBe('"weird""name"') + }) + + it('should format SQL values for dumps', () => { + expect(formatSqlValue(null)).toBe('NULL') + expect(formatSqlValue(undefined)).toBe('NULL') + expect(formatSqlValue(Number.NaN)).toBe('NULL') + expect(formatSqlValue("Alice's")).toBe("'Alice''s'") + expect(formatSqlValue(true)).toBe('1') + expect(formatSqlValue(new Uint8Array([0, 15, 255]))).toBe("X'000fff'") + }) + + it('should escape CSV values only when needed', () => { + expect(formatCsvValue('Alice')).toBe('Alice') + expect(formatCsvValue('Alice, Bob')).toBe('"Alice, Bob"') + expect(formatCsvValue('Alice "A"')).toBe('"Alice ""A"""') + expect(formatCsvValue(null)).toBe('') + }) + + it('should sanitize generated download filenames', () => { + expect(sanitizeExportFileName('../bad:name.sql')).toBe( + '.._bad_name.sql' + ) + }) + + it('should check table existence with a bound table name', async () => { + vi.mocked(executeOperation).mockResolvedValueOnce([{ name: 'users' }]) + + await expect( + 
tableExists('users', mockDataSource, mockConfig) + ).resolves.toBe(true) + + expect(executeOperation).toHaveBeenCalledWith( + [ + { + sql: "SELECT name FROM sqlite_master WHERE type='table' AND name=?;", + params: ['users'], + }, + ], + mockDataSource, + mockConfig + ) + }) + + it('should prefer primary key ordering for stable pagination', async () => { + vi.mocked(executeOperation).mockResolvedValueOnce([ + { name: 'tenant_id', pk: 1 }, + { name: 'id', pk: 2 }, + { name: 'name', pk: 0 }, + ]) + + await expect( + getTablePagePlan('users', mockDataSource, mockConfig) + ).resolves.toEqual({ + columns: ['tenant_id', 'id', 'name'], + orderBy: '"tenant_id", "id"', + pageSize: 500, + }) + }) + + it('should iterate table rows in bounded pages and yield between full pages', async () => { + const wait = vi.fn().mockResolvedValue(undefined) + vi.stubGlobal('scheduler', { wait }) + + vi.mocked(executeOperation) + .mockResolvedValueOnce([ + { id: 1, name: 'Alice' }, + { id: 2, name: 'Bob' }, + ]) + .mockResolvedValueOnce([{ id: 3, name: 'Carla' }]) + + const rows = [] + + for await (const row of iterateTableRows( + 'users', + mockDataSource, + mockConfig, + { columns: ['id', 'name'], orderBy: '"id"', pageSize: 2 } + )) { + rows.push(row) + } + + expect(rows).toEqual([ + { id: 1, name: 'Alice' }, + { id: 2, name: 'Bob' }, + { id: 3, name: 'Carla' }, + ]) + expect(wait).toHaveBeenCalledWith(0) + expect(executeOperation).toHaveBeenNthCalledWith( + 1, + [ + { + sql: 'SELECT * FROM "users" ORDER BY "id" LIMIT ? OFFSET ?;', + params: [2, 0], + }, + ], + mockDataSource, + mockConfig + ) + expect(executeOperation).toHaveBeenNthCalledWith( + 2, + [ + { + sql: 'SELECT * FROM "users" ORDER BY "id" LIMIT ? 
OFFSET ?;',
+                    params: [2, 2],
+                },
+            ],
+            mockDataSource,
+            mockConfig
+        )
+    })
+
+    it('should create a readable streaming response', async () => {
+        async function* chunks() {
+            yield 'hello'
+            yield ' '
+            yield 'world'
+        }
+
+        const response = createStreamingExportResponse(
+            chunks(),
+            'hello.txt',
+            'text/plain'
+        )
+
+        expect(response.headers.get('Content-Type')).toBe('text/plain')
+        expect(response.headers.get('Cache-Control')).toBe('no-store')
+        await expect(response.text()).resolves.toBe('hello world')
+    })
+})
diff --git a/src/export/streaming.ts b/src/export/streaming.ts
new file mode 100644
index 0000000..e9eb373
--- /dev/null
+++ b/src/export/streaming.ts
@@ -0,0 +1,246 @@
+import { StarbaseDBConfiguration } from '../handler'
+import { DataSource } from '../types'
+import { executeOperation } from './index'
+
+/** Default number of rows fetched per page while streaming an export. */
+export const DEFAULT_EXPORT_PAGE_SIZE = 500
+
+// Partial view of the runtime `scheduler` global; `wait` may be absent
+// outside Cloudflare Workers, so callers must feature-detect it.
+type SchedulerWithWait = {
+    wait?: (duration: number) => Promise<void>
+}
+
+/** Pagination strategy computed once per table and reused for every page. */
+export type TablePagePlan = {
+    /** Column names in schema order. */
+    columns: string[]
+    /** ORDER BY expression that makes OFFSET pagination stable. */
+    orderBy: string
+    /** Maximum number of rows fetched per query. */
+    pageSize: number
+}
+
+/** Quotes a SQL identifier, doubling any embedded double quotes. */
+export function quoteSqlIdentifier(identifier: string): string {
+    return `"${identifier.replace(/"/g, '""')}"`
+}
+
+function bytesToHex(value: Uint8Array): string {
+    return Array.from(value)
+        .map((byte) => byte.toString(16).padStart(2, '0'))
+        .join('')
+}
+
+// Renders binary data as a SQLite blob literal, e.g. X'0aff'.
+function binaryToHexLiteral(value: ArrayBuffer | ArrayBufferView): string {
+    const bytes =
+        value instanceof ArrayBuffer
+            ? new Uint8Array(value)
+            : new Uint8Array(value.buffer, value.byteOffset, value.byteLength)
+
+    return `X'${bytesToHex(bytes)}'`
+}
+
+/**
+ * Formats a JavaScript value as a SQL literal for dump output.
+ *
+ * Non-finite numbers become NULL (SQLite has no NaN/Infinity literal),
+ * booleans map to 1/0, binary data becomes a hex blob literal, and
+ * everything else is stringified with single quotes doubled.
+ */
+export function formatSqlValue(value: unknown): string {
+    if (value === null || value === undefined) return 'NULL'
+
+    if (typeof value === 'number') {
+        return Number.isFinite(value) ? String(value) : 'NULL'
+    }
+
+    if (typeof value === 'bigint') return value.toString()
+    if (typeof value === 'boolean') return value ? '1' : '0'
+
+    if (value instanceof ArrayBuffer || ArrayBuffer.isView(value)) {
+        return binaryToHexLiteral(value)
+    }
+
+    return `'${String(value).replace(/'/g, "''")}'`
+}
+
+/**
+ * Formats a value as a single CSV field per RFC 4180: fields containing
+ * a quote, comma, or line break are wrapped in quotes with inner quotes
+ * doubled. Null/undefined become the empty field.
+ */
+export function formatCsvValue(value: unknown): string {
+    if (value === null || value === undefined) return ''
+
+    const text =
+        value instanceof ArrayBuffer || ArrayBuffer.isView(value)
+            ? binaryToHexLiteral(value)
+            : String(value)
+
+    if (/[",\r\n]/.test(text)) {
+        return `"${text.replace(/"/g, '""')}"`
+    }
+
+    return text
+}
+
+/**
+ * Replaces control characters and filesystem/header-unsafe characters
+ * with underscores so the name is safe inside a Content-Disposition
+ * header and on common filesystems.
+ */
+export function sanitizeExportFileName(fileName: string): string {
+    return fileName.replace(/[\x00-\x1f\x7f"\\/:*?<>|]+/g, '_')
+}
+
+/**
+ * Yields control back to the runtime so a long-running export does not
+ * monopolize the event loop. Prefers `scheduler.wait` where available
+ * (Cloudflare Workers), falling back to a zero-delay timeout.
+ */
+export async function yieldToRuntime(): Promise<void> {
+    const scheduler = (globalThis as { scheduler?: SchedulerWithWait })
+        .scheduler
+
+    if (scheduler?.wait) {
+        await scheduler.wait(0)
+        return
+    }
+
+    await new Promise((resolve) => setTimeout(resolve, 0))
+}
+
+/** Returns true when a table with the given name exists in sqlite_master. */
+export async function tableExists(
+    tableName: string,
+    dataSource: DataSource,
+    config: StarbaseDBConfiguration
+): Promise<boolean> {
+    const result = await executeOperation(
+        [
+            {
+                sql: "SELECT name FROM sqlite_master WHERE type='table' AND name=?;",
+                params: [tableName],
+            },
+        ],
+        dataSource,
+        config
+    )
+
+    return result.length > 0
+}
+
+/**
+ * Lists user tables eligible for export, excluding SQLite's internal
+ * `sqlite_*` tables, sorted by name.
+ */
+export async function listExportableTables(
+    dataSource: DataSource,
+    config: StarbaseDBConfiguration
+): Promise<string[]> {
+    const result = await executeOperation(
+        [
+            {
+                sql: "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name;",
+            },
+        ],
+        dataSource,
+        config
+    )
+
+    return result
+        .map((row: any) => row.name)
+        .filter((name: unknown): name is string => typeof name === 'string')
+}
+
+// Shared PRAGMA table_info lookup so getTableColumns and getTablePagePlan
+// stay consistent instead of duplicating the same query and row handling.
+async function fetchTableInfo(
+    tableName: string,
+    dataSource: DataSource,
+    config: StarbaseDBConfiguration
+): Promise<any[]> {
+    return executeOperation(
+        [{ sql: `PRAGMA table_info(${quoteSqlIdentifier(tableName)});` }],
+        dataSource,
+        config
+    )
+}
+
+/** Returns the table's column names in schema order. */
+export async function getTableColumns(
+    tableName: string,
+    dataSource: DataSource,
+    config: StarbaseDBConfiguration
+): Promise<string[]> {
+    const result = await fetchTableInfo(tableName, dataSource, config)
+
+    return result
+        .map((column: any) => column.name)
+        .filter((name: unknown): name is string => typeof name === 'string')
+}
+
+/**
+ * Builds a pagination plan for a table. Ordering uses the declared
+ * primary key columns (in key order) when present, falling back to
+ * rowid, so OFFSET pagination visits each row exactly once.
+ */
+export async function getTablePagePlan(
+    tableName: string,
+    dataSource: DataSource,
+    config: StarbaseDBConfiguration,
+    pageSize = DEFAULT_EXPORT_PAGE_SIZE
+): Promise<TablePagePlan> {
+    const result = await fetchTableInfo(tableName, dataSource, config)
+
+    const columns = result
+        .map((column: any) => column.name)
+        .filter((name: unknown): name is string => typeof name === 'string')
+
+    // PRAGMA table_info reports pk as the 1-based position of the column
+    // within the primary key, or 0 when it is not part of the key.
+    const primaryKeyColumns = result
+        .filter((column: any) => Number(column.pk) > 0)
+        .sort((left: any, right: any) => Number(left.pk) - Number(right.pk))
+        .map((column: any) => quoteSqlIdentifier(column.name))
+
+    return {
+        columns,
+        orderBy: primaryKeyColumns.length
+            ? primaryKeyColumns.join(', ')
+            : 'rowid',
+        pageSize,
+    }
+}
+
+/**
+ * Iterates every row of a table in bounded pages so an export never
+ * materializes the whole table in memory, yielding to the runtime
+ * between full pages. NOTE(review): LIMIT/OFFSET re-scans skipped rows,
+ * so total work is quadratic in table size; keyset pagination on the
+ * primary key would be an interface-compatible future improvement.
+ */
+export async function* iterateTableRows(
+    tableName: string,
+    dataSource: DataSource,
+    config: StarbaseDBConfiguration,
+    plan?: TablePagePlan
+): AsyncGenerator<Record<string, unknown>> {
+    const pagePlan =
+        plan ??
+        (await getTablePagePlan(
+            tableName,
+            dataSource,
+            config,
+            DEFAULT_EXPORT_PAGE_SIZE
+        ))
+    const quotedTableName = quoteSqlIdentifier(tableName)
+    let offset = 0
+
+    while (true) {
+        const rows = await executeOperation(
+            [
+                {
+                    sql: `SELECT * FROM ${quotedTableName} ORDER BY ${pagePlan.orderBy} LIMIT ? OFFSET ?;`,
+                    params: [pagePlan.pageSize, offset],
+                },
+            ],
+            dataSource,
+            config
+        )
+
+        if (!rows.length) return
+
+        for (const row of rows) {
+            yield row
+        }
+
+        offset += rows.length
+
+        // A short page means we have reached the end of the table.
+        if (rows.length < pagePlan.pageSize) return
+
+        await yieldToRuntime()
+    }
+}
+
+/**
+ * Wraps an async iterable of text chunks in a streamed attachment
+ * Response. Chunks are pulled on demand (backpressure-aware) and
+ * UTF-8 encoded; cancelling the response finalizes the iterator.
+ */
+export function createStreamingExportResponse(
+    chunks: AsyncIterable<string>,
+    fileName: string,
+    contentType: string
+): Response {
+    const encoder = new TextEncoder()
+    const iterator = chunks[Symbol.asyncIterator]()
+
+    const body = new ReadableStream<Uint8Array>({
+        async pull(controller) {
+            try {
+                const { done, value } = await iterator.next()
+
+                if (done) {
+                    controller.close()
+                    return
+                }
+
+                controller.enqueue(encoder.encode(value))
+            } catch (error) {
+                controller.error(error)
+            }
+        },
+        async cancel() {
+            await iterator.return?.()
+        },
+    })
+
+    return new Response(body, {
+        headers: {
+            'Cache-Control': 'no-store',
+            'Content-Type': contentType,
+            'Content-Disposition': `attachment; filename="${sanitizeExportFileName(fileName)}"`,
+        },
+    })
+}