+ {title &&
{title}
}
+ {description &&
{description}
}
+
+
+
+
+
+ {showCostTable && (
+
+
+
+
+ | Configuration |
+ Time |
+ Cost |
+ $/hour |
+
+
+
+ {configs.map((config, idx) => {
+ const cost = calculateCost(config);
+ const hourlyRate = config.platform === 'snowflake'
+ ? (SNOWFLAKE_CREDITS_PER_HOUR[config.warehouseSize || ''] || 0) * SNOWFLAKE_CREDIT_PRICE
+ : config.platform === 'local'
+ ? 0
+ : (AWS_PRICING[config.instanceType || ''] || 0) * (config.instanceCount || 1);
+ return (
+
+ |
+ {config.label}
+ |
+
+ {formatTime(config.totalTimeSeconds)}
+ |
+
+ ${cost.toFixed(2)}
+ |
+
+ ${hourlyRate.toFixed(2)}
+ |
+
+ );
+ })}
+
+
+
+ )}
+
+ );
+}
+
+// Pre-configured benchmark data for different scale factors
+
+export const SF10_CONFIGS: BenchmarkConfig[] = [ // Benchmark runs at SF10 (the 10 GB dataset, per the accompanying MDX section). In every entry totalTimeSeconds is larger than the sum of its stage timings — presumably overhead outside the stages; TODO confirm.
+ { // Snowflake run: six stages, including 'queries' and 'verify'.
+ label: 'Airflow + Snowflake Small',
+ platform: 'snowflake',
+ warehouseSize: 'Small', // looked up in SNOWFLAKE_CREDITS_PER_HOUR by the cost table
+ totalTimeSeconds: 85.26, // stage timings below sum to 77.66
+ stages: [
+ { stage: 'ingest', wallClockSeconds: 19.41 },
+ { stage: 'validate', wallClockSeconds: 5.91 },
+ { stage: 'denormalize', wallClockSeconds: 37.57 },
+ { stage: 'aggregate', wallClockSeconds: 7.74 },
+ { stage: 'queries', wallClockSeconds: 4.71 }, // NOTE(review): Windmill entries name this stage 'query' — confirm the consumer treats both names as the same stage
+ { stage: 'verify', wallClockSeconds: 2.32 } // no counterpart stage in the Windmill entries
+ ]
+ },
+ { // Windmill run: five stages, no 'verify'.
+ label: 'Windmill 2× m6i.4xlarge',
+ platform: 'windmill',
+ instanceCount: 2, // multiplies the per-instance hourly price in the cost table
+ instanceType: 'm6i.4xlarge', // looked up in AWS_PRICING by the cost table
+ totalTimeSeconds: 85.78,
+ stages: [
+ { stage: 'ingest', wallClockSeconds: 21.74 },
+ { stage: 'validate', wallClockSeconds: 10.24 },
+ { stage: 'denormalize', wallClockSeconds: 23.20 },
+ { stage: 'aggregate', wallClockSeconds: 12.44 },
+ { stage: 'query', wallClockSeconds: 17.77 }
+ ]
+ },
+ { // Same values as the Windmill entry in AIRFLOW_PANDAS_SF10_CONFIGS; keep the two in sync.
+ label: 'Windmill 2× m6a.8xlarge',
+ platform: 'windmill',
+ instanceCount: 2,
+ instanceType: 'm6a.8xlarge',
+ totalTimeSeconds: 67.44,
+ stages: [
+ { stage: 'ingest', wallClockSeconds: 23.72 },
+ { stage: 'validate', wallClockSeconds: 6.04 },
+ { stage: 'denormalize', wallClockSeconds: 19.11 },
+ { stage: 'aggregate', wallClockSeconds: 6.84 },
+ { stage: 'query', wallClockSeconds: 10.87 }
+ ]
+ }
+];
+
+export const SF100_CONFIGS: BenchmarkConfig[] = [ // Benchmark runs at SF100 — presumably the 100 GB dataset (the MDX lists 10 GB, 100 GB, 1 TB); TODO confirm. In every entry totalTimeSeconds is slightly larger than the sum of its stage timings.
+ { // Snowflake runs list six stages ('queries', 'verify'); Windmill runs list five ('query').
+ label: 'Airflow + Snowflake Small',
+ platform: 'snowflake',
+ warehouseSize: 'Small', // looked up in SNOWFLAKE_CREDITS_PER_HOUR by the cost table
+ totalTimeSeconds: 711.14,
+ stages: [
+ { stage: 'ingest', wallClockSeconds: 218.49 },
+ { stage: 'validate', wallClockSeconds: 7.43 },
+ { stage: 'denormalize', wallClockSeconds: 394.44 },
+ { stage: 'aggregate', wallClockSeconds: 69.38 },
+ { stage: 'queries', wallClockSeconds: 11.88 }, // NOTE(review): Windmill entries name this stage 'query' — confirm the consumer treats both names as the same stage
+ { stage: 'verify', wallClockSeconds: 3.46 }
+ ]
+ },
+ {
+ label: 'Airflow + Snowflake Large',
+ platform: 'snowflake',
+ warehouseSize: 'Large',
+ totalTimeSeconds: 194.05,
+ stages: [
+ { stage: 'ingest', wallClockSeconds: 39.71 },
+ { stage: 'validate', wallClockSeconds: 6.15 },
+ { stage: 'denormalize', wallClockSeconds: 107.65 },
+ { stage: 'aggregate', wallClockSeconds: 23.08 },
+ { stage: 'queries', wallClockSeconds: 6.99 },
+ { stage: 'verify', wallClockSeconds: 2.61 }
+ ]
+ },
+ { // Windmill runs: 1 or 3 instances of m6a.8xlarge or m6a.16xlarge.
+ label: 'Windmill 1× m6a.8xlarge',
+ platform: 'windmill',
+ instanceCount: 1, // multiplies the per-instance hourly price in the cost table
+ instanceType: 'm6a.8xlarge', // looked up in AWS_PRICING by the cost table
+ totalTimeSeconds: 651.98,
+ stages: [
+ { stage: 'ingest', wallClockSeconds: 141.64 },
+ { stage: 'validate', wallClockSeconds: 61.00 },
+ { stage: 'denormalize', wallClockSeconds: 159.21 },
+ { stage: 'aggregate', wallClockSeconds: 147.90 },
+ { stage: 'query', wallClockSeconds: 141.24 }
+ ]
+ },
+ {
+ label: 'Windmill 1× m6a.16xlarge',
+ platform: 'windmill',
+ instanceCount: 1,
+ instanceType: 'm6a.16xlarge',
+ totalTimeSeconds: 391.33,
+ stages: [
+ { stage: 'ingest', wallClockSeconds: 96.15 },
+ { stage: 'validate', wallClockSeconds: 30.19 },
+ { stage: 'denormalize', wallClockSeconds: 114.33 },
+ { stage: 'aggregate', wallClockSeconds: 65.09 },
+ { stage: 'query', wallClockSeconds: 84.57 }
+ ]
+ },
+ {
+ label: 'Windmill 3× m6a.8xlarge',
+ platform: 'windmill',
+ instanceCount: 3,
+ instanceType: 'm6a.8xlarge',
+ totalTimeSeconds: 261.27,
+ stages: [
+ { stage: 'ingest', wallClockSeconds: 50.82 },
+ { stage: 'validate', wallClockSeconds: 23.27 },
+ { stage: 'denormalize', wallClockSeconds: 79.45 },
+ { stage: 'aggregate', wallClockSeconds: 56.03 },
+ { stage: 'query', wallClockSeconds: 51.30 }
+ ]
+ },
+ {
+ label: 'Windmill 3× m6a.16xlarge',
+ platform: 'windmill',
+ instanceCount: 3,
+ instanceType: 'm6a.16xlarge',
+ totalTimeSeconds: 157.11,
+ stages: [
+ { stage: 'ingest', wallClockSeconds: 32.03 },
+ { stage: 'validate', wallClockSeconds: 11.51 },
+ { stage: 'denormalize', wallClockSeconds: 53.60 },
+ { stage: 'aggregate', wallClockSeconds: 22.50 },
+ { stage: 'query', wallClockSeconds: 37.09 }
+ ]
+ }
+];
+
+export const SF1000_CONFIGS: BenchmarkConfig[] = [ // Benchmark runs at SF1000 — presumably the 1 TB dataset (the MDX lists 10 GB, 100 GB, 1 TB); TODO confirm. In both entries totalTimeSeconds is slightly larger than the sum of its stage timings.
+ { // Snowflake run: six stages, including 'queries' and 'verify'.
+ label: 'Airflow + Snowflake Large',
+ platform: 'snowflake',
+ warehouseSize: 'Large', // looked up in SNOWFLAKE_CREDITS_PER_HOUR by the cost table
+ totalTimeSeconds: 1670.66,
+ stages: [
+ { stage: 'ingest', wallClockSeconds: 326.74 },
+ { stage: 'validate', wallClockSeconds: 12.53 },
+ { stage: 'denormalize', wallClockSeconds: 1168.55 },
+ { stage: 'aggregate', wallClockSeconds: 134.66 },
+ { stage: 'queries', wallClockSeconds: 19.97 }, // NOTE(review): the Windmill entry names this stage 'query' — confirm the consumer treats both names as the same stage
+ { stage: 'verify', wallClockSeconds: 2.86 }
+ ]
+ },
+ { // Windmill run: five stages, no 'verify'; the only entry in this file that uses an r6a instance type.
+ label: 'Windmill 3× r6a.8xlarge',
+ platform: 'windmill',
+ instanceCount: 3, // multiplies the per-instance hourly price in the cost table
+ instanceType: 'r6a.8xlarge', // looked up in AWS_PRICING by the cost table
+ totalTimeSeconds: 4198.39,
+ stages: [
+ { stage: 'ingest', wallClockSeconds: 395.66 },
+ { stage: 'validate', wallClockSeconds: 252.32 },
+ { stage: 'denormalize', wallClockSeconds: 1012.17 },
+ { stage: 'aggregate', wallClockSeconds: 874.26 },
+ { stage: 'query', wallClockSeconds: 1663.36 }
+ ]
+ }
+];
+
+// Airflow + Pandas comparison configs (SF10 only - Pandas cannot scale beyond this)
+export const AIRFLOW_PANDAS_SF10_CONFIGS: BenchmarkConfig[] = [ // Two-entry comparison at SF10: a local Airflow + Pandas run against one Windmill run.
+ { // No warehouseSize or instanceType: the cost table's hourly-rate expression yields 0 for platform 'local'.
+ label: 'Airflow + Pandas (64 GB local)',
+ platform: 'local',
+ totalTimeSeconds: 2813.82, // stage timings below sum to 2479 — the remainder is presumably time outside the stages; TODO confirm
+ stages: [
+ { stage: 'ingest', wallClockSeconds: 600 },
+ { stage: 'validate', wallClockSeconds: 168 },
+ { stage: 'denormalize', wallClockSeconds: 323 },
+ { stage: 'aggregate', wallClockSeconds: 433 },
+ { stage: 'query', wallClockSeconds: 824 }, // named 'query' here, whereas the Airflow + Snowflake entries use 'queries'
+ { stage: 'verify', wallClockSeconds: 131 }
+ ]
+ },
+ { // Same values as the 'Windmill 2× m6a.8xlarge' entry in SF10_CONFIGS; keep the two in sync.
+ label: 'Windmill 2× m6a.8xlarge',
+ platform: 'windmill',
+ instanceCount: 2,
+ instanceType: 'm6a.8xlarge',
+ totalTimeSeconds: 67.44,
+ stages: [
+ { stage: 'ingest', wallClockSeconds: 23.72 },
+ { stage: 'validate', wallClockSeconds: 6.04 },
+ { stage: 'denormalize', wallClockSeconds: 19.11 },
+ { stage: 'aggregate', wallClockSeconds: 6.84 },
+ { stage: 'query', wallClockSeconds: 10.87 }
+ ]
+ }
+];
diff --git a/src/components/TpcDsBenchmarkSection.mdx b/src/components/TpcDsBenchmarkSection.mdx
new file mode 100644
index 000000000..aef6b8c7e
--- /dev/null
+++ b/src/components/TpcDsBenchmarkSection.mdx
@@ -0,0 +1,105 @@
+import TpcDsBenchmark, {
+ SF10_CONFIGS,
+ SF100_CONFIGS,
+ SF1000_CONFIGS,
+ AIRFLOW_PANDAS_SF10_CONFIGS
+} from '@site/src/components/TpcDsBenchmark';
+
+We ran the [TPC-DS](https://www.tpc.org/tpcds/) benchmark at three scale factors (10 GB, 100 GB, 1 TB) to compare Windmill + Ducklake against Airflow + Snowflake, the most common open-source orchestrator paired with a managed data warehouse.
+
+### Why TPC-DS?
+
+TPC-DS simulates a retail company's data warehouse, which mirrors a real ETL pipeline. The benchmark runs 52 tasks organized in 5 stages:
+
+| Stage | Tasks | What it does |
+| ----- | ----: | ------------ |
+| Ingest | 24 | Load raw data from 24 source tables (sales, returns, inventory, customers, products...) |
+| Validate | 8 | Check referential integrity and data quality across fact and dimension tables |
+| Denormalize | 3 | Join fact tables with dimensions to create analytics-ready tables |
+| Aggregate | 6 | Compute business metrics: daily sales, customer LTV, return rates, channel comparison... |
+| Query | 10 | Run 10 analytical queries (TPC-DS queries 3, 7, 19, 27, 34, 43, 46, 53, 67, 79) |
+
+### Pricing assumptions
+
+- Airflow + Snowflake: Snowflake Standard tier at $2/credit (Enterprise is $3-4/credit and Business Critical is $4-5/credit). Airflow orchestration cost is excluded; managed Airflow adds $100-500/month.
+- Windmill + Ducklake: AWS EC2 on-demand pricing in us-east-1.
+
+| Snowflake Warehouse | Credits/hour | $/hour |
+| ------------------- | -----------: | -----: |
+| Small | 2 | $4 |
+| Large | 8 | $16 |
+
+| AWS Instance | vCPUs | Memory | $/hour |
+| -------------- | ----: | -----: | -----: |
+| m6i.4xlarge | 16 | 64 GB | $0.77 |
+| m6a.8xlarge | 32 | 128 GB | $1.38 |
+| m6a.16xlarge | 64 | 256 GB | $2.76 |
+| r6a.8xlarge | 32 | 256 GB | $1.81 |
+
+### SF10: 10 GB dataset
+
+At 10 GB, Windmill matches Snowflake's speed while costing 2-2.5x less.
+
+