From 85c10b49c470483c1577e71b4296444ffa0afe77 Mon Sep 17 00:00:00 2001 From: Hippolyte Barraud Date: Wed, 8 Apr 2026 00:26:16 -0400 Subject: [PATCH] feat(parquet): add struct-column writer benchmarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add three new benchmark cases to the arrow_writer benchmark suite for evaluating write performance on struct columns at varying null densities: - `struct_non_null`: a nullable struct with 0% null rows and non-nullable primitive children; - `struct_sparse_99pct_null`: a nullable struct with 99% null rows, exercising null batching through one level of struct nesting; - `struct_all_null`: a nullable struct with 100% null rows, exercising the uniform-null path through struct nesting. Baseline results (Apple M1 Max): struct_non_null/default 29.9 ms struct_non_null/parquet_2 38.2 ms struct_non_null/zstd_parquet_2 50.9 ms struct_sparse_99pct_null/default 7.2 ms struct_sparse_99pct_null/parquet_2 7.3 ms struct_sparse_99pct_null/zstd_p2 8.1 ms struct_all_null/default 83.3 µs struct_all_null/parquet_2 82.5 µs struct_all_null/zstd_parquet_2 106.6 µs Signed-off-by: Hippolyte Barraud --- parquet/benches/arrow_writer.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/parquet/benches/arrow_writer.rs b/parquet/benches/arrow_writer.rs index 909d41982523..6b48afbf3d81 100644 --- a/parquet/benches/arrow_writer.rs +++ b/parquet/benches/arrow_writer.rs @@ -266,6 +266,25 @@ fn create_list_primitive_bench_batch_non_null( )?) } +fn create_struct_bench_batch(size: usize, null_density: f32) -> Result<RecordBatch> { + let fields = vec![Field::new( + "_1", + DataType::Struct(Fields::from(vec![ + Field::new("_1", DataType::Int32, false), + Field::new("_2", DataType::Int64, false), + Field::new("_3", DataType::Float32, false), + ])), + true, + )]; + let schema = Schema::new(fields); + Ok(create_random_batch( + Arc::new(schema), + size, + null_density, + 0.75, + )?) 
+} + fn _create_nested_bench_batch( size: usize, null_density: f32, @@ -400,6 +419,15 @@ fn create_batches() -> Vec<(&'static str, RecordBatch)> { let batch = create_primitive_bench_batch(BATCH_SIZE, 1.0, 0.75).unwrap(); batches.push(("primitive_all_null", batch)); + let batch = create_struct_bench_batch(BATCH_SIZE, 0.0).unwrap(); + batches.push(("struct_non_null", batch)); + + let batch = create_struct_bench_batch(BATCH_SIZE, 0.99).unwrap(); + batches.push(("struct_sparse_99pct_null", batch)); + + let batch = create_struct_bench_batch(BATCH_SIZE, 1.0).unwrap(); + batches.push(("struct_all_null", batch)); + batches }