From ae66420d0a5181087248289bae70e4b359bb9d78 Mon Sep 17 00:00:00 2001 From: Anastasios Bakogiannis Date: Thu, 16 Apr 2026 09:19:08 +0000 Subject: [PATCH] Use ListArray nullability instead of offsets. In array_element, array_any_value use the ListArray's nullability to detect null rows instead of relying on the offsets. --- datafusion/functions-nested/src/extract.rs | 63 +++++++++++++++++++--- 1 file changed, 56 insertions(+), 7 deletions(-) diff --git a/datafusion/functions-nested/src/extract.rs b/datafusion/functions-nested/src/extract.rs index 4176a31f2a58a..12cca0e52ab7f 100644 --- a/datafusion/functions-nested/src/extract.rs +++ b/datafusion/functions-nested/src/extract.rs @@ -257,7 +257,7 @@ where let len = end - start; // array is null - if len == O::usize_as(0) { + if array.is_null(row_index) { mutable.extend_nulls(1); continue; } @@ -1064,11 +1064,9 @@ where for (row_index, offset_window) in array.offsets().windows(2).enumerate() { let start = offset_window[0]; - let end = offset_window[1]; - let len = end - start; // array is null - if len == O::usize_as(0) { + if array.is_null(row_index) { mutable.extend_nulls(1); continue; } @@ -1101,14 +1099,18 @@ where #[cfg(test)] mod tests { - use super::{array_element_udf, general_list_view_array_slice}; + use super::{ + array_element_udf, general_array_any_value, general_array_element, + general_list_view_array_slice, + }; use arrow::array::{ Array, ArrayRef, GenericListViewArray, Int32Array, Int64Array, ListViewArray, cast::AsArray, }; - use arrow::buffer::ScalarBuffer; + use arrow::array::{ListArray, RecordBatch}; + use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer}; use arrow::datatypes::{DataType, Field}; - use datafusion_common::{Column, DFSchema, Result}; + use datafusion_common::{Column, DFSchema, Result, assert_batches_eq}; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::{Expr, ExprSchemable}; use std::collections::HashMap; @@ -1169,6 +1171,53 @@ mod tests { ); } + #[test] + fn test_array_element_null_handling() -> Result<()> { + let values = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])); + let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 4, 5])); + let nulls = NullBuffer::from(vec![true, false, true]); + let field = Arc::new(Field::new("item", DataType::Int32, true)); + + let list_array = ListArray::new(field, offsets, values, Some(nulls)); + let indexes = Int64Array::from(vec![1, 1, 1]); + + let result = general_array_element(&list_array, &indexes)?; + + let expected = [ + "+--------+", + "| result |", + "+--------+", + "| 1 |", + "| |", + "| 5 |", + "+--------+", + ]; + + let batch = RecordBatch::try_from_iter([("result", result)])?; + + assert_batches_eq!(expected, &[batch]); + + Ok(()) + } + + #[test] + fn test_array_any_null_handling() -> Result<()> { + let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])); + let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 4, 5])); + let nulls = NullBuffer::from(vec![true, false, true]); + let field = Arc::new(Field::new("item", DataType::Int32, true)); + + let list_array = ListArray::new(field, offsets, values, Some(nulls)); + + let result = general_array_any_value(&list_array)?; + + assert!(!result.is_null(0)); + assert!(result.is_null(1)); + assert!(!result.is_null(2)); + + Ok(()) + } + #[test] fn test_array_slice_list_view_basic() -> Result<()> { let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5]));