From a4228b9ac87eb68e7120867284554881459251d7 Mon Sep 17 00:00:00 2001 From: JeelRajodiya Date: Wed, 15 Apr 2026 11:47:26 +0530 Subject: [PATCH 1/3] feat: Add Spark-compatible `monthname` function to datafusion-spark Implements `monthname(date_or_timestamp)` that returns the three-letter abbreviated month name (Jan, Feb, ..., Dec) from a date or timestamp, matching Apache Spark's behavior. --- datafusion/spark/src/function/datetime/mod.rs | 8 + .../spark/src/function/datetime/monthname.rs | 296 ++++++++++++++++++ 2 files changed, 304 insertions(+) create mode 100644 datafusion/spark/src/function/datetime/monthname.rs diff --git a/datafusion/spark/src/function/datetime/mod.rs b/datafusion/spark/src/function/datetime/mod.rs index 3133ed7337f25..98afa91ddc834 100644 --- a/datafusion/spark/src/function/datetime/mod.rs +++ b/datafusion/spark/src/function/datetime/mod.rs @@ -26,6 +26,7 @@ pub mod from_utc_timestamp; pub mod last_day; pub mod make_dt_interval; pub mod make_interval; +pub mod monthname; pub mod next_day; pub mod time_trunc; pub mod to_utc_timestamp; @@ -52,6 +53,7 @@ make_udf_function!(extract::SparkSecond, second); make_udf_function!(last_day::SparkLastDay, last_day); make_udf_function!(make_dt_interval::SparkMakeDtInterval, make_dt_interval); make_udf_function!(make_interval::SparkMakeInterval, make_interval); +make_udf_function!(monthname::SparkMonthName, monthname); make_udf_function!(next_day::SparkNextDay, next_day); make_udf_function!(time_trunc::SparkTimeTrunc, time_trunc); make_udf_function!(to_utc_timestamp::SparkToUtcTimestamp, to_utc_timestamp); @@ -117,6 +119,11 @@ pub mod expr_fn { "Make interval from years, months, weeks, days, hours, mins and secs.", years months weeks days hours mins secs )); + export_functions!(( + monthname, + "Returns the three-letter abbreviated month name from a date or timestamp.", + arg1 + )); // TODO: add once ANSI support is added: // "When both of the input parameters are not NULL and day_of_week is an invalid input, the function throws SparkIllegalArgumentException if spark.sql.ansi.enabled is set to true, otherwise NULL." export_functions!(( @@ -195,6 +202,7 @@ pub fn functions() -> Vec> { make_dt_interval(), make_interval(), minute(), + monthname(), next_day(), second(), time_trunc(), diff --git a/datafusion/spark/src/function/datetime/monthname.rs b/datafusion/spark/src/function/datetime/monthname.rs new file mode 100644 index 0000000000000..1a02afae67ae1 --- /dev/null +++ b/datafusion/spark/src/function/datetime/monthname.rs @@ -0,0 +1,296 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow::array::{AsArray, StringArray}; +use arrow::compute::{DatePart, date_part}; +use arrow::datatypes::{DataType, Field, FieldRef}; +use datafusion_common::utils::take_function_args; +use datafusion_common::{Result, ScalarValue, exec_err, internal_err}; +use datafusion_expr::{ + Coercion, ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, + Signature, TypeSignatureClass, Volatility, +}; + +const MONTH_NAMES: [&str; 12] = [ + "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", +]; + +fn month_number_to_name(month: i32) -> Option<&'static str> { + MONTH_NAMES.get((month - 1) as usize).copied() +} + +/// Spark-compatible `monthname` expression. +/// Returns the three-letter abbreviated month name from a date or timestamp. +/// +/// +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct SparkMonthName { + signature: Signature, +} + +impl Default for SparkMonthName { + fn default() -> Self { + Self::new() + } +} + +impl SparkMonthName { + pub fn new() -> Self { + Self { + signature: Signature::coercible( + vec![Coercion::new_exact(TypeSignatureClass::Timestamp)], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for SparkMonthName { + fn name(&self) -> &str { + "monthname" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + internal_err!("return_field_from_args should be used instead") + } + + fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result { + let nullable = args.arg_fields.iter().any(|f| f.is_nullable()); + Ok(Arc::new(Field::new(self.name(), DataType::Utf8, nullable))) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + let [arg] = take_function_args(self.name(), args.args)?; + match arg { + ColumnarValue::Scalar(scalar) => { + if scalar.is_null() { + return Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))); + } + let arr = scalar.to_array_of_size(1)?; + let month_arr = date_part(&arr, DatePart::Month)?; + let month_val = month_arr + .as_primitive::() + .value(0); + match month_number_to_name(month_val) { + Some(name) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some( + name.to_string(), + )))), + None => { + exec_err!("Invalid month number: {month_val}") + } + } + } + ColumnarValue::Array(arr) => { + let month_arr = date_part(&arr, DatePart::Month)?; + let int_arr = month_arr.as_primitive::(); + + let result: StringArray = int_arr + .iter() + .map(|maybe_month| match maybe_month { + Some(m) => Ok(month_number_to_name(m)), + None => Ok(None), + }) + .collect::>()?; + + Ok(ColumnarValue::Array(Arc::new(result))) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow::array::{Array, ArrayRef, Date32Array}; + use arrow::datatypes::TimeUnit; + use datafusion_common::config::ConfigOptions; + + fn make_args( + args: Vec, + arg_fields: Vec, + number_rows: usize, + ) -> ScalarFunctionArgs { + ScalarFunctionArgs { + args, + arg_fields, + number_rows, + return_field: Arc::new(Field::new("monthname", DataType::Utf8, true)), + config_options: Arc::new(ConfigOptions::default()), + } + } + + #[test] + fn test_monthname_scalar_date() { + let func = SparkMonthName::new(); + // 2024-03-15 = 19797 days since epoch + let result = func + .invoke_with_args(make_args( + vec![ColumnarValue::Scalar(ScalarValue::Date32(Some(19797)))], + vec![Arc::new(Field::new("d", DataType::Date32, true))], + 1, + )) + .unwrap(); + match result { + ColumnarValue::Scalar(ScalarValue::Utf8(Some(name))) => { + assert_eq!(name, "Mar"); + } + other => panic!("Expected scalar Utf8, got {other:?}"), + } + } + + #[test] + fn test_monthname_array_dates() { + let func = SparkMonthName::new(); + let date_array: ArrayRef = Arc::new(Date32Array::from(vec![ + Some(19723), // 2024-01-01 => Jan + Some(19797), // 2024-03-15 => Mar + Some(20088), // 2024-12-31 => Dec + None, + ])); + + let result = func + .invoke_with_args(make_args( + vec![ColumnarValue::Array(date_array)], + vec![Arc::new(Field::new("d", DataType::Date32, true))], + 4, + )) + .unwrap(); + + match result { + ColumnarValue::Array(arr) => { + let str_arr = arr.as_any().downcast_ref::().unwrap(); + assert_eq!(str_arr.value(0), "Jan"); + assert_eq!(str_arr.value(1), "Mar"); + assert_eq!(str_arr.value(2), "Dec"); + assert!(str_arr.is_null(3)); + } + other => panic!("Expected array, got {other:?}"), + } + } + + #[test] + fn test_monthname_null_scalar() { + let func = SparkMonthName::new(); + let result = func + .invoke_with_args(make_args( + vec![ColumnarValue::Scalar(ScalarValue::Date32(None))], + vec![Arc::new(Field::new("d", DataType::Date32, true))], + 1, + )) + .unwrap(); + match result { + ColumnarValue::Scalar(ScalarValue::Utf8(None)) => {} + other => panic!("Expected Utf8(None), got {other:?}"), + } + } + + #[test] + fn test_monthname_timestamp_micros() { + let func = SparkMonthName::new(); + // 2024-07-15 10:30:00 UTC in microseconds + let result = func + .invoke_with_args(make_args( + vec![ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond( + Some(1721038200000000), + None, + ))], + vec![Arc::new(Field::new( + "ts", + DataType::Timestamp(TimeUnit::Microsecond, None), + true, + ))], + 1, + )) + .unwrap(); + match result { + ColumnarValue::Scalar(ScalarValue::Utf8(Some(name))) => { + assert_eq!(name, "Jul"); + } + other => panic!("Expected scalar Utf8, got {other:?}"), + } + } + + #[test] + fn test_monthname_all_months() { + let func = SparkMonthName::new(); + let dates: Vec> = vec![ + Some(19737), // 2024-01-15 + Some(19768), // 2024-02-15 + Some(19797), // 2024-03-15 + Some(19828), // 2024-04-15 + Some(19858), // 2024-05-15 + Some(19889), // 2024-06-15 + Some(19919), // 2024-07-15 + Some(19950), // 2024-08-15 + Some(19981), // 2024-09-15 + Some(20011), // 2024-10-15 + Some(20042), // 2024-11-15 + Some(20072), // 2024-12-15 + ]; + let date_array: ArrayRef = Arc::new(Date32Array::from(dates)); + + let result = func + .invoke_with_args(make_args( + vec![ColumnarValue::Array(date_array)], + vec![Arc::new(Field::new("d", DataType::Date32, true))], + 12, + )) + .unwrap(); + + let expected = [ + "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", + "Dec", + ]; + match result { + ColumnarValue::Array(arr) => { + let str_arr = arr.as_any().downcast_ref::().unwrap(); + for (i, exp) in expected.iter().enumerate() { + assert_eq!(str_arr.value(i), *exp, "Month {} mismatch", i + 1); + } + } + other => panic!("Expected array, got {other:?}"), + } + } + + #[test] + fn test_monthname_return_field_nullable() { + let func = SparkMonthName::new(); + + let nullable = func + .return_field_from_args(ReturnFieldArgs { + arg_fields: &[Arc::new(Field::new("d", DataType::Date32, true))], + scalar_arguments: &[None], + }) + .unwrap(); + assert!(nullable.is_nullable()); + assert_eq!(nullable.data_type(), &DataType::Utf8); + + let non_nullable = func + .return_field_from_args(ReturnFieldArgs { + arg_fields: &[Arc::new(Field::new("d", DataType::Date32, false))], + scalar_arguments: &[None], + }) + .unwrap(); + assert!(!non_nullable.is_nullable()); + } +} From e066989d758d4cd29f04ab586fa5e77174807990 Mon Sep 17 00:00:00 2001 From: JeelRajodiya Date: Thu, 16 Apr 2026 15:46:55 +0530 Subject: [PATCH 2/3] add SLTs and remove unit tests --- .../spark/src/function/datetime/monthname.rs | 185 +----------------- .../test_files/spark/datetime/monthname.slt | 110 +++++++++++ 2 files changed, 113 insertions(+), 182 deletions(-) create mode 100644 datafusion/sqllogictest/test_files/spark/datetime/monthname.slt diff --git a/datafusion/spark/src/function/datetime/monthname.rs b/datafusion/spark/src/function/datetime/monthname.rs index 1a02afae67ae1..18d2eda7029ba 100644 --- a/datafusion/spark/src/function/datetime/monthname.rs +++ b/datafusion/spark/src/function/datetime/monthname.rs @@ -23,8 +23,8 @@ use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::utils::take_function_args; use datafusion_common::{Result, ScalarValue, exec_err, internal_err}; use datafusion_expr::{ - Coercion, ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, - Signature, TypeSignatureClass, Volatility, + ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, + Volatility, }; const MONTH_NAMES: [&str; 12] = [ @@ -53,10 +53,7 @@ impl Default for SparkMonthName { impl SparkMonthName { pub fn new() -> Self { Self { - signature: Signature::coercible( - vec![Coercion::new_exact(TypeSignatureClass::Timestamp)], - Volatility::Immutable, - ), + signature: Signature::exact(vec![DataType::Date32], Volatility::Immutable), } } } @@ -118,179 +115,3 @@ impl ScalarUDFImpl for SparkMonthName { } } -#[cfg(test)] -mod tests { - use super::*; - use arrow::array::{Array, ArrayRef, Date32Array}; - use arrow::datatypes::TimeUnit; - use datafusion_common::config::ConfigOptions; - - fn make_args( - args: Vec, - arg_fields: Vec, - number_rows: usize, - ) -> ScalarFunctionArgs { - ScalarFunctionArgs { - args, - arg_fields, - number_rows, - return_field: Arc::new(Field::new("monthname", DataType::Utf8, true)), - config_options: Arc::new(ConfigOptions::default()), - } - } - - #[test] - fn test_monthname_scalar_date() { - let func = SparkMonthName::new(); - // 2024-03-15 = 19797 days since epoch - let result = func - .invoke_with_args(make_args( - vec![ColumnarValue::Scalar(ScalarValue::Date32(Some(19797)))], - vec![Arc::new(Field::new("d", DataType::Date32, true))], - 1, - )) - .unwrap(); - match result { - ColumnarValue::Scalar(ScalarValue::Utf8(Some(name))) => { - assert_eq!(name, "Mar"); - } - other => panic!("Expected scalar Utf8, got {other:?}"), - } - } - - #[test] - fn test_monthname_array_dates() { - let func = SparkMonthName::new(); - let date_array: ArrayRef = Arc::new(Date32Array::from(vec![ - Some(19723), // 2024-01-01 => Jan - Some(19797), // 2024-03-15 => Mar - Some(20088), // 2024-12-31 => Dec - None, - ])); - - let result = func - .invoke_with_args(make_args( - vec![ColumnarValue::Array(date_array)], - vec![Arc::new(Field::new("d", DataType::Date32, true))], - 4, - )) - .unwrap(); - - match result { - ColumnarValue::Array(arr) => { - let str_arr = arr.as_any().downcast_ref::().unwrap(); - assert_eq!(str_arr.value(0), "Jan"); - assert_eq!(str_arr.value(1), "Mar"); - assert_eq!(str_arr.value(2), "Dec"); - assert!(str_arr.is_null(3)); - } - other => panic!("Expected array, got {other:?}"), - } - } - - #[test] - fn test_monthname_null_scalar() { - let func = SparkMonthName::new(); - let result = func - .invoke_with_args(make_args( - vec![ColumnarValue::Scalar(ScalarValue::Date32(None))], - vec![Arc::new(Field::new("d", DataType::Date32, true))], - 1, - )) - .unwrap(); - match result { - ColumnarValue::Scalar(ScalarValue::Utf8(None)) => {} - other => panic!("Expected Utf8(None), got {other:?}"), - } - } - - #[test] - fn test_monthname_timestamp_micros() { - let func = SparkMonthName::new(); - // 2024-07-15 10:30:00 UTC in microseconds - let result = func - .invoke_with_args(make_args( - vec![ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond( - Some(1721038200000000), - None, - ))], - vec![Arc::new(Field::new( - "ts", - DataType::Timestamp(TimeUnit::Microsecond, None), - true, - ))], - 1, - )) - .unwrap(); - match result { - ColumnarValue::Scalar(ScalarValue::Utf8(Some(name))) => { - assert_eq!(name, "Jul"); - } - other => panic!("Expected scalar Utf8, got {other:?}"), - } - } - - #[test] - fn test_monthname_all_months() { - let func = SparkMonthName::new(); - let dates: Vec> = vec![ - Some(19737), // 2024-01-15 - Some(19768), // 2024-02-15 - Some(19797), // 2024-03-15 - Some(19828), // 2024-04-15 - Some(19858), // 2024-05-15 - Some(19889), // 2024-06-15 - Some(19919), // 2024-07-15 - Some(19950), // 2024-08-15 - Some(19981), // 2024-09-15 - Some(20011), // 2024-10-15 - Some(20042), // 2024-11-15 - Some(20072), // 2024-12-15 - ]; - let date_array: ArrayRef = Arc::new(Date32Array::from(dates)); - - let result = func - .invoke_with_args(make_args( - vec![ColumnarValue::Array(date_array)], - vec![Arc::new(Field::new("d", DataType::Date32, true))], - 12, - )) - .unwrap(); - - let expected = [ - "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", - "Dec", - ]; - match result { - ColumnarValue::Array(arr) => { - let str_arr = arr.as_any().downcast_ref::().unwrap(); - for (i, exp) in expected.iter().enumerate() { - assert_eq!(str_arr.value(i), *exp, "Month {} mismatch", i + 1); - } - } - other => panic!("Expected array, got {other:?}"), - } - } - - #[test] - fn test_monthname_return_field_nullable() { - let func = SparkMonthName::new(); - - let nullable = func - .return_field_from_args(ReturnFieldArgs { - arg_fields: &[Arc::new(Field::new("d", DataType::Date32, true))], - scalar_arguments: &[None], - }) - .unwrap(); - assert!(nullable.is_nullable()); - assert_eq!(nullable.data_type(), &DataType::Utf8); - - let non_nullable = func - .return_field_from_args(ReturnFieldArgs { - arg_fields: &[Arc::new(Field::new("d", DataType::Date32, false))], - scalar_arguments: &[None], - }) - .unwrap(); - assert!(!non_nullable.is_nullable()); - } -} diff --git a/datafusion/sqllogictest/test_files/spark/datetime/monthname.slt b/datafusion/sqllogictest/test_files/spark/datetime/monthname.slt new file mode 100644 index 0000000000000..74197ccbe031f --- /dev/null +++ b/datafusion/sqllogictest/test_files/spark/datetime/monthname.slt @@ -0,0 +1,110 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Scalar date input +query T +SELECT monthname('2024-03-15'::DATE); +---- +Mar + +# All 12 months +query T +SELECT monthname('2024-01-15'::DATE); +---- +Jan + +query T +SELECT monthname('2024-02-15'::DATE); +---- +Feb + +query T +SELECT monthname('2024-03-15'::DATE); +---- +Mar + +query T +SELECT monthname('2024-04-15'::DATE); +---- +Apr + +query T +SELECT monthname('2024-05-15'::DATE); +---- +May + +query T +SELECT monthname('2024-06-15'::DATE); +---- +Jun + +query T +SELECT monthname('2024-07-15'::DATE); +---- +Jul + +query T +SELECT monthname('2024-08-15'::DATE); +---- +Aug + +query T +SELECT monthname('2024-09-15'::DATE); +---- +Sep + +query T +SELECT monthname('2024-10-15'::DATE); +---- +Oct + +query T +SELECT monthname('2024-11-15'::DATE); +---- +Nov + +query T +SELECT monthname('2024-12-15'::DATE); +---- +Dec + +# NULL handling +query T +SELECT monthname(NULL::DATE); +---- +NULL + +# Array input +query T +SELECT monthname(d) FROM (VALUES ('2024-01-01'::DATE), ('2024-06-15'::DATE), ('2024-12-31'::DATE), (NULL::DATE)) AS t(d); +---- +Jan +Jun +Dec +NULL + +# Error: wrong argument type (string without cast) +statement error No function matches the given name and argument types 'monthname\(Utf8\)' +SELECT monthname('not-a-date'); + +# Error: wrong argument type (integer) +statement error No function matches the given name and argument types 'monthname\(Int64\)' +SELECT monthname(123); + +# Error: no arguments +statement error 'monthname' does not support zero arguments +SELECT monthname(); From 553b8d9f3fee602335aed88ae68419be11c710a1 Mon Sep 17 00:00:00 2001 From: JeelRajodiya Date: Thu, 16 Apr 2026 16:12:52 +0530 Subject: [PATCH 3/3] remove error returning arm and simplified the code --- .../spark/src/function/datetime/monthname.rs | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/datafusion/spark/src/function/datetime/monthname.rs b/datafusion/spark/src/function/datetime/monthname.rs index 18d2eda7029ba..778021d0523bd 100644 --- a/datafusion/spark/src/function/datetime/monthname.rs +++ b/datafusion/spark/src/function/datetime/monthname.rs @@ -21,7 +21,7 @@ use arrow::array::{AsArray, StringArray}; use arrow::compute::{DatePart, date_part}; use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::utils::take_function_args; -use datafusion_common::{Result, ScalarValue, exec_err, internal_err}; +use datafusion_common::{Result, ScalarValue, internal_err}; use datafusion_expr::{ ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, @@ -88,14 +88,8 @@ impl ScalarUDFImpl for SparkMonthName { let month_val = month_arr .as_primitive::() .value(0); - match month_number_to_name(month_val) { - Some(name) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some( - name.to_string(), - )))), - None => { - exec_err!("Invalid month number: {month_val}") - } - } + let name = month_number_to_name(month_val).map(|s| s.to_string()); + Ok(ColumnarValue::Scalar(ScalarValue::Utf8(name))) } ColumnarValue::Array(arr) => { let month_arr = date_part(&arr, DatePart::Month)?; @@ -103,11 +97,8 @@ impl ScalarUDFImpl for SparkMonthName { let result: StringArray = int_arr .iter() - .map(|maybe_month| match maybe_month { - Some(m) => Ok(month_number_to_name(m)), - None => Ok(None), - }) - .collect::>()?; + .map(|maybe_month| maybe_month.and_then(month_number_to_name)) + .collect(); Ok(ColumnarValue::Array(Arc::new(result))) }