diff --git a/datafusion/spark/src/function/datetime/mod.rs b/datafusion/spark/src/function/datetime/mod.rs
index 3133ed7337f25..98afa91ddc834 100644
--- a/datafusion/spark/src/function/datetime/mod.rs
+++ b/datafusion/spark/src/function/datetime/mod.rs
@@ -26,6 +26,7 @@ pub mod from_utc_timestamp;
 pub mod last_day;
 pub mod make_dt_interval;
 pub mod make_interval;
+pub mod monthname;
 pub mod next_day;
 pub mod time_trunc;
 pub mod to_utc_timestamp;
@@ -52,6 +53,7 @@ make_udf_function!(extract::SparkSecond, second);
 make_udf_function!(last_day::SparkLastDay, last_day);
 make_udf_function!(make_dt_interval::SparkMakeDtInterval, make_dt_interval);
 make_udf_function!(make_interval::SparkMakeInterval, make_interval);
+make_udf_function!(monthname::SparkMonthName, monthname);
 make_udf_function!(next_day::SparkNextDay, next_day);
 make_udf_function!(time_trunc::SparkTimeTrunc, time_trunc);
 make_udf_function!(to_utc_timestamp::SparkToUtcTimestamp, to_utc_timestamp);
@@ -117,6 +119,11 @@ pub mod expr_fn {
         "Make interval from years, months, weeks, days, hours, mins and secs.",
         years months weeks days hours mins secs
     ));
+    export_functions!((
+        monthname,
+        "Returns the three-letter abbreviated month name from a date or timestamp.",
+        arg1
+    ));
     // TODO: add once ANSI support is added:
     // "When both of the input parameters are not NULL and day_of_week is an invalid input, the function throws SparkIllegalArgumentException if spark.sql.ansi.enabled is set to true, otherwise NULL."
     export_functions!((
@@ -195,6 +202,7 @@ pub fn functions() -> Vec<Arc<ScalarUDF>> {
         make_dt_interval(),
         make_interval(),
         minute(),
+        monthname(),
         next_day(),
         second(),
         time_trunc(),
diff --git a/datafusion/spark/src/function/datetime/monthname.rs b/datafusion/spark/src/function/datetime/monthname.rs
new file mode 100644
index 0000000000000..778021d0523bd
--- /dev/null
+++ b/datafusion/spark/src/function/datetime/monthname.rs
@@ -0,0 +1,122 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use arrow::array::{AsArray, StringArray};
+use arrow::compute::{DatePart, date_part};
+use arrow::datatypes::{DataType, Field, FieldRef};
+use datafusion_common::utils::take_function_args;
+use datafusion_common::{Result, ScalarValue, internal_err};
+use datafusion_expr::{
+    ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature,
+    Volatility,
+};
+
+/// Three-letter English month abbreviations, indexed by `month - 1`.
+const MONTH_NAMES: [&str; 12] = [
+    "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
+];
+
+/// Maps a 1-based month number to its three-letter abbreviation.
+///
+/// Returns `None` for any value outside `1..=12`.
+fn month_number_to_name(month: i32) -> Option<&'static str> {
+    // Checked conversion and subtraction so that zero, negative or extreme
+    // values yield `None` instead of depending on integer wrap-around.
+    let index = usize::try_from(month).ok()?.checked_sub(1)?;
+    MONTH_NAMES.get(index).copied()
+}
+
+/// Spark-compatible `monthname` expression.
+///
+/// Returns the three-letter abbreviated month name (`Jan` through `Dec`) of a
+/// date. A NULL input produces a NULL output.
+///
+/// NOTE(review): the signature only declares `Date32`; whether timestamp inputs
+/// reach this function depends on argument coercion -- confirm.
+///
+/// <https://spark.apache.org/docs/latest/api/sql/index.html#monthname>
+#[derive(Debug, PartialEq, Eq, Hash)]
+pub struct SparkMonthName {
+    signature: Signature,
+}
+
+impl Default for SparkMonthName {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl SparkMonthName {
+    /// Creates the UDF with an exact, immutable `(Date32)` signature.
+    pub fn new() -> Self {
+        Self {
+            signature: Signature::exact(vec![DataType::Date32], Volatility::Immutable),
+        }
+    }
+}
+
+impl ScalarUDFImpl for SparkMonthName {
+    fn name(&self) -> &str {
+        "monthname"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        internal_err!("return_field_from_args should be used instead")
+    }
+
+    /// The result is `Utf8`, nullable if any argument field is nullable.
+    fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
+        let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
+        Ok(Arc::new(Field::new(self.name(), DataType::Utf8, nullable)))
+    }
+
+    /// Extracts the month with `date_part` and maps it to its abbreviation.
+    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+        let [arg] = take_function_args(self.name(), args.args)?;
+        match arg {
+            ColumnarValue::Scalar(scalar) => {
+                if scalar.is_null() {
+                    return Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)));
+                }
+                let arr = scalar.to_array_of_size(1)?;
+                let month_arr = date_part(&arr, DatePart::Month)?;
+                let month_val = month_arr
+                    .as_primitive::<arrow::datatypes::Int32Type>()
+                    .value(0);
+                let name = month_number_to_name(month_val).map(String::from);
+                Ok(ColumnarValue::Scalar(ScalarValue::Utf8(name)))
+            }
+            ColumnarValue::Array(arr) => {
+                let month_arr = date_part(&arr, DatePart::Month)?;
+                let int_arr = month_arr.as_primitive::<arrow::datatypes::Int32Type>();
+
+                let result: StringArray = int_arr
+                    .iter()
+                    .map(|maybe_month| maybe_month.and_then(month_number_to_name))
+                    .collect();
+
+                Ok(ColumnarValue::Array(Arc::new(result)))
+            }
+        }
+    }
+}
diff --git a/datafusion/sqllogictest/test_files/spark/datetime/monthname.slt b/datafusion/sqllogictest/test_files/spark/datetime/monthname.slt
new file mode 100644
index 0000000000000..74197ccbe031f
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/spark/datetime/monthname.slt
@@ -0,0 +1,110 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Scalar date input
+query T
+SELECT monthname('2024-03-15'::DATE);
+----
+Mar
+
+# All 12 months
+query T
+SELECT monthname('2024-01-15'::DATE);
+----
+Jan
+
+query T
+SELECT monthname('2024-02-15'::DATE);
+----
+Feb
+
+query T
+SELECT monthname('2024-03-15'::DATE);
+----
+Mar
+
+query T
+SELECT monthname('2024-04-15'::DATE);
+----
+Apr
+
+query T
+SELECT monthname('2024-05-15'::DATE);
+----
+May
+
+query T
+SELECT monthname('2024-06-15'::DATE);
+----
+Jun
+
+query T
+SELECT monthname('2024-07-15'::DATE);
+----
+Jul
+
+query T
+SELECT monthname('2024-08-15'::DATE);
+----
+Aug
+
+query T
+SELECT monthname('2024-09-15'::DATE);
+----
+Sep
+
+query T
+SELECT monthname('2024-10-15'::DATE);
+----
+Oct
+
+query T
+SELECT monthname('2024-11-15'::DATE);
+----
+Nov
+
+query T
+SELECT monthname('2024-12-15'::DATE);
+----
+Dec
+
+# NULL handling
+query T
+SELECT monthname(NULL::DATE);
+----
+NULL
+
+# Array input
+query T
+SELECT monthname(d) FROM (VALUES ('2024-01-01'::DATE), ('2024-06-15'::DATE), ('2024-12-31'::DATE), (NULL::DATE)) AS t(d);
+----
+Jan
+Jun
+Dec
+NULL
+
+# Error: wrong argument type (string without cast)
+statement error No function matches the given name and argument types 'monthname\(Utf8\)'
+SELECT monthname('not-a-date');
+
+# Error: wrong argument type (integer)
+statement error No function matches the given name and argument types 'monthname\(Int64\)'
+SELECT monthname(123);
+
+# Error: no arguments
+statement error 'monthname' does not support zero arguments
+SELECT monthname();