Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ authors = ["Apache DataFusion <dev@datafusion.apache.org>"]
description = "Apache DataFusion DataFrame and SQL Query Engine"
readme = "README.md"
license = "Apache-2.0"
edition = "2021"
rust-version = "1.78"
edition = "2024"
rust-version = "1.88"
include = [
"/src",
"/datafusion",
Expand Down
2 changes: 1 addition & 1 deletion examples/datafusion-ffi-example/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
[package]
name = "datafusion-ffi-example"
version = "0.2.0"
edition = "2021"
edition = "2024"

[dependencies]
datafusion-catalog = { version = "52" , default-features = false }
Expand Down
2 changes: 1 addition & 1 deletion examples/datafusion-ffi-example/src/aggregate_udf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use datafusion_expr::{Accumulator, AggregateUDF, AggregateUDFImpl, Signature};
use datafusion_ffi::udaf::FFI_AggregateUDF;
use datafusion_functions_aggregate::sum::Sum;
use pyo3::types::PyCapsule;
use pyo3::{pyclass, pymethods, Bound, PyResult, Python};
use pyo3::{Bound, PyResult, Python, pyclass, pymethods};

#[pyclass(name = "MySumUDF", module = "datafusion_ffi_example", subclass)]
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
Expand Down
2 changes: 1 addition & 1 deletion examples/datafusion-ffi-example/src/catalog_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use datafusion_common::error::{DataFusionError, Result};
use datafusion_ffi::catalog_provider::FFI_CatalogProvider;
use datafusion_ffi::schema_provider::FFI_SchemaProvider;
use pyo3::types::PyCapsule;
use pyo3::{pyclass, pymethods, Bound, PyAny, PyResult, Python};
use pyo3::{Bound, PyAny, PyResult, Python, pyclass, pymethods};

use crate::utils::ffi_logical_codec_from_pycapsule;

Expand Down
4 changes: 2 additions & 2 deletions examples/datafusion-ffi-example/src/scalar_udf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ use std::sync::Arc;

use arrow_array::{Array, BooleanArray};
use arrow_schema::DataType;
use datafusion_common::error::Result as DataFusionResult;
use datafusion_common::ScalarValue;
use datafusion_common::error::Result as DataFusionResult;
use datafusion_expr::{
ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, TypeSignature,
Volatility,
};
use datafusion_ffi::udf::FFI_ScalarUDF;
use pyo3::types::PyCapsule;
use pyo3::{pyclass, pymethods, Bound, PyResult, Python};
use pyo3::{Bound, PyResult, Python, pyclass, pymethods};

#[pyclass(name = "IsNullUDF", module = "datafusion_ffi_example", subclass)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
Expand Down
2 changes: 1 addition & 1 deletion examples/datafusion-ffi-example/src/table_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use datafusion_common::error::Result as DataFusionResult;
use datafusion_expr::Expr;
use datafusion_ffi::udtf::FFI_TableFunction;
use pyo3::types::PyCapsule;
use pyo3::{pyclass, pymethods, Bound, PyAny, PyResult, Python};
use pyo3::{Bound, PyAny, PyResult, Python, pyclass, pymethods};

use crate::table_provider::MyTableProvider;
use crate::utils::ffi_logical_codec_from_pycapsule;
Expand Down
2 changes: 1 addition & 1 deletion examples/datafusion-ffi-example/src/table_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use datafusion_common::error::{DataFusionError, Result as DataFusionResult};
use datafusion_ffi::table_provider::FFI_TableProvider;
use pyo3::exceptions::PyRuntimeError;
use pyo3::types::PyCapsule;
use pyo3::{pyclass, pymethods, Bound, PyAny, PyResult, Python};
use pyo3::{Bound, PyAny, PyResult, Python, pyclass, pymethods};

use crate::utils::ffi_logical_codec_from_pycapsule;

Expand Down
2 changes: 1 addition & 1 deletion examples/datafusion-ffi-example/src/window_udf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use datafusion_expr::{PartitionEvaluator, Signature, WindowUDF, WindowUDFImpl};
use datafusion_ffi::udwf::FFI_WindowUDF;
use datafusion_functions_window::rank::rank_udwf;
use pyo3::types::PyCapsule;
use pyo3::{pyclass, pymethods, Bound, PyResult, Python};
use pyo3::{Bound, PyResult, Python, pyclass, pymethods};

#[pyclass(name = "MyRankUDF", module = "datafusion_ffi_example", subclass)]
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
Expand Down
2 changes: 1 addition & 1 deletion src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
use arrow::pyarrow::ToPyArrow;
use pyo3::prelude::{PyAnyMethods, PyCapsuleMethods};
use pyo3::types::PyCapsule;
use pyo3::{pyclass, pymethods, Bound, PyAny, PyResult, Python};
use pyo3::{Bound, PyAny, PyResult, Python, pyclass, pymethods};

use crate::errors::PyDataFusionResult;
use crate::utils::validate_pycapsule;
Expand Down
4 changes: 2 additions & 2 deletions src/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ use datafusion::common::DataFusionError;
use datafusion::datasource::TableProvider;
use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec;
use datafusion_ffi::schema_provider::FFI_SchemaProvider;
use pyo3::IntoPyObjectExt;
use pyo3::exceptions::PyKeyError;
use pyo3::prelude::*;
use pyo3::types::PyCapsule;
use pyo3::IntoPyObjectExt;

use crate::dataset::Dataset;
use crate::errors::{py_datafusion_err, to_datafusion_err, PyDataFusionError, PyDataFusionResult};
use crate::errors::{PyDataFusionError, PyDataFusionResult, py_datafusion_err, to_datafusion_err};
use crate::table::PyTable;
use crate::utils::{
create_logical_extension_capsule, extract_logical_extension_codec, validate_pycapsule,
Expand Down
16 changes: 8 additions & 8 deletions src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef};
use datafusion::arrow::pyarrow::PyArrowType;
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::catalog::CatalogProvider;
use datafusion::common::{exec_err, ScalarValue, TableReference};
use datafusion::common::{ScalarValue, TableReference, exec_err};
use datafusion::datasource::file_format::file_compression_type::FileCompressionType;
use datafusion::datasource::file_format::parquet::ParquetFormat;
use datafusion::datasource::listing::{
ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
};
use datafusion::datasource::{MemTable, TableProvider};
use datafusion::execution::TaskContextProvider;
use datafusion::execution::context::{
DataFilePaths, SQLOptions, SessionConfig, SessionContext, TaskContext,
};
Expand All @@ -42,7 +43,6 @@ use datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool, Unboun
use datafusion::execution::options::ReadOptions;
use datafusion::execution::runtime_env::RuntimeEnvBuilder;
use datafusion::execution::session_state::SessionStateBuilder;
use datafusion::execution::TaskContextProvider;
use datafusion::prelude::{
AvroReadOptions, CsvReadOptions, DataFrame, NdJsonReadOptions, ParquetReadOptions,
};
Expand All @@ -51,18 +51,18 @@ use datafusion_ffi::execution::FFI_TaskContextProvider;
use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec;
use datafusion_proto::logical_plan::DefaultLogicalExtensionCodec;
use object_store::ObjectStore;
use pyo3::IntoPyObjectExt;
use pyo3::exceptions::{PyKeyError, PyValueError};
use pyo3::prelude::*;
use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple};
use pyo3::IntoPyObjectExt;
use url::Url;
use uuid::Uuid;

use crate::catalog::{PyCatalog, RustWrappedPyCatalogProvider};
use crate::common::data_type::PyScalarValue;
use crate::dataframe::PyDataFrame;
use crate::dataset::Dataset;
use crate::errors::{py_datafusion_err, PyDataFusionError, PyDataFusionResult};
use crate::errors::{PyDataFusionError, PyDataFusionResult, py_datafusion_err};
use crate::expr::sort_expr::PySortExpr;
use crate::options::PyCsvReadOptions;
use crate::physical_plan::PyExecutionPlan;
Expand Down Expand Up @@ -899,10 +899,10 @@ impl PySessionContext {
match res {
Ok(df) => Ok(PyDataFrame::new(df)),
Err(e) => {
if let datafusion::error::DataFusionError::Plan(msg) = &e {
if msg.contains("No table named") {
return Err(PyKeyError::new_err(msg.to_string()));
}
if let datafusion::error::DataFusionError::Plan(msg) = &e
&& msg.contains("No table named")
{
return Err(PyKeyError::new_err(msg.to_string()));
}
Err(py_datafusion_err(e))
}
Expand Down
17 changes: 10 additions & 7 deletions src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use std::ffi::{CStr, CString};
use std::str::FromStr;
use std::sync::Arc;

use arrow::array::{new_null_array, Array, ArrayRef, RecordBatch, RecordBatchReader};
use arrow::array::{Array, ArrayRef, RecordBatch, RecordBatchReader, new_null_array};
use arrow::compute::can_cast_types;
use arrow::error::ArrowError;
use arrow::ffi::FFI_ArrowSchema;
Expand All @@ -36,23 +36,23 @@ use datafusion::config::{CsvOptions, ParquetColumnOptions, ParquetOptions, Table
use datafusion::dataframe::{DataFrame, DataFrameWriteOptions};
use datafusion::error::DataFusionError;
use datafusion::execution::SendableRecordBatchStream;
use datafusion::logical_expr::dml::InsertOp;
use datafusion::logical_expr::SortExpr;
use datafusion::logical_expr::dml::InsertOp;
use datafusion::parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel};
use datafusion::prelude::*;
use futures::{StreamExt, TryStreamExt};
use parking_lot::Mutex;
use pyo3::PyErr;
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::pybacked::PyBackedStr;
use pyo3::types::{PyCapsule, PyList, PyTuple, PyTupleMethods};
use pyo3::PyErr;

use crate::errors::{py_datafusion_err, PyDataFusionError, PyDataFusionResult};
use crate::expr::sort_expr::{to_sort_expressions, PySortExpr};
use crate::errors::{PyDataFusionError, PyDataFusionResult, py_datafusion_err};
use crate::expr::PyExpr;
use crate::expr::sort_expr::{PySortExpr, to_sort_expressions};
use crate::physical_plan::PyExecutionPlan;
use crate::record_batch::{poll_next_batch, PyRecordBatchStream};
use crate::record_batch::{PyRecordBatchStream, poll_next_batch};
use crate::sql::logical::PyLogicalPlan;
use crate::table::{PyTable, TempViewTable};
use crate::utils::{
Expand Down Expand Up @@ -1328,7 +1328,10 @@ fn record_batch_into_schema(
} else if field.is_nullable() {
data_arrays.push(new_null_array(desired_data_type, array_size));
} else {
return Err(ArrowError::CastError(format!("Attempting to cast to non-nullable and non-castable field {} during schema projection.", field.name())));
return Err(ArrowError::CastError(format!(
"Attempting to cast to non-nullable and non-castable field {} during schema projection.",
field.name()
)));
}
} else {
if !field.is_nullable() {
Expand Down
4 changes: 2 additions & 2 deletions src/dataset_exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,16 @@ use datafusion::arrow::pyarrow::PyArrowType;
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::error::{DataFusionError as InnerDataFusionError, Result as DFResult};
use datafusion::execution::context::TaskContext;
use datafusion::logical_expr::utils::conjunction;
use datafusion::logical_expr::Expr;
use datafusion::logical_expr::utils::conjunction;
use datafusion::physical_expr::{EquivalenceProperties, LexOrdering};
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
use datafusion::physical_plan::{
DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, Partitioning,
SendableRecordBatchStream, Statistics,
};
use futures::{stream, TryStreamExt};
use futures::{TryStreamExt, stream};
/// Implements a Datafusion physical ExecutionPlan that delegates to a PyArrow Dataset
/// This actually performs the projection, filtering and scanning of a Dataset
use pyo3::prelude::*;
Expand Down
2 changes: 1 addition & 1 deletion src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ use std::fmt::Debug;
use datafusion::arrow::error::ArrowError;
use datafusion::error::DataFusionError as InnerDataFusionError;
use prost::EncodeError;
use pyo3::exceptions::PyException;
use pyo3::PyErr;
use pyo3::exceptions::PyException;

pub type PyDataFusionResult<T> = std::result::Result<T, PyDataFusionError>;

Expand Down
48 changes: 30 additions & 18 deletions src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ use datafusion::logical_expr::expr::{
};
use datafusion::logical_expr::utils::exprlist_to_fields;
use datafusion::logical_expr::{
col, lit, lit_with_metadata, Between, BinaryExpr, Case, Cast, Expr, ExprFuncBuilder,
ExprFunctionExt, Like, LogicalPlan, Operator, TryCast, WindowFunctionDefinition,
Between, BinaryExpr, Case, Cast, Expr, ExprFuncBuilder, ExprFunctionExt, Like, LogicalPlan,
Operator, TryCast, WindowFunctionDefinition, col, lit, lit_with_metadata,
};
use pyo3::IntoPyObjectExt;
use pyo3::basic::CompareOp;
use pyo3::prelude::*;
use pyo3::IntoPyObjectExt;
use window::PyWindowFrame;

use self::alias::PyAlias;
Expand All @@ -44,7 +44,7 @@ use self::bool_expr::{
use self::like::{PyILike, PyLike, PySimilarTo};
use self::scalar_variable::PyScalarVariable;
use crate::common::data_type::{DataTypeMap, NullTreatment, PyScalarValue, RexType};
use crate::errors::{py_runtime_err, py_type_err, py_unsupported_variant_err, PyDataFusionResult};
use crate::errors::{PyDataFusionResult, py_runtime_err, py_type_err, py_unsupported_variant_err};
use crate::expr::aggregate_expr::PyAggregateFunction;
use crate::expr::binary_expr::PyBinaryExpr;
use crate::expr::column::PyColumn;
Expand Down Expand Up @@ -111,7 +111,7 @@ pub mod unnest_expr;
pub mod values;
pub mod window;

use sort_expr::{to_sort_expressions, PySortExpr};
use sort_expr::{PySortExpr, to_sort_expressions};

/// A PyExpr that can be used on a DataFrame
#[pyclass(frozen, name = "RawExpr", module = "datafusion.expr", subclass)]
Expand Down Expand Up @@ -141,15 +141,18 @@ pub fn py_expr_list(expr: &[Expr]) -> PyResult<Vec<PyExpr>> {
impl PyExpr {
/// Return the specific expression
fn to_variant<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
Python::attach(|_| {
match &self.expr {
Python::attach(|_| match &self.expr {
Expr::Alias(alias) => Ok(PyAlias::from(alias.clone()).into_bound_py_any(py)?),
Expr::Column(col) => Ok(PyColumn::from(col.clone()).into_bound_py_any(py)?),
Expr::ScalarVariable(field, variables) => {
Ok(PyScalarVariable::new(field, variables).into_bound_py_any(py)?)
}
Expr::Like(value) => Ok(PyLike::from(value.clone()).into_bound_py_any(py)?),
Expr::Literal(value, metadata) => Ok(PyLiteral::new_with_metadata(value.clone(), metadata.clone()).into_bound_py_any(py)?),
Expr::Literal(value, metadata) => Ok(PyLiteral::new_with_metadata(
value.clone(),
metadata.clone(),
)
.into_bound_py_any(py)?),
Expr::BinaryExpr(expr) => Ok(PyBinaryExpr::from(expr.clone()).into_bound_py_any(py)?),
Expr::Not(expr) => Ok(PyNot::new(*expr.clone()).into_bound_py_any(py)?),
Expr::IsNotNull(expr) => Ok(PyIsNotNull::new(*expr.clone()).into_bound_py_any(py)?),
Expand All @@ -159,13 +162,17 @@ impl PyExpr {
Expr::IsUnknown(expr) => Ok(PyIsUnknown::new(*expr.clone()).into_bound_py_any(py)?),
Expr::IsNotTrue(expr) => Ok(PyIsNotTrue::new(*expr.clone()).into_bound_py_any(py)?),
Expr::IsNotFalse(expr) => Ok(PyIsNotFalse::new(*expr.clone()).into_bound_py_any(py)?),
Expr::IsNotUnknown(expr) => Ok(PyIsNotUnknown::new(*expr.clone()).into_bound_py_any(py)?),
Expr::IsNotUnknown(expr) => {
Ok(PyIsNotUnknown::new(*expr.clone()).into_bound_py_any(py)?)
}
Expr::Negative(expr) => Ok(PyNegative::new(*expr.clone()).into_bound_py_any(py)?),
Expr::AggregateFunction(expr) => {
Ok(PyAggregateFunction::from(expr.clone()).into_bound_py_any(py)?)
}
Expr::SimilarTo(value) => Ok(PySimilarTo::from(value.clone()).into_bound_py_any(py)?),
Expr::Between(value) => Ok(between::PyBetween::from(value.clone()).into_bound_py_any(py)?),
Expr::Between(value) => {
Ok(between::PyBetween::from(value.clone()).into_bound_py_any(py)?)
}
Expr::Case(value) => Ok(case::PyCase::from(value.clone()).into_bound_py_any(py)?),
Expr::Cast(value) => Ok(cast::PyCast::from(value.clone()).into_bound_py_any(py)?),
Expr::TryCast(value) => Ok(cast::PyTryCast::from(value.clone()).into_bound_py_any(py)?),
Expand All @@ -175,7 +182,9 @@ impl PyExpr {
Expr::WindowFunction(value) => Err(py_unsupported_variant_err(format!(
"Converting Expr::WindowFunction to a Python object is not implemented: {value:?}"
))),
Expr::InList(value) => Ok(in_list::PyInList::from(value.clone()).into_bound_py_any(py)?),
Expr::InList(value) => {
Ok(in_list::PyInList::from(value.clone()).into_bound_py_any(py)?)
}
Expr::Exists(value) => Ok(exists::PyExists::from(value.clone()).into_bound_py_any(py)?),
Expr::InSubquery(value) => {
Ok(in_subquery::PyInSubquery::from(value.clone()).into_bound_py_any(py)?)
Expand All @@ -193,11 +202,14 @@ impl PyExpr {
Expr::Placeholder(value) => {
Ok(placeholder::PyPlaceholder::from(value.clone()).into_bound_py_any(py)?)
}
Expr::OuterReferenceColumn(data_type, column) => Err(py_unsupported_variant_err(format!(
"Converting Expr::OuterReferenceColumn to a Python object is not implemented: {data_type:?} - {column:?}"
))),
Expr::Unnest(value) => Ok(unnest_expr::PyUnnestExpr::from(value.clone()).into_bound_py_any(py)?),
}
Expr::OuterReferenceColumn(data_type, column) => {
Err(py_unsupported_variant_err(format!(
"Converting Expr::OuterReferenceColumn to a Python object is not implemented: {data_type:?} - {column:?}"
)))
}
Expr::Unnest(value) => {
Ok(unnest_expr::PyUnnestExpr::from(value.clone()).into_bound_py_any(py)?)
}
})
}

Expand Down Expand Up @@ -368,7 +380,7 @@ impl PyExpr {
Expr::ScalarSubquery(..) => RexType::ScalarSubquery,
#[allow(deprecated)]
Expr::Wildcard { .. } => {
return Err(py_unsupported_variant_err("Expr::Wildcard is unsupported"))
return Err(py_unsupported_variant_err("Expr::Wildcard is unsupported"));
}
})
}
Expand Down Expand Up @@ -555,7 +567,7 @@ impl PyExpr {
return Err(py_type_err(format!(
"Catch all triggered in get_operator_name: {:?}",
&self.expr
)))
)));
}
})
}
Expand Down
4 changes: 2 additions & 2 deletions src/expr/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
use std::fmt::{self, Display, Formatter};

use datafusion::common::DataFusionError;
use datafusion::logical_expr::Expr;
use datafusion::logical_expr::expr::{AggregateFunction, AggregateFunctionParams, Alias};
use datafusion::logical_expr::logical_plan::Aggregate;
use datafusion::logical_expr::Expr;
use pyo3::prelude::*;
use pyo3::IntoPyObjectExt;
use pyo3::prelude::*;

use super::logical_node::LogicalNode;
use crate::common::df_schema::PyDFSchema;
Expand Down
2 changes: 1 addition & 1 deletion src/expr/analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
use std::fmt::{self, Display, Formatter};

use datafusion::logical_expr::logical_plan::Analyze;
use pyo3::prelude::*;
use pyo3::IntoPyObjectExt;
use pyo3::prelude::*;

use super::logical_node::LogicalNode;
use crate::common::df_schema::PyDFSchema;
Expand Down
Loading