From 3941abad9575d41cede0431fa167cf78a5ae48b7 Mon Sep 17 00:00:00 2001 From: Qi Zhu <821684824@qq.com> Date: Wed, 11 Feb 2026 22:07:34 +0800 Subject: [PATCH] retry --- datafusion-cli/src/functions.rs | 4 +- datafusion/catalog-listing/src/helpers.rs | 16 +-- datafusion/common/src/column.rs | 44 ++++-- datafusion/common/src/dfschema.rs | 11 +- datafusion/core/src/dataframe/mod.rs | 4 +- datafusion/core/src/physical_planner.rs | 16 ++- .../tests/dataframe/dataframe_functions.rs | 8 +- datafusion/core/tests/dataframe/mod.rs | 2 +- .../core/tests/user_defined/expr_planner.rs | 4 +- datafusion/expr/src/expr.rs | 128 +++++++++--------- datafusion/expr/src/expr_fn.rs | 5 +- datafusion/expr/src/expr_rewriter/mod.rs | 19 +-- datafusion/expr/src/expr_rewriter/order_by.rs | 5 +- datafusion/expr/src/expr_schema.rs | 40 +++--- datafusion/expr/src/lib.rs | 2 +- datafusion/expr/src/logical_plan/builder.rs | 29 ++-- datafusion/expr/src/logical_plan/plan.rs | 16 ++- datafusion/expr/src/logical_plan/tree_node.rs | 2 +- datafusion/expr/src/tree_node.rs | 27 ++-- datafusion/expr/src/utils.rs | 16 +-- .../src/analyzer/resolve_grouping_function.rs | 6 +- .../optimizer/src/analyzer/type_coercion.rs | 20 ++- .../optimizer/src/common_subexpr_eliminate.rs | 5 +- datafusion/optimizer/src/decorrelate.rs | 19 ++- .../optimizer/src/optimize_projections/mod.rs | 42 +++--- datafusion/optimizer/src/push_down_filter.rs | 2 +- .../optimizer/src/rewrite_set_comparison.rs | 11 +- .../simplify_expressions/expr_simplifier.rs | 2 +- datafusion/physical-expr/src/planner.rs | 14 +- .../proto/src/logical_plan/from_proto.rs | 4 +- datafusion/proto/src/logical_plan/to_proto.rs | 21 ++- datafusion/sql/src/expr/identifier.rs | 19 +-- datafusion/sql/src/planner.rs | 2 +- datafusion/sql/src/select.rs | 6 +- datafusion/sql/src/unparser/expr.rs | 12 +- datafusion/sql/src/unparser/plan.rs | 33 ++--- datafusion/sql/src/unparser/rewrite.rs | 6 +- datafusion/sql/src/utils.rs | 16 +-- datafusion/sqllogictest/test_files/joins.slt | 18 +-- datafusion/sqllogictest/test_files/unnest.slt | 8 +- .../src/logical_plan/producer/expr/mod.rs | 2 +- .../producer/rel/aggregate_rel.rs | 5 +- 42 files changed, 348 insertions(+), 323 deletions(-) diff --git a/datafusion-cli/src/functions.rs b/datafusion-cli/src/functions.rs index cef057545c113..72b5ede7ee8ea 100644 --- a/datafusion-cli/src/functions.rs +++ b/datafusion-cli/src/functions.rs @@ -32,7 +32,7 @@ use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef, TimeUnit}; use arrow::record_batch::RecordBatch; use arrow::util::pretty::pretty_format_batches; use datafusion::catalog::{Session, TableFunctionImpl}; -use datafusion::common::{Column, plan_err}; +use datafusion::common::plan_err; use datafusion::datasource::TableProvider; use datafusion::datasource::memory::MemorySourceConfig; use datafusion::error::Result; @@ -329,7 +329,7 @@ impl TableFunctionImpl for ParquetMetadataFunc { fn call(&self, exprs: &[Expr]) -> Result> { let filename = match exprs.first() { Some(Expr::Literal(ScalarValue::Utf8(Some(s)), _)) => s, // single quote: parquet_metadata('x.parquet') - Some(Expr::Column(Column { name, .. })) => name, // double quote: parquet_metadata("x.parquet") + Some(Expr::Column(col)) => &col.name, // double quote: parquet_metadata("x.parquet") _ => { return plan_err!( "parquet_metadata requires string argument as its input" diff --git a/datafusion/catalog-listing/src/helpers.rs b/datafusion/catalog-listing/src/helpers.rs index 031b2ebfb8109..bc11ef9f612b7 100644 --- a/datafusion/catalog-listing/src/helpers.rs +++ b/datafusion/catalog-listing/src/helpers.rs @@ -36,8 +36,8 @@ use futures::stream::FuturesUnordered; use futures::{StreamExt, TryStreamExt, stream::BoxStream}; use log::{debug, trace}; +use datafusion_common::DFSchema; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; -use datafusion_common::{Column, DFSchema}; use datafusion_expr::{Expr, Volatility}; use datafusion_physical_expr::create_physical_expr; use object_store::path::Path; @@ -51,8 +51,8 @@ use object_store::{ObjectMeta, ObjectStore}; pub fn expr_applicable_for_cols(col_names: &[&str], expr: &Expr) -> bool { let mut is_applicable = true; expr.apply(|expr| match expr { - Expr::Column(Column { name, .. }) => { - is_applicable &= col_names.contains(&name.as_str()); + Expr::Column(col) => { + is_applicable &= col_names.contains(&col.name.as_str()); if is_applicable { Ok(TreeNodeRecursion::Jump) } else { @@ -61,7 +61,7 @@ pub fn expr_applicable_for_cols(col_names: &[&str], expr: &Expr) -> bool { } Expr::Literal(_, _) | Expr::Alias(_) - | Expr::OuterReferenceColumn(_, _) + | Expr::OuterReferenceColumn(_) | Expr::ScalarVariable(_, _) | Expr::Not(_) | Expr::IsNotNull(_) @@ -251,13 +251,13 @@ fn populate_partition_values<'a>( if let Expr::BinaryExpr(BinaryExpr { left, op, right }) = filter { match op { Operator::Eq => match (left.as_ref(), right.as_ref()) { - (Expr::Column(Column { name, .. }), Expr::Literal(val, _)) - | (Expr::Literal(val, _), Expr::Column(Column { name, .. })) => { + (Expr::Column(col), Expr::Literal(val, _)) + | (Expr::Literal(val, _), Expr::Column(col)) => { if partition_values - .insert(name, PartitionValue::Single(val.to_string())) + .insert(&col.name, PartitionValue::Single(val.to_string())) .is_some() { - partition_values.insert(name, PartitionValue::Multi); + partition_values.insert(&col.name, PartitionValue::Multi); } } _ => {} diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs index c7f0b5a4f4881..bc4dee93bbd6b 100644 --- a/datafusion/common/src/column.rs +++ b/datafusion/common/src/column.rs @@ -29,7 +29,12 @@ use std::fmt; #[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct Column { /// relation/table reference. - pub relation: Option, + /// + /// Boxed to reduce the size of the `Column` struct (and thus `Expr` enum). + /// `TableReference` is ~56 bytes but `Box` is only 8 bytes. + /// This keeps `Column` small enough to be stored inline in `Expr` without + /// boxing the entire `Column`, avoiding heap allocation on the hottest path. + pub relation: Option>, /// field/column name. pub name: String, /// Original source code location, if known @@ -57,7 +62,7 @@ impl Column { name: impl Into, ) -> Self { Self { - relation: relation.map(|r| r.into()), + relation: relation.map(|r| Box::new(r.into())), name: name.into(), spans: Spans::new(), } @@ -91,24 +96,24 @@ impl Column { let (relation, name) = match idents.len() { 1 => (None, idents.remove(0)), 2 => ( - Some(TableReference::Bare { + Some(Box::new(TableReference::Bare { table: idents.remove(0).into(), - }), + })), idents.remove(0), ), 3 => ( - Some(TableReference::Partial { + Some(Box::new(TableReference::Partial { schema: idents.remove(0).into(), table: idents.remove(0).into(), - }), + })), idents.remove(0), ), 4 => ( - Some(TableReference::Full { + Some(Box::new(TableReference::Full { catalog: idents.remove(0).into(), schema: idents.remove(0).into(), table: idents.remove(0).into(), - }), + })), idents.remove(0), ), // any expression that failed to parse or has more than 4 period delimited @@ -321,7 +326,7 @@ impl Column { /// Qualifies the column with the given table reference. pub fn with_relation(&self, relation: TableReference) -> Self { Self { - relation: Some(relation), + relation: Some(Box::new(relation)), ..self.clone() } } @@ -350,14 +355,22 @@ impl From for Column { /// Create a column, use qualifier and field name impl From<(Option<&TableReference>, &Field)> for Column { fn from((relation, field): (Option<&TableReference>, &Field)) -> Self { - Self::new(relation.cloned(), field.name()) + Self { + relation: relation.map(|r| Box::new(r.clone())), + name: field.name().to_string(), + spans: Spans::new(), + } } } /// Create a column, use qualifier and field name impl From<(Option<&TableReference>, &FieldRef)> for Column { fn from((relation, field): (Option<&TableReference>, &FieldRef)) -> Self { - Self::new(relation.cloned(), field.name()) + Self { + relation: relation.map(|r| Box::new(r.clone())), + name: field.name().to_string(), + spans: Spans::new(), + } } } @@ -380,8 +393,17 @@ impl fmt::Display for Column { mod tests { use super::*; use arrow::datatypes::{DataType, SchemaBuilder}; + use std::mem::size_of; use std::sync::Arc; + #[test] + fn test_column_size() { + // Column should be small enough to fit inline in Expr without boxing. + // Boxing TableReference (56 bytes -> 8 bytes pointer) shrinks Column + // from 104 to 56 bytes, keeping it inline in Expr. + assert_eq!(size_of::(), 56); + } + fn create_qualified_schema(qualifier: &str, names: Vec<&str>) -> Result { let mut schema_builder = SchemaBuilder::new(); schema_builder.extend( diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index de0aacf9e8bcd..548fd1361f856 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -399,7 +399,7 @@ impl DFSchema { /// See [Self::index_of_column] for a version that returns an error if the /// column is not found pub fn maybe_index_of_column(&self, col: &Column) -> Option { - self.index_of_column_by_name(col.relation.as_ref(), &col.name) + self.index_of_column_by_name(col.relation.as_deref(), &col.name) } /// Find the index of the column with the given qualifier and name, @@ -408,13 +408,14 @@ impl DFSchema { /// See [Self::maybe_index_of_column] for a version that returns `None` if /// the column is not found pub fn index_of_column(&self, col: &Column) -> Result { - self.maybe_index_of_column(col) - .ok_or_else(|| field_not_found(col.relation.clone(), &col.name, self)) + self.maybe_index_of_column(col).ok_or_else(|| { + field_not_found(col.relation.clone().map(|r| *r), &col.name, self) + }) } /// Check if the column is in the current schema pub fn is_column_from_schema(&self, col: &Column) -> bool { - self.index_of_column_by_name(col.relation.as_ref(), &col.name) + self.index_of_column_by_name(col.relation.as_deref(), &col.name) .is_some() } @@ -557,7 +558,7 @@ impl DFSchema { &self, column: &Column, ) -> Result<(Option<&TableReference>, &FieldRef)> { - self.qualified_field_with_name(column.relation.as_ref(), &column.name) + self.qualified_field_with_name(column.relation.as_deref(), &column.name) } /// Find if the field exists with the given name diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index fadc6ad792556..9c81215f40366 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -2466,7 +2466,7 @@ impl DataFrame { if cols.contains(field) { // Try to cast fill value to column type. If the cast fails, fallback to the original column. match value.clone().cast_to(field.data_type()) { - Ok(fill_value) => Expr::Alias(Alias { + Ok(fill_value) => Expr::Alias(Box::new(Alias { expr: Box::new(Expr::ScalarFunction(ScalarFunction { func: coalesce(), args: vec![col(field.name()), lit(fill_value)], @@ -2474,7 +2474,7 @@ impl DataFrame { relation: None, name: field.name().to_string(), metadata: None, - }), + })), Err(_) => col(field.name()), } } else { diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 6765b7f79fdd2..ce5aa710f4dd7 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -79,7 +79,7 @@ use datafusion_datasource::file_groups::FileGroup; use datafusion_datasource::memory::MemorySourceConfig; use datafusion_expr::dml::{CopyTo, InsertOp}; use datafusion_expr::expr::{ - AggregateFunction, AggregateFunctionParams, Alias, GroupingSet, NullTreatment, + AggregateFunction, AggregateFunctionParams, GroupingSet, NullTreatment, WindowFunction, WindowFunctionParams, physical_name, }; use datafusion_expr::expr_rewriter::unnormalize_cols; @@ -719,9 +719,9 @@ impl DefaultPhysicalPlanner { } = &window_fun.as_ref().params; generate_sort_key(partition_by, order_by) } - Expr::Alias(Alias { expr, .. }) => { + Expr::Alias(alias) => { // Convert &Box to &T - match &**expr { + match alias.expr.as_ref() { Expr::WindowFunction(window_fun) => { let WindowFunctionParams { partition_by, @@ -2151,7 +2151,7 @@ pub fn create_window_expr( ) -> Result> { // unpack aliased logical expressions, e.g. "sum(col) over () as total" let (name, e) = match e { - Expr::Alias(Alias { expr, name, .. }) => (name.clone(), expr.as_ref()), + Expr::Alias(alias) => (alias.name.clone(), alias.expr.as_ref()), _ => (e.schema_name().to_string(), e), }; create_window_expr_with_name(e, name, logical_schema, execution_props) @@ -2244,9 +2244,13 @@ pub fn create_aggregate_expr_and_maybe_filter( // Some functions like `count_all()` create internal aliases, // Unwrap all alias layers to get to the underlying aggregate function let (name, human_display, e) = match e { - Expr::Alias(Alias { name, .. }) => { + Expr::Alias(alias) => { let unaliased = e.clone().unalias_nested().data; - (Some(name.clone()), e.human_display().to_string(), unaliased) + ( + Some(alias.name.clone()), + e.human_display().to_string(), + unaliased, + ) } Expr::AggregateFunction(_) => ( Some(e.schema_name().to_string()), diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs index 014f356cd64cd..5a5d9f546c43e 100644 --- a/datafusion/core/tests/dataframe/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe/dataframe_functions.rs @@ -442,11 +442,11 @@ async fn test_fn_approx_percentile_cont() -> Result<()> { "); // the arg2 parameter is a complex expr, but it can be evaluated to the literal value - let alias_expr = Expr::Alias(Alias::new( + let alias_expr = Expr::Alias(Box::new(Alias::new( cast(lit(0.5), DataType::Float32), None::<&str>, "arg_2".to_string(), - )); + ))); let expr = approx_percentile_cont(col("b").sort(true, false), alias_expr, None); let df = create_test_table().await?; let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?; @@ -462,11 +462,11 @@ async fn test_fn_approx_percentile_cont() -> Result<()> { " ); - let alias_expr = Expr::Alias(Alias::new( + let alias_expr = Expr::Alias(Box::new(Alias::new( cast(lit(0.1), DataType::Float32), None::<&str>, "arg_2".to_string(), - )); + ))); let expr = approx_percentile_cont(col("b").sort(false, false), alias_expr, None); let df = create_test_table().await?; let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?; diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index bab00ced1cb13..ba6937b1d4150 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -6163,7 +6163,7 @@ async fn test_alias() -> Result<()> { .alias("table_alias")?; // All output column qualifiers are changed to "table_alias" df.schema().columns().iter().for_each(|c| { - assert_eq!(c.relation, Some("table_alias".into())); + assert_eq!(c.relation, Some(Box::new("table_alias".into()))); }); let plan = df diff --git a/datafusion/core/tests/user_defined/expr_planner.rs b/datafusion/core/tests/user_defined/expr_planner.rs index c5e5af731359f..09d59595c366b 100644 --- a/datafusion/core/tests/user_defined/expr_planner.rs +++ b/datafusion/core/tests/user_defined/expr_planner.rs @@ -55,11 +55,11 @@ impl ExprPlanner for MyCustomPlanner { }))) } BinaryOperator::Question => { - Ok(PlannerResult::Planned(Expr::Alias(Alias::new( + Ok(PlannerResult::Planned(Expr::Alias(Box::new(Alias::new( Expr::Literal(ScalarValue::Boolean(Some(true)), None), None::<&str>, format!("{} ? {}", expr.left, expr.right), - )))) + ))))) } _ => Ok(PlannerResult::Original(expr)), } diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 87e8e029a6ee5..65d785f758905 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -314,7 +314,7 @@ impl From for NullTreatment { #[derive(Clone, PartialEq, PartialOrd, Eq, Debug, Hash)] pub enum Expr { /// An expression with a specific name. - Alias(Alias), + Alias(Box), /// A named reference to a qualified field in a schema. Column(Column), /// A named reference to a variable in a registry. @@ -399,7 +399,7 @@ pub enum Expr { Placeholder(Placeholder), /// A placeholder which holds a reference to a qualified field /// in the outer query, used for correlated sub queries. - OuterReferenceColumn(FieldRef, Column), + OuterReferenceColumn(Box), /// Unnest expression Unnest(Unnest), } @@ -598,6 +598,23 @@ impl Alias { } } +/// Outer reference expression for correlated sub queries. +/// +/// This holds a reference to a qualified field in the outer query. +#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] +pub struct OuterReference { + /// The field reference from the outer query + pub field: FieldRef, + /// The column reference + pub column: Column, +} + +impl OuterReference { + pub fn new(field: FieldRef, column: Column) -> Self { + Self { field, column } + } +} + /// Binary expression #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] pub struct BinaryExpr { @@ -1528,12 +1545,10 @@ impl Expr { /// output schema. We can use this qualified name to reference the field. pub fn qualified_name(&self) -> (Option, String) { match self { - Expr::Column(Column { - relation, - name, - spans: _, - }) => (relation.clone(), name.clone()), - Expr::Alias(Alias { relation, name, .. }) => (relation.clone(), name.clone()), + Expr::Column(col) => (col.relation.as_deref().cloned(), col.name.clone()), + Expr::Alias(boxed_alias) => { + (boxed_alias.relation.clone(), boxed_alias.name.clone()) + } _ => (None, self.schema_name().to_string()), } } @@ -1567,7 +1582,7 @@ impl Expr { Expr::Case { .. } => "Case", Expr::Cast { .. } => "Cast", Expr::Column(..) => "Column", - Expr::OuterReferenceColumn(_, _) => "Outer", + Expr::OuterReferenceColumn(..) => "Outer", Expr::Exists { .. } => "Exists", Expr::GroupingSet(..) => "GroupingSet", Expr::InList { .. } => "InList", @@ -1694,7 +1709,7 @@ impl Expr { /// Return `self AS name` alias expression pub fn alias(self, name: impl Into) -> Expr { - Expr::Alias(Alias::new(self, None::<&str>, name.into())) + Expr::Alias(Box::new(Alias::new(self, None::<&str>, name.into()))) } /// Return `self AS name` alias expression with metadata @@ -1716,7 +1731,9 @@ impl Expr { name: impl Into, metadata: Option, ) -> Expr { - Expr::Alias(Alias::new(self, None::<&str>, name.into()).with_metadata(metadata)) + Expr::Alias(Box::new( + Alias::new(self, None::<&str>, name.into()).with_metadata(metadata), + )) } /// Return `self AS name` alias expression with a specific qualifier @@ -1725,7 +1742,7 @@ impl Expr { relation: Option>, name: impl Into, ) -> Expr { - Expr::Alias(Alias::new(self, relation, name.into())) + Expr::Alias(Box::new(Alias::new(self, relation, name.into()))) } /// Return `self AS name` alias expression with a specific qualifier and metadata @@ -1749,7 +1766,9 @@ impl Expr { name: impl Into, metadata: Option, ) -> Expr { - Expr::Alias(Alias::new(self, relation, name.into()).with_metadata(metadata)) + Expr::Alias(Box::new( + Alias::new(self, relation, name.into()).with_metadata(metadata), + )) } /// Remove an alias from an expression if one exists. @@ -1774,7 +1793,7 @@ impl Expr { /// ``` pub fn unalias(self) -> Expr { match self { - Expr::Alias(alias) => *alias.expr, + Expr::Alias(boxed_alias) => *boxed_alias.expr, _ => self, } } @@ -1817,15 +1836,15 @@ impl Expr { |expr| { // f_up: unalias on up so we can remove nested aliases like // `(x as foo) as bar` - if let Expr::Alias(alias) = expr { - match alias + if let Expr::Alias(boxed_alias) = expr { + match boxed_alias .metadata .as_ref() .map(|h| h.is_empty()) .unwrap_or(true) { - true => Ok(Transformed::yes(*alias.expr)), - false => Ok(Transformed::no(Expr::Alias(alias))), + true => Ok(Transformed::yes(*boxed_alias.expr)), + false => Ok(Transformed::no(Expr::Alias(boxed_alias))), } } else { Ok(Transformed::no(expr)) @@ -2143,7 +2162,7 @@ impl Expr { | Expr::SimilarTo(..) | Expr::Not(..) | Expr::Negative(..) - | Expr::OuterReferenceColumn(_, _) + | Expr::OuterReferenceColumn(..) | Expr::TryCast(..) | Expr::Unnest(..) | Expr::Wildcard { .. } @@ -2158,7 +2177,7 @@ impl Expr { /// type doesn't support tracking locations yet. pub fn spans(&self) -> Option<&Spans> { match self { - Expr::Column(col) => Some(&col.spans), + Expr::Column(boxed_col) => Some(&boxed_col.spans), _ => None, } } @@ -2231,23 +2250,10 @@ impl NormalizeEq for Expr { && self_right.normalize_eq(other_right) } } - ( - Expr::Alias(Alias { - expr: self_expr, - relation: self_relation, - name: self_name, - .. - }), - Expr::Alias(Alias { - expr: other_expr, - relation: other_relation, - name: other_name, - .. - }), - ) => { - self_name == other_name - && self_relation == other_relation - && self_expr.normalize_eq(other_expr) + (Expr::Alias(self_alias), Expr::Alias(other_alias)) => { + self_alias.name == other_alias.name + && self_alias.relation == other_alias.relation + && self_alias.expr.normalize_eq(&other_alias.expr) } ( Expr::Like(Like { @@ -2588,17 +2594,12 @@ impl HashNode for Expr { fn hash_node(&self, state: &mut H) { mem::discriminant(self).hash(state); match self { - Expr::Alias(Alias { - expr: _expr, - relation, - name, - .. - }) => { - relation.hash(state); - name.hash(state); + Expr::Alias(boxed_alias) => { + boxed_alias.relation.hash(state); + boxed_alias.name.hash(state); } - Expr::Column(column) => { - column.hash(state); + Expr::Column(boxed_col) => { + boxed_col.hash(state); } Expr::ScalarVariable(field, name) => { field.hash(state); @@ -2750,9 +2751,9 @@ impl HashNode for Expr { Expr::Placeholder(place_holder) => { place_holder.hash(state); } - Expr::OuterReferenceColumn(field, column) => { - field.hash(state); - column.hash(state); + Expr::OuterReferenceColumn(outer_ref) => { + outer_ref.field.hash(state); + outer_ref.column.hash(state); } Expr::Unnest(Unnest { expr: _expr }) => {} }; @@ -2817,12 +2818,11 @@ impl Display for SchemaDisplay<'_> { } } // Expr is not shown since it is aliased - Expr::Alias(Alias { - name, - relation: Some(relation), - .. - }) => write!(f, "{relation}.{name}"), - Expr::Alias(Alias { name, .. }) => write!(f, "{name}"), + Expr::Alias(boxed_alias) if boxed_alias.relation.is_some() => { + let relation = boxed_alias.relation.as_ref().unwrap(); + write!(f, "{relation}.{}", boxed_alias.name) + } + Expr::Alias(boxed_alias) => write!(f, "{}", boxed_alias.name), Expr::Between(Between { expr, negated, @@ -3084,7 +3084,7 @@ impl Display for SqlDisplay<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self.0 { Expr::Literal(scalar, _) => scalar.fmt(f), - Expr::Alias(Alias { name, .. }) => write!(f, "{name}"), + Expr::Alias(boxed_alias) => write!(f, "{}", boxed_alias.name), Expr::Between(Between { expr, negated, @@ -3344,10 +3344,12 @@ pub const UNNEST_COLUMN_PREFIX: &str = "UNNEST"; impl Display for Expr { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { - Expr::Alias(Alias { expr, name, .. }) => write!(f, "{expr} AS {name}"), + Expr::Alias(boxed_alias) => { + write!(f, "{} AS {}", boxed_alias.expr, boxed_alias.name) + } Expr::Column(c) => write!(f, "{c}"), - Expr::OuterReferenceColumn(_, c) => { - write!(f, "{OUTER_REFERENCE_COLUMN_PREFIX}({c})") + Expr::OuterReferenceColumn(outer_ref) => { + write!(f, "{OUTER_REFERENCE_COLUMN_PREFIX}({})", outer_ref.column) } Expr::ScalarVariable(_, var_names) => write!(f, "{}", var_names.join(".")), Expr::Literal(v, metadata) => { @@ -3593,8 +3595,8 @@ fn fmt_function( /// The difference from [Expr::schema_name] is that top-level columns are unqualified. pub fn physical_name(expr: &Expr) -> Result { match expr { - Expr::Column(col) => Ok(col.name.clone()), - Expr::Alias(alias) => Ok(alias.name.clone()), + Expr::Column(boxed_col) => Ok(boxed_col.name.clone()), + Expr::Alias(boxed_alias) => Ok(boxed_alias.name.clone()), _ => Ok(expr.schema_name().to_string()), } } @@ -4053,7 +4055,7 @@ mod test { // If this test fails when you change `Expr`, please try // `Box`ing the fields to make `Expr` smaller // See https://github.com/apache/datafusion/issues/16199 for details - assert_eq!(size_of::(), 112); + assert_eq!(size_of::(), 80); assert_eq!(size_of::(), 64); assert_eq!(size_of::(), 24); // 3 ptrs assert_eq!(size_of::>(), 24); diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 4254602d7c555..1c0d8814ca024 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -19,7 +19,8 @@ use crate::expr::{ AggregateFunction, BinaryExpr, Cast, Exists, GroupingSet, InList, InSubquery, - NullTreatment, Placeholder, TryCast, Unnest, WildcardOptions, WindowFunction, + NullTreatment, OuterReference, Placeholder, TryCast, Unnest, WildcardOptions, + WindowFunction, }; use crate::function::{ AccumulatorArgs, AccumulatorFactoryFunction, PartitionEvaluatorFactory, @@ -86,7 +87,7 @@ pub fn out_ref_col_with_metadata( let column = ident.into(); let field: FieldRef = Arc::new(Field::new(column.name(), dt, true).with_metadata(metadata)); - Expr::OuterReferenceColumn(field, column) + Expr::OuterReferenceColumn(Box::new(OuterReference::new(field, column))) } /// Create an unqualified column expression from the provided name, without normalizing diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index 32a88ab8cf310..061364d02952a 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -22,7 +22,7 @@ use std::collections::HashSet; use std::fmt::Debug; use std::sync::Arc; -use crate::expr::{Alias, Sort, Unnest}; +use crate::expr::{Sort, Unnest}; use crate::logical_plan::Projection; use crate::{Expr, ExprSchemable, LogicalPlan, LogicalPlanBuilder}; @@ -179,9 +179,9 @@ pub fn create_col_from_scalar_expr( subqry_alias: String, ) -> Result { match scalar_expr { - Expr::Alias(Alias { name, .. }) => Ok(Column::new( + Expr::Alias(boxed_alias) => Ok(Column::new( Some::(subqry_alias.into()), - name, + boxed_alias.name.clone(), )), Expr::Column(col) => Ok(col.with_relation(subqry_alias.into())), _ => { @@ -205,8 +205,8 @@ pub fn unnormalize_cols(exprs: impl IntoIterator) -> Vec { pub fn strip_outer_reference(expr: Expr) -> Expr { expr.transform(|expr| { Ok({ - if let Expr::OuterReferenceColumn(_, col) = expr { - Transformed::yes(Expr::Column(col)) + if let Expr::OuterReferenceColumn(outer_ref) = expr { + Transformed::yes(Expr::Column(outer_ref.column)) } else { Transformed::no(expr) } @@ -255,9 +255,10 @@ fn coerce_exprs_for_schema( let new_type = dst_schema.field(idx).data_type(); if new_type != &expr.get_type(src_schema)? { match expr { - Expr::Alias(Alias { expr, name, .. }) => { - Ok(expr.cast_to(new_type, src_schema)?.alias(name)) - } + Expr::Alias(boxed_alias) => Ok(boxed_alias + .expr + .cast_to(new_type, src_schema)? + .alias(boxed_alias.name)), #[expect(deprecated)] Expr::Wildcard { .. } => Ok(expr), _ => { @@ -284,7 +285,7 @@ fn coerce_exprs_for_schema( #[inline] pub fn unalias(expr: Expr) -> Expr { match expr { - Expr::Alias(Alias { expr, .. }) => unalias(*expr), + Expr::Alias(boxed_alias) => unalias(*boxed_alias.expr), _ => expr, } } diff --git a/datafusion/expr/src/expr_rewriter/order_by.rs b/datafusion/expr/src/expr_rewriter/order_by.rs index ec22be525464b..9baef9ee013c5 100644 --- a/datafusion/expr/src/expr_rewriter/order_by.rs +++ b/datafusion/expr/src/expr_rewriter/order_by.rs @@ -17,7 +17,6 @@ //! Rewrite for order by expressions -use crate::expr::Alias; use crate::expr_rewriter::normalize_col; use crate::{Cast, Expr, LogicalPlan, TryCast, expr::Sort}; @@ -137,8 +136,8 @@ fn rewrite_in_terms_of_projection( /// so avg(c) as average will match avgc fn expr_match(needle: &Expr, expr: &Expr) -> bool { // check inside aliases - if let Expr::Alias(Alias { expr, .. }) = &expr { - expr.as_ref() == needle + if let Expr::Alias(boxed_alias) = &expr { + boxed_alias.expr.as_ref() == needle } else { expr == needle } diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index f4e4f014f533c..b9a2f4902585c 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -17,9 +17,8 @@ use super::{Between, Expr, Like, predicate_bounds}; use crate::expr::{ - AggregateFunction, AggregateFunctionParams, Alias, BinaryExpr, Cast, InList, - InSubquery, Placeholder, ScalarFunction, TryCast, Unnest, WindowFunction, - WindowFunctionParams, + AggregateFunction, AggregateFunctionParams, BinaryExpr, Cast, InList, InSubquery, + Placeholder, ScalarFunction, TryCast, Unnest, WindowFunction, WindowFunctionParams, }; use crate::type_coercion::functions::{UDFCoercionExt, fields_with_udf}; use crate::udf::ReturnFieldArgs; @@ -109,16 +108,18 @@ impl ExprSchemable for Expr { #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn get_type(&self, schema: &dyn ExprSchema) -> Result { match self { - Expr::Alias(Alias { expr, name, .. }) => match &**expr { + Expr::Alias(alias) => match &*alias.expr { Expr::Placeholder(Placeholder { field, .. }) => match &field { - None => schema.data_type(&Column::from_name(name)).cloned(), + None => schema.data_type(&Column::from_name(&alias.name)).cloned(), Some(field) => Ok(field.data_type().clone()), }, - _ => expr.get_type(schema), + _ => alias.expr.get_type(schema), }, Expr::Negative(expr) => expr.get_type(schema), Expr::Column(c) => Ok(schema.data_type(c)?.clone()), - Expr::OuterReferenceColumn(field, _) => Ok(field.data_type().clone()), + Expr::OuterReferenceColumn(outer_ref) => { + Ok(outer_ref.field.data_type().clone()) + } Expr::ScalarVariable(field, _) => Ok(field.data_type().clone()), Expr::Literal(l, _) => Ok(l.data_type()), Expr::Case(case) => { @@ -212,9 +213,8 @@ impl ExprSchemable for Expr { /// column that does not exist in the schema. fn nullable(&self, input_schema: &dyn ExprSchema) -> Result { match self { - Expr::Alias(Alias { expr, .. }) | Expr::Not(expr) | Expr::Negative(expr) => { - expr.nullable(input_schema) - } + Expr::Alias(alias) => alias.expr.nullable(input_schema), + Expr::Not(expr) | Expr::Negative(expr) => expr.nullable(input_schema), Expr::InList(InList { expr, list, .. }) => { // Avoid inspecting too many expressions. @@ -246,7 +246,7 @@ impl ExprSchemable for Expr { || high.nullable(input_schema)?), Expr::Column(c) => input_schema.nullable(c), - Expr::OuterReferenceColumn(field, _) => Ok(field.is_nullable()), + Expr::OuterReferenceColumn(outer_ref) => Ok(outer_ref.field.is_nullable()), Expr::Literal(value, _) => Ok(value.is_null()), Expr::Case(case) => { let nullable_then = case @@ -433,26 +433,22 @@ impl ExprSchemable for Expr { let (relation, schema_name) = self.qualified_name(); #[expect(deprecated)] let field = match self { - Expr::Alias(Alias { - expr, - name: _, - metadata, - .. - }) => { - let mut combined_metadata = expr.metadata(schema)?; - if let Some(metadata) = metadata { + Expr::Alias(alias) => { + let mut combined_metadata = alias.expr.metadata(schema)?; + if let Some(metadata) = &alias.metadata { combined_metadata.extend(metadata.clone()); } - Ok(expr + Ok(alias + .expr .to_field(schema) .map(|(_, f)| f)? .with_field_metadata(&combined_metadata)) } Expr::Negative(expr) => expr.to_field(schema).map(|(_, f)| f), Expr::Column(c) => schema.field_from_column(c).map(Arc::clone), - Expr::OuterReferenceColumn(field, _) => { - Ok(Arc::clone(field).renamed(&schema_name)) + Expr::OuterReferenceColumn(outer_ref) => { + Ok(Arc::clone(&outer_ref.field).renamed(&schema_name)) } Expr::ScalarVariable(field, _) => Ok(Arc::clone(field).renamed(&schema_name)), Expr::Literal(l, metadata) => Ok(Arc::new( diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs index cb136229bf88d..73d4b2c50adcc 100644 --- a/datafusion/expr/src/lib.rs +++ b/datafusion/expr/src/lib.rs @@ -103,7 +103,7 @@ pub use datafusion_expr_common::signature::{ pub use datafusion_expr_common::type_coercion::binary; pub use expr::{ Between, BinaryExpr, Case, Cast, Expr, GetFieldAccess, GroupingSet, Like, - Sort as SortExpr, TryCast, WindowFunctionDefinition, + OuterReference, Sort as SortExpr, TryCast, WindowFunctionDefinition, }; pub use expr_fn::*; pub use expr_schema::ExprSchemable; diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 2e23fef1da768..1e089d845a86f 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -25,7 +25,7 @@ use std::iter::once; use std::sync::Arc; use crate::dml::CopyTo; -use crate::expr::{Alias, PlannedReplaceSelectItem, Sort as SortExpr}; +use crate::expr::{PlannedReplaceSelectItem, Sort as SortExpr}; use crate::expr_rewriter::{ coerce_plan_expr_for_schema, normalize_col, normalize_col_with_schemas_and_ambiguity_check, normalize_cols, normalize_sorts, @@ -755,8 +755,8 @@ impl LogicalPlanBuilder { // As described in https://github.com/apache/datafusion/issues/5293 let all_aliases = missing_exprs.iter().all(|e| { projection_exprs.iter().any(|proj_expr| { - if let Expr::Alias(Alias { expr, .. }) = proj_expr { - e == expr.as_ref() + if let Expr::Alias(alias) = proj_expr { + e == alias.expr.as_ref() } else { false } @@ -829,7 +829,11 @@ impl LogicalPlanBuilder { } // remove pushed down sort columns - let new_expr = schema.columns().into_iter().map(Expr::Column).collect(); + let new_expr = schema + .columns() + .into_iter() + .map(|c| Expr::Column(c)) + .collect(); let is_distinct = false; let plan = Self::add_missing_columns( @@ -1998,14 +2002,14 @@ fn replace_columns( replace: &PlannedReplaceSelectItem, ) -> Result> { for expr in exprs.iter_mut() { - if let Expr::Column(Column { name, .. }) = expr + if let Expr::Column(col) = expr && let Some((_, new_expr)) = replace .items() .iter() .zip(replace.expressions().iter()) - .find(|(item, _)| item.column_name.value == *name) + .find(|(item, _)| item.column_name.value == col.name) { - *expr = new_expr.clone().alias(name.clone()) + *expr = new_expr.clone().alias(col.name.clone()) } } Ok(exprs) @@ -2121,7 +2125,7 @@ pub fn wrap_projection_for_join_if_necessary( let mut projection = input_schema .columns() .into_iter() - .map(Expr::Column) + .map(|c| Expr::Column(c)) .collect::>(); let join_key_items = alias_join_keys .iter() @@ -2507,10 +2511,11 @@ mod tests { name, spans: _, } = *field; - let Some(TableReference::Bare { table }) = relation else { - return plan_err!( - "wrong relation: {relation:?}, expected table name" - ); + let Some(table_ref) = relation else { + return plan_err!("wrong relation: None, expected table name"); + }; + let TableReference::Bare { table } = *table_ref else { + return plan_err!("wrong relation: expected table name"); }; assert_eq!(*"employee_csv", *table); assert_eq!("id", &name); diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 032a97bdb3efa..eef14b17f6511 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -2260,7 +2260,11 @@ impl Projection { /// Create a new Projection using the specified output schema pub fn new_from_schema(input: Arc, schema: DFSchemaRef) -> Self { - let expr: Vec = schema.columns().into_iter().map(Expr::Column).collect(); + let expr: Vec = schema + .columns() + .into_iter() + .map(|c| Expr::Column(c)) + .collect(); Self { expr, input, @@ -2340,9 +2344,11 @@ impl SubqueryAlias { Expr::Column(Column::new(qualifier.cloned(), field.name())); match alias { None => column, - Some(alias) => { - Expr::Alias(Alias::new(column, qualifier.cloned(), alias)) - } + Some(alias) => Expr::Alias(Box::new(Alias::new( + column, + qualifier.cloned(), + alias, + ))), } }) .collect(); @@ -4217,7 +4223,7 @@ impl Unnest { Ok(transformed_columns .iter() .map(|(col, field)| { - (col.relation.to_owned(), field.to_owned()) + (col.relation.as_deref().cloned(), field.to_owned()) }) .collect()) } diff --git a/datafusion/expr/src/logical_plan/tree_node.rs b/datafusion/expr/src/logical_plan/tree_node.rs index a1285510da569..3c2d5b3abfe2d 100644 --- a/datafusion/expr/src/logical_plan/tree_node.rs +++ b/datafusion/expr/src/logical_plan/tree_node.rs @@ -444,7 +444,7 @@ impl LogicalPlan { .exec_columns .iter() .cloned() - .map(Expr::Column) + .map(|c| Expr::Column(c)) .collect::>(); exprs.apply_elements(f) } diff --git a/datafusion/expr/src/tree_node.rs b/datafusion/expr/src/tree_node.rs index 226c512a974d8..cffe51d968e66 100644 --- a/datafusion/expr/src/tree_node.rs +++ b/datafusion/expr/src/tree_node.rs @@ -44,8 +44,8 @@ impl TreeNode for Expr { f: F, ) -> Result { match self { - Expr::Alias(Alias { expr, .. }) - | Expr::Unnest(Unnest { expr }) + Expr::Alias(boxed_alias) => boxed_alias.expr.apply_elements(f), + Expr::Unnest(Unnest { expr }) | Expr::Not(expr) | Expr::IsNotNull(expr) | Expr::IsTrue(expr) @@ -72,7 +72,7 @@ impl TreeNode for Expr { #[expect(deprecated)] Expr::Column(_) // Treat OuterReferenceColumn as a leaf expression - | Expr::OuterReferenceColumn(_, _) + | Expr::OuterReferenceColumn(_) | Expr::ScalarVariable(_, _) | Expr::Literal(_, _) | Expr::Exists { .. } @@ -124,7 +124,7 @@ impl TreeNode for Expr { Expr::Column(_) | Expr::Wildcard { .. } | Expr::Placeholder(Placeholder { .. }) - | Expr::OuterReferenceColumn(_, _) + | Expr::OuterReferenceColumn(_) | Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::ScalarVariable(_, _) @@ -145,14 +145,17 @@ impl TreeNode for Expr { Expr::Unnest(Unnest { expr, .. }) => expr .map_elements(f)? .update_data(|expr| Expr::Unnest(Unnest { expr })), - Expr::Alias(Alias { - expr, - relation, - name, - metadata, - }) => f(*expr)?.update_data(|e| { - e.alias_qualified_with_metadata(relation, name, metadata) - }), + Expr::Alias(boxed_alias) => { + let Alias { + expr, + relation, + name, + metadata, + } = *boxed_alias; + f(*expr)?.update_data(|e| { + e.alias_qualified_with_metadata(relation, name, metadata) + }) + } Expr::InSubquery(InSubquery { expr, subquery, diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index b19299981cef3..30fa07f08013c 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -21,7 +21,7 @@ use std::cmp::Ordering; use std::collections::{BTreeSet, HashSet}; use std::sync::Arc; -use crate::expr::{Alias, Sort, WildcardOptions, WindowFunctionParams}; +use crate::expr::{Sort, WildcardOptions, WindowFunctionParams}; use crate::expr_rewriter::strip_outer_reference; use crate::{ BinaryExpr, Expr, ExprSchemable, Filter, GroupingSet, LogicalPlan, Operator, and, @@ -769,7 +769,7 @@ pub fn find_column_exprs(exprs: &[Expr]) -> Vec { exprs .iter() .flat_map(find_columns_referenced_by_expr) - .map(Expr::Column) + .map(|c| Expr::Column(c)) .collect() } @@ -997,7 +997,7 @@ fn split_conjunction_impl<'a>(expr: &'a Expr, mut exprs: Vec<&'a Expr>) -> Vec<& let exprs = split_conjunction_impl(left, exprs); split_conjunction_impl(right, exprs) } - Expr::Alias(Alias { expr, .. }) => split_conjunction_impl(expr, exprs), + Expr::Alias(alias) => split_conjunction_impl(&alias.expr, exprs), other => { exprs.push(other); exprs @@ -1021,7 +1021,7 @@ pub fn iter_conjunction(expr: &Expr) -> impl Iterator { stack.push(right); stack.push(left); } - Expr::Alias(Alias { expr, .. }) => stack.push(expr), + Expr::Alias(alias) => stack.push(&alias.expr), other => return Some(other), } } @@ -1045,7 +1045,7 @@ pub fn iter_conjunction_owned(expr: Expr) -> impl Iterator { stack.push(*right); stack.push(*left); } - Expr::Alias(Alias { expr, .. }) => stack.push(*expr), + Expr::Alias(alias) => stack.push(*alias.expr), other => return Some(other), } } @@ -1108,9 +1108,7 @@ fn split_binary_owned_impl( let exprs = split_binary_owned_impl(*left, operator, exprs); split_binary_owned_impl(*right, operator, exprs) } - Expr::Alias(Alias { expr, .. }) => { - split_binary_owned_impl(*expr, operator, exprs) - } + Expr::Alias(alias) => split_binary_owned_impl(*alias.expr, operator, exprs), other => { exprs.push(other); exprs @@ -1135,7 +1133,7 @@ fn split_binary_impl<'a>( let exprs = split_binary_impl(left, operator, exprs); split_binary_impl(right, operator, exprs) } - Expr::Alias(Alias { expr, .. }) => split_binary_impl(expr, operator, exprs), + Expr::Alias(alias) => split_binary_impl(&alias.expr, operator, exprs), other => { exprs.push(other); exprs diff --git a/datafusion/optimizer/src/analyzer/resolve_grouping_function.rs b/datafusion/optimizer/src/analyzer/resolve_grouping_function.rs index 747c54e2cd26d..151848d615911 100644 --- a/datafusion/optimizer/src/analyzer/resolve_grouping_function.rs +++ b/datafusion/optimizer/src/analyzer/resolve_grouping_function.rs @@ -103,11 +103,11 @@ fn replace_grouping_exprs( &group_expr_to_bitmap_index, is_grouping_set, )?; - projection_exprs.push(Expr::Alias(Alias::new( + projection_exprs.push(Expr::Alias(Box::new(Alias::new( grouping_expr, - column.relation, + column.relation.map(|r| *r), column.name, - ))); + )))); } _ => { projection_exprs.push(Expr::Column(column)); diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index a98678f7cf9c4..d3b95b3cd6caa 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -30,13 +30,13 @@ use arrow::temporal_conversions::SECONDS_IN_DAY; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion_common::{ - Column, DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, TableReference, + DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, TableReference, exec_err, internal_datafusion_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, }; use datafusion_expr::expr::{ - self, AggregateFunctionParams, Alias, Between, BinaryExpr, Case, Exists, InList, - InSubquery, Like, ScalarFunction, SetComparison, Sort, WindowFunction, + self, AggregateFunctionParams, Between, BinaryExpr, Case, Exists, InList, InSubquery, + Like, ScalarFunction, SetComparison, Sort, WindowFunction, }; use datafusion_expr::expr_rewriter::coerce_plan_expr_for_schema; use datafusion_expr::expr_schema::cast_subquery; @@ -759,7 +759,7 @@ impl TreeNodeRewriter for TypeCoercionRewriter<'_> { | Expr::Wildcard { .. } | Expr::GroupingSet(_) | Expr::Placeholder(_) - | Expr::OuterReferenceColumn(_, _) => Ok(Transformed::no(expr)), + | Expr::OuterReferenceColumn(_) => Ok(Transformed::no(expr)), } } } @@ -1227,15 +1227,13 @@ fn project_with_column_index( .into_iter() .enumerate() .map(|(i, e)| match e { - Expr::Alias(Alias { ref name, .. }) if name != schema.field(i).name() => { + Expr::Alias(ref alias) if alias.name.as_str() != schema.field(i).name() => { Ok(e.unalias().alias(schema.field(i).name())) } - Expr::Column(Column { - relation: _, - ref name, - spans: _, - }) if name != schema.field(i).name() => Ok(e.alias(schema.field(i).name())), - Expr::Alias { .. } | Expr::Column { .. } => Ok(e), + Expr::Column(ref col) if col.name.as_str() != schema.field(i).name() => { + Ok(e.alias(schema.field(i).name())) + } + Expr::Alias(_) | Expr::Column(_) => Ok(e), #[expect(deprecated)] Expr::Wildcard { .. } => { plan_err!("Wildcard should be expanded before type coercion") diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 2096c42770315..0053f0121cc8b 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -354,9 +354,8 @@ impl CommonSubexprEliminate { } else { expr_rewritten }; - if let Expr::Alias(Alias { expr, name, .. }) = - expr_rewritten - { + if let Expr::Alias(alias) = expr_rewritten { + let Alias { expr, name, .. } = *alias; agg_exprs.push(expr.alias(&name)); proj_exprs .push(Expr::Column(Column::from_name(name))); diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs index 52d777f874fa8..1457f94d17da4 100644 --- a/datafusion/optimizer/src/decorrelate.rs +++ b/datafusion/optimizer/src/decorrelate.rs @@ -27,7 +27,6 @@ use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter, }; use datafusion_common::{Column, DFSchemaRef, HashMap, Result, ScalarValue, plan_err}; -use datafusion_expr::expr::Alias; use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::utils::{ collect_subquery_cols, conjunction, find_join_exprs, split_conjunction, @@ -526,9 +525,9 @@ fn proj_exprs_evaluation_result_on_empty_batch( let result_expr = expr .clone() .transform_up(|expr| { - if let Expr::Column(Column { name, .. }) = &expr { + if let Expr::Column(col) = &expr { if let Some(result_expr) = - input_expr_result_map_for_count_bug.get(name) + input_expr_result_map_for_count_bug.get(&col.name) { Ok(Transformed::yes(result_expr.clone())) } else { @@ -545,12 +544,8 @@ fn proj_exprs_evaluation_result_on_empty_batch( let simplifier = ExprSimplifier::new(info); let result_expr = simplifier.simplify(result_expr)?; let expr_name = match expr { - Expr::Alias(Alias { name, .. }) => name.to_string(), - Expr::Column(Column { - relation: _, - name, - spans: _, - }) => name.to_string(), + Expr::Alias(alias) => alias.name.to_string(), + Expr::Column(col) => col.name.to_string(), _ => expr.schema_name().to_string(), }; expr_result_map_for_count_bug.insert(expr_name, result_expr); @@ -568,8 +563,10 @@ fn filter_exprs_evaluation_result_on_empty_batch( let result_expr = filter_expr .clone() .transform_up(|expr| { - if let Expr::Column(Column { name, .. }) = &expr { - if let Some(result_expr) = input_expr_result_map_for_count_bug.get(name) { + if let Expr::Column(col) = &expr { + if let Some(result_expr) = + input_expr_result_map_for_count_bug.get(&col.name) + { Ok(Transformed::yes(result_expr.clone())) } else { Ok(Transformed::no(expr)) diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 9cccb20bcc45e..ed2f048118e49 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -552,16 +552,21 @@ fn merge_consecutive_projections(proj: Projection) -> Result rewrite_expr(*expr, &prev_projection).map(|result| { - result.update_data(|expr| { - Expr::Alias(Alias::new(expr, relation, name).with_metadata(metadata)) + Expr::Alias(alias) => { + let Alias { + expr, + relation, + name, + metadata, + } = *alias; + rewrite_expr(*expr, &prev_projection).map(|result| { + result.update_data(|expr| { + Expr::Alias(Box::new( + Alias::new(expr, relation, name).with_metadata(metadata), + )) + }) }) - }), + } e => rewrite_expr(e, &prev_projection), } })?; @@ -635,14 +640,10 @@ fn rewrite_expr(expr: Expr, input: &Projection) -> Result> { match expr { // remove any intermediate aliases if they do not carry metadata Expr::Alias(alias) => { - match alias - .metadata - .as_ref() - .map(|h| h.is_empty()) - .unwrap_or(true) - { - true => Ok(Transformed::yes(*alias.expr)), - false => Ok(Transformed::no(Expr::Alias(alias))), + let a = *alias; + match a.metadata.as_ref().map(|h| h.is_empty()).unwrap_or(true) { + true => Ok(Transformed::yes(*a.expr)), + false => Ok(Transformed::no(Expr::Alias(Box::new(a)))), } } Expr::Column(col) => { @@ -676,8 +677,8 @@ fn outer_columns<'a>(expr: &'a Expr, columns: &mut HashSet<&'a Column>) { // inspect_expr_pre doesn't handle subquery references, so find them explicitly expr.apply(|expr| { match expr { - Expr::OuterReferenceColumn(_, col) => { - columns.insert(col); + Expr::OuterReferenceColumn(outer_ref) => { + columns.insert(&outer_ref.column); } Expr::ScalarSubquery(subquery) => { outer_columns_helper_multi(&subquery.outer_ref_columns, columns); @@ -857,7 +858,8 @@ pub fn is_projection_unnecessary( |((field_relation, field_name), expr)| { // Check if the expression is a column and if it matches the field name if let Expr::Column(col) = expr { - col.relation.as_ref() == field_relation && col.name.eq(field_name.name()) + col.relation.as_deref() == field_relation + && col.name.eq(field_name.name()) } else { false } diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 15bb5db07d2c2..80fc1a1f3a50c 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -266,7 +266,7 @@ fn can_evaluate_as_join_condition(predicate: &Expr) -> Result { | Expr::InSubquery(_) | Expr::SetComparison(_) | Expr::ScalarSubquery(_) - | Expr::OuterReferenceColumn(_, _) + | Expr::OuterReferenceColumn(_) | Expr::Unnest(_) => { is_evaluate = false; Ok(TreeNodeRecursion::Stop) diff --git a/datafusion/optimizer/src/rewrite_set_comparison.rs b/datafusion/optimizer/src/rewrite_set_comparison.rs index c8c35b518743a..97482ba5acfc1 100644 --- a/datafusion/optimizer/src/rewrite_set_comparison.rs +++ b/datafusion/optimizer/src/rewrite_set_comparison.rs @@ -22,7 +22,7 @@ use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{Column, DFSchema, ExprSchema, Result, ScalarValue, plan_err}; -use datafusion_expr::expr::{self, Exists, SetComparison, SetQuantifier}; +use datafusion_expr::expr::{self, Exists, OuterReference, SetComparison, SetQuantifier}; use datafusion_expr::logical_plan::Subquery; use datafusion_expr::logical_plan::builder::LogicalPlanBuilder; use datafusion_expr::{Expr, LogicalPlan, lit}; @@ -159,12 +159,11 @@ fn to_outer_reference(expr: Expr, outer_schema: &DFSchema) -> Result { expr.transform_up(|expr| match expr { Expr::Column(col) => { let field = outer_schema.field_from_column(&col)?; - Ok(Transformed::yes(Expr::OuterReferenceColumn( - Arc::clone(field), - col, - ))) + Ok(Transformed::yes(Expr::OuterReferenceColumn(Box::new( + OuterReference::new(Arc::clone(field), col), + )))) } - Expr::OuterReferenceColumn(_, _) => Ok(Transformed::no(expr)), + Expr::OuterReferenceColumn(_) => Ok(Transformed::no(expr)), _ => Ok(Transformed::no(expr)), }) .map(|t| t.data) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index c6644e008645a..4f8e88c305ae2 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -634,7 +634,7 @@ impl ConstEvaluator { Expr::AggregateFunction { .. } | Expr::ScalarVariable(_, _) | Expr::Column(_) - | Expr::OuterReferenceColumn(_, _) + | Expr::OuterReferenceColumn(_) | Expr::Exists { .. } | Expr::InSubquery(_) | Expr::SetComparison(_) diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index 84a6aa4309872..8e72bc5c5a2c8 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -30,7 +30,7 @@ use datafusion_common::{ DFSchema, Result, ScalarValue, ToDFSchema, exec_err, not_impl_err, plan_err, }; use datafusion_expr::execution_props::ExecutionProps; -use datafusion_expr::expr::{Alias, Cast, InList, Placeholder, ScalarFunction}; +use datafusion_expr::expr::{Cast, InList, Placeholder, ScalarFunction}; use datafusion_expr::var_provider::VarType; use datafusion_expr::var_provider::is_system_variables; use datafusion_expr::{ @@ -114,18 +114,22 @@ pub fn create_physical_expr( let input_schema = input_dfschema.as_arrow(); match e { - Expr::Alias(Alias { expr, metadata, .. }) => { - if let Expr::Literal(v, prior_metadata) = expr.as_ref() { + Expr::Alias(alias) => { + if let Expr::Literal(v, prior_metadata) = alias.expr.as_ref() { let new_metadata = FieldMetadata::merge_options( prior_metadata.as_ref(), - metadata.as_ref(), + alias.metadata.as_ref(), ); Ok(Arc::new(Literal::new_with_metadata( v.clone(), new_metadata, ))) } else { - Ok(create_physical_expr(expr, input_dfschema, execution_props)?) + Ok(create_physical_expr( + &alias.expr, + input_dfschema, + execution_props, + )?) } } Expr::Column(c) => { diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index a653f517b7275..a1d755c42c4a9 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -364,7 +364,7 @@ pub fn parse_expr( builder.build().map_err(Error::DataFusionError) } - ExprType::Alias(alias) => Ok(Expr::Alias(Alias::new( + ExprType::Alias(alias) => Ok(Expr::Alias(Box::new(Alias::new( parse_required_expr(alias.expr.as_deref(), registry, "expr", codec)?, alias .relation @@ -372,7 +372,7 @@ pub fn parse_expr( .map(|r| TableReference::try_from(r.clone())) .transpose()?, alias.alias.clone(), - ))), + )))), ExprType::IsNullExpr(is_null) => Ok(Expr::IsNull(Box::new(parse_required_expr( is_null.expr.as_deref(), registry, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index fe63fce6ee260..4dbfd9bee20b0 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -25,8 +25,8 @@ use datafusion_common::{NullEquality, TableReference, UnnestOptions}; use datafusion_expr::WriteOp; use datafusion_expr::dml::InsertOp; use datafusion_expr::expr::{ - self, AggregateFunctionParams, Alias, Between, BinaryExpr, Cast, GroupingSet, InList, - Like, NullTreatment, Placeholder, ScalarFunction, Unnest, + self, AggregateFunctionParams, Between, BinaryExpr, Cast, GroupingSet, InList, Like, + NullTreatment, Placeholder, ScalarFunction, Unnest, }; use datafusion_expr::{ Expr, JoinConstraint, JoinType, SortExpr, TryCast, WindowFrame, WindowFrameBound, @@ -197,20 +197,17 @@ pub fn serialize_expr( Expr::Column(c) => protobuf::LogicalExprNode { expr_type: Some(ExprType::Column(c.into())), }, - Expr::Alias(Alias { - expr, - relation, - name, - metadata, - }) => { + Expr::Alias(a) => { let alias = Box::new(protobuf::AliasNode { - expr: Some(Box::new(serialize_expr(expr.as_ref(), codec)?)), - relation: relation + expr: Some(Box::new(serialize_expr(a.expr.as_ref(), codec)?)), + relation: a + .relation .to_owned() .map(|r| vec![r.into()]) .unwrap_or(vec![]), - alias: name.to_owned(), - metadata: metadata + alias: a.name.to_owned(), + metadata: a + .metadata .as_ref() .map(|m| m.to_hashmap()) .unwrap_or(HashMap::new()), diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs index cca09df0db027..dadf05a4b1d80 100644 --- a/datafusion/sql/src/expr/identifier.rs +++ b/datafusion/sql/src/expr/identifier.rs @@ -21,6 +21,7 @@ use datafusion_common::{ Column, DFSchema, Result, Span, TableReference, assert_or_internal_err, exec_datafusion_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, }; +use datafusion_expr::expr::OuterReference; use datafusion_expr::planner::PlannerResult; use datafusion_expr::{Case, Expr}; use sqlparser::ast::{CaseWhen, Expr as SQLExpr, Ident}; @@ -81,10 +82,10 @@ impl SqlToRel<'_, S> { outer.qualified_field_with_unqualified_name(normalize_ident.as_str()) { // Found an exact match on a qualified name in the outer plan schema, so this is an outer reference column - return Ok(Expr::OuterReferenceColumn( - Arc::clone(field), - Column::from((qualifier, field)), - )); + return Ok(Expr::OuterReferenceColumn(Box::new(OuterReference { + field: Arc::clone(field), + column: Column::from((qualifier, field)), + }))); } } @@ -190,10 +191,12 @@ impl SqlToRel<'_, S> { // Found matching field with no spare identifier(s) Some((field, qualifier, _nested_names)) => { // Found an exact match on a qualified name in the outer plan schema, so this is an outer reference column - Ok(Expr::OuterReferenceColumn( - Arc::clone(field), - Column::from((qualifier, field)), - )) + Ok(Expr::OuterReferenceColumn(Box::new( + OuterReference { + field: Arc::clone(field), + column: Column::from((qualifier, field)), + }, + ))) } // Found no matching field, will return a default None => continue, diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index dd63cfce5e4a2..992ebc5509df4 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -567,7 +567,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Ok(()) } else { Err(field_not_found( - col.relation.clone(), + col.relation.as_deref().cloned(), col.name.as_str(), schema, )) diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 28e7ac2f205b8..58c89a452ef6d 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -31,7 +31,7 @@ use datafusion_common::error::DataFusionErrorBuilder; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; use datafusion_common::{Column, DFSchema, Result, not_impl_err, plan_err}; use datafusion_common::{RecursionUnnestOption, UnnestOptions}; -use datafusion_expr::expr::{Alias, PlannedReplaceSelectItem, WildcardOptions}; +use datafusion_expr::expr::{PlannedReplaceSelectItem, WildcardOptions}; use datafusion_expr::expr_rewriter::{ normalize_col, normalize_col_with_schemas_and_ambiguity_check, normalize_sorts, }; @@ -198,8 +198,8 @@ impl SqlToRel<'_, S> { .iter() .filter(|select_expr| match select_expr { Expr::AggregateFunction(_) => false, - Expr::Alias(Alias { expr, name: _, .. }) => { - !matches!(**expr, Expr::AggregateFunction(_)) + Expr::Alias(alias) => { + !matches!(*alias.expr.as_ref(), Expr::AggregateFunction(_)) } _ => true, }) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 5f6612830ac1f..6278b8aeab13a 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -45,7 +45,7 @@ use datafusion_common::{ }; use datafusion_expr::{ Between, BinaryExpr, Case, Cast, Expr, GroupingSet, Like, Operator, TryCast, - expr::{Alias, Exists, InList, ScalarFunction, SetQuantifier, Sort, WindowFunction}, + expr::{Exists, InList, ScalarFunction, SetQuantifier, Sort, WindowFunction}, }; use sqlparser::ast::helpers::attached_token::AttachedToken; use sqlparser::tokenizer::Span; @@ -192,7 +192,7 @@ impl Unparser<'_> { Ok(self.cast_to_sql(expr, data_type)?) } Expr::Literal(value, _) => Ok(self.scalar_to_sql(value)?), - Expr::Alias(Alias { expr, name: _, .. }) => self.expr_to_sql_inner(expr), + Expr::Alias(alias) => self.expr_to_sql_inner(&alias.expr), Expr::WindowFunction(window_fun) => { let WindowFunction { fun, @@ -549,7 +549,7 @@ impl Unparser<'_> { Expr::Placeholder(p) => { Ok(ast::Expr::value(ast::Value::Placeholder(p.id.to_string()))) } - Expr::OuterReferenceColumn(_, col) => self.col_to_sql(col), + Expr::OuterReferenceColumn(outer_ref) => self.col_to_sql(&outer_ref.column), Expr::Unnest(unnest) => self.unnest_to_sql(unnest), } } @@ -1908,7 +1908,7 @@ mod tests { ((col("a") + col("b")).gt(lit(4)), r#"((a + b) > 4)"#), ( Expr::Column(Column { - relation: Some(TableReference::partial("a", "b")), + relation: Some(Box::new(TableReference::partial("a", "b"))), name: "c".to_string(), spans: Spans::new(), }) @@ -2285,7 +2285,9 @@ mod tests { ( Expr::Unnest(Unnest { expr: Box::new(Expr::Column(Column { - relation: Some(TableReference::partial("schema", "table")), + relation: Some(Box::new(TableReference::partial( + "schema", "table", + ))), name: "array_col".to_string(), spans: Spans::new(), })), diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 9f770f9f45e1d..02026e08e0ecd 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -47,7 +47,7 @@ use datafusion_expr::expr::OUTER_REFERENCE_COLUMN_PREFIX; use datafusion_expr::{ BinaryExpr, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan, LogicalPlanBuilder, Operator, Projection, SortExpr, TableScan, Unnest, - UserDefinedLogicalNode, expr::Alias, + UserDefinedLogicalNode, }; use sqlparser::ast::{self, Ident, OrderByKind, SetExpr, TableAliasColumnDef}; use std::{sync::Arc, vec}; @@ -1003,9 +1003,9 @@ impl Unparser<'_> { /// /// `outer_ref` is the display result of [Expr::OuterReferenceColumn] fn check_unnest_placeholder_with_outer_ref(expr: &Expr) -> Option { - if let Expr::Alias(Alias { expr, .. }) = expr - && let Expr::Column(Column { name, .. }) = expr.as_ref() - && let Some(prefix) = name.strip_prefix(UNNEST_PLACEHOLDER) + if let Expr::Alias(alias) = expr + && let Expr::Column(col) = alias.expr.as_ref() + && let Some(prefix) = col.name.strip_prefix(UNNEST_PLACEHOLDER) { if prefix.starts_with(&format!("({OUTER_REFERENCE_COLUMN_PREFIX}(")) { return Some(UnnestInputType::OuterReference); @@ -1207,16 +1207,16 @@ impl Unparser<'_> { fn select_item_to_sql(&self, expr: &Expr) -> Result { match expr { - Expr::Alias(Alias { expr, name, .. }) => { - let inner = self.expr_to_sql(expr)?; + Expr::Alias(alias) => { + let inner = self.expr_to_sql(&alias.expr)?; // Determine the alias name to use let col_name = if let Some(rewritten_name) = - self.dialect.col_alias_overrides(name)? + self.dialect.col_alias_overrides(&alias.name)? { rewritten_name.to_string() } else { - name.to_string() + alias.name.to_string() }; Ok(ast::SelectItem::ExprWithAlias { @@ -1280,22 +1280,13 @@ impl Unparser<'_> { let mut object_names = Vec::with_capacity(join_conditions.len()); for (left, right) in join_conditions { match (left, right) { - ( - Expr::Column(Column { - relation: _, - name: left_name, - spans: _, - }), - Expr::Column(Column { - relation: _, - name: right_name, - spans: _, - }), - ) if left_name == right_name => { + (Expr::Column(left_col), Expr::Column(right_col)) + if left_col.name == right_col.name => + { // For example, if the join condition `t1.id = t2.id` // this is represented as two columns like `[t1.id, t2.id]` // This code forms `id` (without relation name) - let ident = self.new_ident_quoted_if_needs(left_name.to_string()); + let ident = self.new_ident_quoted_if_needs(left_col.name.to_string()); object_names.push(ast::ObjectName::from(vec![ident])); } // USING is only valid with matching column names; arbitrary expressions diff --git a/datafusion/sql/src/unparser/rewrite.rs b/datafusion/sql/src/unparser/rewrite.rs index ec1b17cd28a91..5817a8ced0d71 100644 --- a/datafusion/sql/src/unparser/rewrite.rs +++ b/datafusion/sql/src/unparser/rewrite.rs @@ -419,16 +419,16 @@ pub(super) fn inject_column_aliases( .zip(aliases) .map(|(expr, col_alias)| { let relation = match &expr { - Expr::Column(col) => col.relation.clone(), + Expr::Column(col) => col.relation.as_deref().cloned(), _ => None, }; - Expr::Alias(Alias { + Expr::Alias(Box::new(Alias { expr: Box::new(expr.clone()), relation, name: col_alias.value, metadata: None, - }) + })) }) .collect::>(); diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index 9205336a52e4e..55f3db4640157 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -30,9 +30,7 @@ use datafusion_common::{ assert_or_internal_err, exec_datafusion_err, exec_err, internal_err, plan_err, }; use datafusion_expr::builder::get_struct_unnested_columns; -use datafusion_expr::expr::{ - Alias, GroupingSet, Unnest, WindowFunction, WindowFunctionParams, -}; +use datafusion_expr::expr::{GroupingSet, Unnest, WindowFunction, WindowFunctionParams}; use datafusion_expr::utils::{expr_as_column_expr, find_column_exprs}; use datafusion_expr::{ ColumnUnnestList, Expr, ExprSchemable, LogicalPlan, col, expr_vec_fmt, @@ -196,7 +194,7 @@ pub(crate) fn extract_aliases(exprs: &[Expr]) -> HashMap { exprs .iter() .filter_map(|expr| match expr { - Expr::Alias(Alias { expr, name, .. }) => Some((name.clone(), *expr.clone())), + Expr::Alias(alias) => Some((alias.name.clone(), *alias.expr.clone())), _ => None, }) .collect::>() @@ -219,7 +217,7 @@ pub(crate) fn resolve_positions_to_exprs( let index = (position - 1) as usize; let select_expr = &select_exprs[index]; Ok(match select_expr { - Expr::Alias(Alias { expr, .. }) => *expr.clone(), + Expr::Alias(alias) => *alias.expr.clone(), _ => select_expr.clone(), }) } @@ -264,7 +262,7 @@ pub fn window_expr_common_partition_keys(window_exprs: &[Expr]) -> Result<&[Expr } = window_fun.as_ref(); Ok(partition_by) } - Expr::Alias(Alias { expr, .. }) => match expr.as_ref() { + Expr::Alias(alias) => match alias.expr.as_ref() { Expr::WindowFunction(window_fun) => { let WindowFunction { params: WindowFunctionParams { partition_by, .. }, @@ -418,8 +416,8 @@ impl RecursiveUnnestRewriter<'_> { return true; } // Allow struct unnest when root is an alias wrapping the unnest - if let Expr::Alias(Alias { expr: inner, .. }) = self.root_expr { - return inner.as_ref() == expr; + if let Expr::Alias(alias) = self.root_expr { + return alias.expr.as_ref() == expr; } false } @@ -459,7 +457,7 @@ impl RecursiveUnnestRewriter<'_> { .insert(Column::from_name(placeholder_name.clone()), None); Ok(get_struct_unnested_columns(&placeholder_name, inner_fields) .into_iter() - .map(Expr::Column) + .map(|c| Expr::Column(c)) .collect()) } DataType::List(_) diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index dd7f4710d9dbb..be8ebf3459dce 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -3965,12 +3965,12 @@ logical_plan 09)------------Unnest: lists[__unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)))|depth=1] structs[] 10)--------------Projection: generate_series(Int64(1), CAST(outer_ref(t1.t1_int) AS Int64)) AS __unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int))) 11)----------------EmptyRelation: rows=1 -physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(Field { name: "t1_int", data_type: UInt32, nullable: true }, Column { relation: Some(Bare { table: "t1" }), name: "t1_int" }) +physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(OuterReference { field: Field { name: "t1_int", data_type: UInt32, nullable: true }, column: Column { relation: Some(Bare { table: "t1" }), name: "t1_int" } }) # Test CROSS JOIN LATERAL syntax (execution) # TODO: https://github.com/apache/datafusion/issues/10048 -query error DataFusion error: This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn\(Field \{ name: "t1_int", data_type: UInt32, nullable: true \}, Column \{ relation: Some\(Bare \{ table: "t1" \}\), name: "t1_int" \}\) +query error DataFusion error: This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn\(OuterReference \{ field: Field \{ name: "t1_int", data_type: UInt32, nullable: true \}, column: Column \{ relation: Some\(Bare \{ table: "t1" \}\), name: "t1_int" \} \}\) select t1_id, t1_name, i from join_t1 t1 cross join lateral (select * from unnest(generate_series(1, t1_int))) as series(i); @@ -3990,12 +3990,12 @@ logical_plan 09)------------Unnest: lists[__unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)))|depth=1] structs[] 10)--------------Projection: generate_series(Int64(1), CAST(outer_ref(t2.t1_int) AS Int64)) AS __unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int))) 11)----------------EmptyRelation: rows=1 -physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(Field { name: "t1_int", data_type: UInt32, nullable: true }, Column { relation: Some(Bare { table: "t2" }), name: "t1_int" }) +physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(OuterReference { field: Field { name: "t1_int", data_type: UInt32, nullable: true }, column: Column { relation: Some(Bare { table: "t2" }), name: "t1_int" } }) # Test INNER JOIN LATERAL syntax (execution) # TODO: https://github.com/apache/datafusion/issues/10048 -query error DataFusion error: This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn\(Field \{ name: "t1_int", data_type: UInt32, nullable: true \}, Column \{ relation: Some\(Bare \{ table: "t2" \}\), name: "t1_int" \}\) +query error DataFusion error: This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn\(OuterReference \{ field: Field \{ name: "t1_int", data_type: UInt32, nullable: true \}, column: Column \{ relation: Some\(Bare \{ table: "t2" \}\), name: "t1_int" \} \}\) select t1_id, t1_name, i from join_t1 t2 inner join lateral (select * from unnest(generate_series(1, t1_int))) as series(i) on(t1_id > i); # Test RIGHT JOIN LATERAL syntax (unsupported) @@ -4585,7 +4585,7 @@ logical_plan 05)------Subquery: 06)--------Filter: outer_ref(j1.j1_id) < j2.j2_id 07)----------TableScan: j2 projection=[j2_string, j2_id] -physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(Field { name: "j1_id", data_type: Int32, nullable: true }, Column { relation: Some(Bare { table: "j1" }), name: "j1_id" }) +physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(OuterReference { field: Field { name: "j1_id", data_type: Int32, nullable: true }, column: Column { relation: Some(Bare { table: "j1" }), name: "j1_id" } }) query TT explain SELECT * FROM j1 JOIN (j2 JOIN j3 ON(j2_id = j3_id - 2)) ON(j1_id = j2_id), LATERAL (SELECT * FROM j3 WHERE j3_string = j2_string) as j4 @@ -4601,7 +4601,7 @@ logical_plan 08)----Subquery: 09)------Filter: j3.j3_string = outer_ref(j2.j2_string) 10)--------TableScan: j3 projection=[j3_string, j3_id] -physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(Field { name: "j2_string", data_type: Utf8View, nullable: true }, Column { relation: Some(Bare { table: "j2" }), name: "j2_string" }) +physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(OuterReference { field: Field { name: "j2_string", data_type: Utf8View, nullable: true }, column: Column { relation: Some(Bare { table: "j2" }), name: "j2_string" } }) query TT explain SELECT * FROM j1, LATERAL (SELECT * FROM j1, LATERAL (SELECT * FROM j2 WHERE j1_id = j2_id) as j2) as j2; @@ -4617,7 +4617,7 @@ logical_plan 08)----------Subquery: 09)------------Filter: outer_ref(j1.j1_id) = j2.j2_id 10)--------------TableScan: j2 projection=[j2_string, j2_id] -physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(Field { name: "j1_id", data_type: Int32, nullable: true }, Column { relation: Some(Bare { table: "j1" }), name: "j1_id" }) +physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(OuterReference { field: Field { name: "j1_id", data_type: Int32, nullable: true }, column: Column { relation: Some(Bare { table: "j1" }), name: "j1_id" } }) query TT explain SELECT j1_string, j2_string FROM j1 LEFT JOIN LATERAL (SELECT * FROM j2 WHERE j1_id < j2_id) AS j2 ON(true); @@ -4630,7 +4630,7 @@ logical_plan 05)------Subquery: 06)--------Filter: outer_ref(j1.j1_id) < j2.j2_id 07)----------TableScan: j2 projection=[j2_string, j2_id] -physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(Field { name: "j1_id", data_type: Int32, nullable: true }, Column { relation: Some(Bare { table: "j1" }), name: "j1_id" }) +physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(OuterReference { field: Field { name: "j1_id", data_type: Int32, nullable: true }, column: Column { relation: Some(Bare { table: "j1" }), name: "j1_id" } }) query TT explain SELECT * FROM j1, (j2 LEFT JOIN LATERAL (SELECT * FROM j3 WHERE j1_id + j2_id = j3_id) AS j3 ON(true)); @@ -4644,7 +4644,7 @@ logical_plan 06)------Subquery: 07)--------Filter: outer_ref(j1.j1_id) + outer_ref(j2.j2_id) = j3.j3_id 08)----------TableScan: j3 projection=[j3_string, j3_id] -physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(Field { name: "j1_id", data_type: Int32, nullable: true }, Column { relation: Some(Bare { table: "j1" }), name: "j1_id" }) +physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(OuterReference { field: Field { name: "j1_id", data_type: Int32, nullable: true }, column: Column { relation: Some(Bare { table: "j1" }), name: "j1_id" } }) query TT explain SELECT * FROM j1, LATERAL (SELECT 1) AS j2; diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index 1a6b82020c667..28c2ad10f4d16 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -889,11 +889,11 @@ select count(*) from (select unnest(range(0, 100000)) id) t inner join (select u # Test implicit LATERAL support for UNNEST # Issue: https://github.com/apache/datafusion/issues/13659 # TODO: https://github.com/apache/datafusion/issues/10048 -query error DataFusion error: This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn\(Field \{ name: "column1", data_type: List\(Field \{ data_type: Int64, nullable: true \}\), nullable: true \}, Column \{ relation: Some\(Bare \{ table: "u" \}\), name: "column1" \}\) +query error DataFusion error: This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn\(OuterReference \{ field: Field \{ name: "column1", data_type: List\(Field \{ data_type: Int64, nullable: true \}\), nullable: true \}, column: Column \{ relation: Some\(Bare \{ table: "u" \}\), name: "column1" \} \}\) select * from unnest_table u, unnest(u.column1); # Test implicit LATERAL support for UNNEST (INNER JOIN) -query error DataFusion error: This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn\(Field \{ name: "column1", data_type: List\(Field \{ data_type: Int64, nullable: true \}\), nullable: true \}, Column \{ relation: Some\(Bare \{ table: "u" \}\), name: "column1" \}\) +query error DataFusion error: This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn\(OuterReference \{ field: Field \{ name: "column1", data_type: List\(Field \{ data_type: Int64, nullable: true \}\), nullable: true \}, column: Column \{ relation: Some\(Bare \{ table: "u" \}\), name: "column1" \} \}\) select * from unnest_table u INNER JOIN unnest(u.column1) AS t(column1) ON u.column3 = t.column1; # Test implicit LATERAL planning for UNNEST @@ -909,7 +909,7 @@ logical_plan 06)------Unnest: lists[__unnest_placeholder(outer_ref(u.column1))|depth=1] structs[] 07)--------Projection: outer_ref(u.column1) AS __unnest_placeholder(outer_ref(u.column1)) 08)----------EmptyRelation: rows=1 -physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(Field { name: "column1", data_type: List(Field { data_type: Int64, nullable: true }), nullable: true }, Column { relation: Some(Bare { table: "u" }), name: "column1" }) +physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(OuterReference { field: Field { name: "column1", data_type: List(Field { data_type: Int64, nullable: true }), nullable: true }, column: Column { relation: Some(Bare { table: "u" }), name: "column1" } }) # Test implicit LATERAL planning for UNNEST (INNER JOIN) query TT @@ -925,7 +925,7 @@ logical_plan 07)--------Unnest: lists[__unnest_placeholder(outer_ref(u.column1))|depth=1] structs[] 08)----------Projection: outer_ref(u.column1) AS __unnest_placeholder(outer_ref(u.column1)) 09)------------EmptyRelation: rows=1 -physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(Field { name: "column1", data_type: List(Field { data_type: Int64, nullable: true }), nullable: true }, Column { relation: Some(Bare { table: "u" }), name: "column1" }) +physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(OuterReference { field: Field { name: "column1", data_type: List(Field { data_type: Int64, nullable: true }), nullable: true }, column: Column { relation: Some(Bare { table: "u" }), name: "column1" } }) # uncorrelated EXISTS with unnest query I diff --git a/datafusion/substrait/src/logical_plan/producer/expr/mod.rs b/datafusion/substrait/src/logical_plan/producer/expr/mod.rs index 74b1a65215376..f3e1bb5ba5b43 100644 --- a/datafusion/substrait/src/logical_plan/producer/expr/mod.rs +++ b/datafusion/substrait/src/logical_plan/producer/expr/mod.rs @@ -149,7 +149,7 @@ pub fn to_substrait_rex( Expr::Wildcard { .. } => not_impl_err!("Cannot convert {expr:?} to Substrait"), Expr::GroupingSet(expr) => not_impl_err!("Cannot convert {expr:?} to Substrait"), Expr::Placeholder(expr) => not_impl_err!("Cannot convert {expr:?} to Substrait"), - Expr::OuterReferenceColumn(_, _) => { + Expr::OuterReferenceColumn(_) => { not_impl_err!("Cannot convert {expr:?} to Substrait") } Expr::Unnest(expr) => not_impl_err!("Cannot convert {expr:?} to Substrait"), diff --git a/datafusion/substrait/src/logical_plan/producer/rel/aggregate_rel.rs b/datafusion/substrait/src/logical_plan/producer/rel/aggregate_rel.rs index dec94b0422257..997daa3ad02c5 100644 --- a/datafusion/substrait/src/logical_plan/producer/rel/aggregate_rel.rs +++ b/datafusion/substrait/src/logical_plan/producer/rel/aggregate_rel.rs @@ -19,7 +19,6 @@ use crate::logical_plan::producer::{ SubstraitProducer, from_aggregate_function, substrait_field_ref, }; use datafusion::common::{DFSchemaRef, internal_err, not_impl_err}; -use datafusion::logical_expr::expr::Alias; use datafusion::logical_expr::utils::powerset; use datafusion::logical_expr::{Aggregate, Distinct, Expr, GroupingSet}; use substrait::proto::aggregate_rel::{Grouping, Measure}; @@ -184,9 +183,7 @@ pub fn to_substrait_agg_measure( Expr::AggregateFunction(agg_fn) => { from_aggregate_function(producer, agg_fn, schema) } - Expr::Alias(Alias { expr, .. }) => { - to_substrait_agg_measure(producer, expr, schema) - } + Expr::Alias(alias) => to_substrait_agg_measure(producer, &alias.expr, schema), _ => internal_err!( "Expression must be compatible with aggregation. Unsupported expression: {:?}. Expressiontype: {}", expr,