Skip to content

Commit e9c35a6

Browse files
authored
Fix join type coercion (#14387)
1 parent 278cf9d commit e9c35a6

File tree

2 files changed

+52
-8
lines changed

2 files changed

+52
-8
lines changed

datafusion/core/tests/dataframe/mod.rs

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ use arrow::{
3030
record_batch::RecordBatch,
3131
};
3232
use arrow_array::{
33-
Array, BooleanArray, DictionaryArray, Float32Array, Float64Array, Int8Array,
34-
UnionArray,
33+
record_batch, Array, BooleanArray, DictionaryArray, Float32Array, Float64Array,
34+
Int8Array, UnionArray,
3535
};
3636
use arrow_buffer::ScalarBuffer;
3737
use arrow_schema::{ArrowError, SchemaRef, UnionFields, UnionMode};
@@ -1121,6 +1121,39 @@ async fn join() -> Result<()> {
11211121
Ok(())
11221122
}
11231123

1124+
#[tokio::test]
1125+
async fn join_coercion_unnnamed() -> Result<()> {
1126+
let ctx = SessionContext::new();
1127+
1128+
// Test that join will coerce column types when necessary
1129+
// even when the relations don't have unique names
1130+
let left = ctx.read_batch(record_batch!(
1131+
("id", Int32, [1, 2, 3]),
1132+
("name", Utf8, ["a", "b", "c"])
1133+
)?)?;
1134+
let right = ctx.read_batch(record_batch!(
1135+
("id", Int32, [10, 3]),
1136+
("name", Utf8View, ["d", "c"]) // Utf8View is a different type
1137+
)?)?;
1138+
let cols = vec!["name", "id"];
1139+
1140+
let filter = None;
1141+
let join = right.join(left, JoinType::LeftAnti, &cols, &cols, filter)?;
1142+
let results = join.collect().await?;
1143+
1144+
assert_batches_sorted_eq!(
1145+
[
1146+
"+----+------+",
1147+
"| id | name |",
1148+
"+----+------+",
1149+
"| 10 | d |",
1150+
"+----+------+",
1151+
],
1152+
&results
1153+
);
1154+
Ok(())
1155+
}
1156+
11241157
#[tokio::test]
11251158
async fn join_on() -> Result<()> {
11261159
let left = test_table_with_name("a")

datafusion/optimizer/src/analyzer/type_coercion.rs

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,15 @@ impl<'a> TypeCoercionRewriter<'a> {
190190
.map(|(lhs, rhs)| {
191191
// coerce the arguments as though they were a single binary equality
192192
// expression
193-
let (lhs, rhs) = self.coerce_binary_op(lhs, Operator::Eq, rhs)?;
193+
let left_schema = join.left.schema();
194+
let right_schema = join.right.schema();
195+
let (lhs, rhs) = self.coerce_binary_op(
196+
lhs,
197+
left_schema,
198+
Operator::Eq,
199+
rhs,
200+
right_schema,
201+
)?;
194202
Ok((lhs, rhs))
195203
})
196204
.collect::<Result<Vec<_>>>()?;
@@ -275,17 +283,19 @@ impl<'a> TypeCoercionRewriter<'a> {
275283
fn coerce_binary_op(
276284
&self,
277285
left: Expr,
286+
left_schema: &DFSchema,
278287
op: Operator,
279288
right: Expr,
289+
right_schema: &DFSchema,
280290
) -> Result<(Expr, Expr)> {
281291
let (left_type, right_type) = get_input_types(
282-
&left.get_type(self.schema)?,
292+
&left.get_type(left_schema)?,
283293
&op,
284-
&right.get_type(self.schema)?,
294+
&right.get_type(right_schema)?,
285295
)?;
286296
Ok((
287-
left.cast_to(&left_type, self.schema)?,
288-
right.cast_to(&right_type, self.schema)?,
297+
left.cast_to(&left_type, left_schema)?,
298+
right.cast_to(&right_type, right_schema)?,
289299
))
290300
}
291301
}
@@ -404,7 +414,8 @@ impl TreeNodeRewriter for TypeCoercionRewriter<'_> {
404414
))))
405415
}
406416
Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
407-
let (left, right) = self.coerce_binary_op(*left, op, *right)?;
417+
let (left, right) =
418+
self.coerce_binary_op(*left, self.schema, op, *right, self.schema)?;
408419
Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr::new(
409420
Box::new(left),
410421
op,

0 commit comments

Comments
 (0)