Skip to content

Commit 91a3ea0

Browse files
Mytherinkrlmlr
authored andcommitted
Merge pull request duckdb/duckdb#7987 from Tmonster/add_asof_join_to_relational
Add JoinReftype to Relational Joins (to add asof, positional, dependent joins)
1 parent 61a5fc5 commit 91a3ea0

File tree

5 files changed

+163
-23
lines changed

5 files changed

+163
-23
lines changed

R/cpp11.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,8 @@ rapi_expr_window <- function(window_function, partitions, order_bys, window_boun
7676
.Call(`_duckdb_rapi_expr_window`, window_function, partitions, order_bys, window_boundary_start, window_boundary_end, start_expr, end_expr, offset_expr, default_expr)
7777
}
7878

79-
rapi_rel_join <- function(left, right, conds, join) {
80-
.Call(`_duckdb_rapi_rel_join`, left, right, conds, join)
79+
rapi_rel_join <- function(left, right, conds, join, join_ref_type) {
80+
.Call(`_duckdb_rapi_rel_join`, left, right, conds, join, join_ref_type)
8181
}
8282

8383
rapi_rel_union_all <- function(rel_a, rel_b) {

R/relational.R

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -212,13 +212,21 @@ expr_window_ <- function (window_function, partitions=list(), order_bys=list(),
212212
#' rel2 <- rel_join(left, right, cond, "right")
213213
#' rel2 <- rel_join(left, right, cond, "left")
214214
#' rel2 <- rel_join(left, right, cond, "outer")
215+
215216
rel_inner_join <- function(left, right, conds) {
216-
rel_join(left, right, conds, "inner")
217+
rel_join_(left, right, conds, "inner", "regular")
218+
}
219+
220+
rel_join <- function(left, right, conds, join = "inner", ref_type = "regular") {
221+
rel_join_(left, right, conds, join, ref_type)
217222
}
218223

219-
rel_join <- function(left, right, conds, join = c("inner", "left", "right", "outer", "cross", "semi", "anti")) {
224+
rel_join_ <- function(left, right, conds,
225+
join = c("inner", "left", "right", "outer", "cross", "semi", "anti"),
226+
join_ref_type = c("regular", "natural", "cross", "positional", "asof")) {
220227
join <- match.arg(join)
221-
rapi_rel_join(left, right, conds, join)
228+
join_ref_type <- match.arg(join_ref_type)
229+
rapi_rel_join(left, right, conds, join, join_ref_type)
222230
}
223231

224232
#' UNION ALL on two DuckDB relation objects

src/cpp11.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,10 +146,10 @@ extern "C" SEXP _duckdb_rapi_expr_window(SEXP window_function, SEXP partitions,
146146
END_CPP11
147147
}
148148
// relational.cpp
149-
SEXP rapi_rel_join(duckdb::rel_extptr_t left, duckdb::rel_extptr_t right, list conds, std::string join);
150-
extern "C" SEXP _duckdb_rapi_rel_join(SEXP left, SEXP right, SEXP conds, SEXP join) {
149+
SEXP rapi_rel_join(duckdb::rel_extptr_t left, duckdb::rel_extptr_t right, list conds, std::string join, std::string join_ref_type);
150+
extern "C" SEXP _duckdb_rapi_rel_join(SEXP left, SEXP right, SEXP conds, SEXP join, SEXP join_ref_type) {
151151
BEGIN_CPP11
152-
return cpp11::as_sexp(rapi_rel_join(cpp11::as_cpp<cpp11::decay_t<duckdb::rel_extptr_t>>(left), cpp11::as_cpp<cpp11::decay_t<duckdb::rel_extptr_t>>(right), cpp11::as_cpp<cpp11::decay_t<list>>(conds), cpp11::as_cpp<cpp11::decay_t<std::string>>(join)));
152+
return cpp11::as_sexp(rapi_rel_join(cpp11::as_cpp<cpp11::decay_t<duckdb::rel_extptr_t>>(left), cpp11::as_cpp<cpp11::decay_t<duckdb::rel_extptr_t>>(right), cpp11::as_cpp<cpp11::decay_t<list>>(conds), cpp11::as_cpp<cpp11::decay_t<std::string>>(join), cpp11::as_cpp<cpp11::decay_t<std::string>>(join_ref_type)));
153153
END_CPP11
154154
}
155155
// relational.cpp
@@ -396,7 +396,7 @@ static const R_CallMethodDef CallEntries[] = {
396396
{"_duckdb_rapi_rel_from_df", (DL_FUNC) &_duckdb_rapi_rel_from_df, 3},
397397
{"_duckdb_rapi_rel_from_table", (DL_FUNC) &_duckdb_rapi_rel_from_table, 3},
398398
{"_duckdb_rapi_rel_from_table_function", (DL_FUNC) &_duckdb_rapi_rel_from_table_function, 4},
399-
{"_duckdb_rapi_rel_join", (DL_FUNC) &_duckdb_rapi_rel_join, 4},
399+
{"_duckdb_rapi_rel_join", (DL_FUNC) &_duckdb_rapi_rel_join, 5},
400400
{"_duckdb_rapi_rel_limit", (DL_FUNC) &_duckdb_rapi_rel_limit, 2},
401401
{"_duckdb_rapi_rel_names", (DL_FUNC) &_duckdb_rapi_rel_names, 1},
402402
{"_duckdb_rapi_rel_order", (DL_FUNC) &_duckdb_rapi_rel_order, 2},

src/relational.cpp

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#include "duckdb/main/relation/distinct_relation.hpp"
2727
#include "duckdb/main/relation/table_function_relation.hpp"
2828

29+
#include "duckdb/common/enums/joinref_type.hpp"
30+
2931
using namespace duckdb;
3032
using namespace cpp11;
3133

@@ -300,10 +302,23 @@ bool constant_expression_is_not_null(duckdb::expr_extptr_t expr) {
300302
}
301303

302304
[[cpp11::register]] SEXP rapi_rel_join(duckdb::rel_extptr_t left, duckdb::rel_extptr_t right, list conds,
303-
std::string join) {
305+
std::string join, std::string join_ref_type) {
304306
auto join_type = JoinType::INNER;
307+
auto ref_type = JoinRefType::REGULAR;
305308
unique_ptr<ParsedExpression> cond;
306309

310+
if (join_ref_type == "regular") {
311+
ref_type = JoinRefType::REGULAR;
312+
} else if (join_ref_type == "cross") {
313+
ref_type = JoinRefType::CROSS;
314+
} else if (join_ref_type == "positional") {
315+
ref_type = JoinRefType::POSITIONAL;
316+
} else if (join_ref_type == "asof") {
317+
ref_type = JoinRefType::ASOF;
318+
}
319+
320+
cpp11::writable::list prot = {left, right};
321+
307322
if (join == "left") {
308323
join_type = JoinType::LEFT;
309324
} else if (join == "right") {
@@ -314,9 +329,19 @@ bool constant_expression_is_not_null(duckdb::expr_extptr_t expr) {
314329
join_type = JoinType::SEMI;
315330
} else if (join == "anti") {
316331
join_type = JoinType::ANTI;
317-
} else if (join == "cross") {
318-
auto res = std::make_shared<CrossProductRelation>(left->rel, right->rel);
319-
return make_external<RelationWrapper>("duckdb_relation", res);
332+
} else if (join == "cross" || ref_type == JoinRefType::POSITIONAL) {
333+
if (ref_type != JoinRefType::POSITIONAL) {
334+
// users can only supply positional cross join, or cross join.
335+
warning("Automatically converting join to cross join");
336+
ref_type = JoinRefType::CROSS;
337+
}
338+
auto res = std::make_shared<CrossProductRelation>(left->rel, right->rel, ref_type);
339+
auto rel = make_external_prot<RelationWrapper>("duckdb_relation", prot, res);
340+
// if the user described filters, apply them on top of the cross product relation
341+
if (conds.size() > 0) {
342+
return rapi_rel_filter(rel, conds);
343+
}
344+
return rel;
320345
}
321346

322347
if (conds.size() == 1) {
@@ -329,10 +354,7 @@ bool constant_expression_is_not_null(duckdb::expr_extptr_t expr) {
329354
cond = make_uniq<ConjunctionExpression>(ExpressionType::CONJUNCTION_AND, std::move(cond_args));
330355
}
331356

332-
auto res = std::make_shared<JoinRelation>(left->rel, right->rel, std::move(cond), join_type);
333-
334-
cpp11::writable::list prot = {left, right};
335-
357+
auto res = std::make_shared<JoinRelation>(left->rel, right->rel, std::move(cond), join_type, ref_type);
336358
return make_external_prot<RelationWrapper>("duckdb_relation", prot, res);
337359
}
338360

tests/testthat/test_relational.R

Lines changed: 116 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,117 @@ test_that("Union all keeps duplicates", {
268268
expect_equal(rel_df, expected_result)
269269
})
270270

271+
test_that("Inner join returns all inner relations", {
272+
dbExecute(con, "CREATE OR REPLACE MACRO eq(a, b) AS a = b")
273+
left <- rel_from_df(con, data.frame(left_a=c(1, 2, 3), left_b=c(1, 1, 2)))
274+
right <- rel_from_df(con, data.frame(right_b=c(1, 3), right_c=c(4, 5)))
275+
cond <- list(expr_function("eq", list(expr_reference("left_b"), expr_reference("right_b"))))
276+
rel2 <- rel_join(left, right, cond, "inner")
277+
rel_df <- rel_to_altrep(rel2)
278+
dim(rel_df)
279+
expected_result <- data.frame(left_a=c(1, 2), left_b=c(1, 1), right_b=c(1, 1), right_c=c(4, 4))
280+
expect_equal(rel_df, expected_result)
281+
})
282+
283+
test_that("ASOF join works", {
284+
dbExecute(con, "CREATE OR REPLACE MACRO gte(a, b) AS a >= b")
285+
test_df1 <- duckdb:::rel_from_df(con, data.frame(ts=c(1, 2, 3, 4, 5, 6, 7, 8, 9)))
286+
test_df2 <- duckdb:::rel_from_df(con, data.frame(event_ts=c(1, 3, 6, 8), event_id=c(0, 1, 2, 3)))
287+
cond <- list(duckdb:::expr_function("gte", list(duckdb:::expr_reference("ts"), duckdb:::expr_reference("event_ts"))))
288+
rel <- duckdb:::rel_join(test_df1, test_df2, cond, ref_type="asof")
289+
rel_proj <- duckdb:::rel_project(rel, list(duckdb:::expr_reference("ts"), duckdb:::expr_reference("event_id")))
290+
rel_df <- duckdb:::rel_to_altrep(rel_proj)
291+
expected_result <- data.frame(ts=c(1, 2, 3, 4, 5, 6, 7, 8, 9), event_id=c(0, 0, 1, 1, 1, 2, 2, 3, 3))
292+
expect_equal(expected_result, rel_df)
293+
})
294+
295+
test_that("LEFT ASOF join works", {
296+
dbExecute(con, "CREATE OR REPLACE MACRO gte(a, b) AS a >= b")
297+
test_df1 <- duckdb:::rel_from_df(con, data.frame(ts=c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)))
298+
test_df2 <- duckdb:::rel_from_df(con, data.frame(event_ts=c(2, 4, 6, 8), event_id=c(0, 1, 2, 3)))
299+
cond <- list(duckdb:::expr_function("gte", list(duckdb:::expr_reference("ts"), duckdb:::expr_reference("event_ts"))))
300+
rel <- duckdb:::rel_join(test_df1, test_df2, cond, join="left", ref_type="asof")
301+
rel_proj <- duckdb:::rel_project(rel, list(duckdb:::expr_reference("ts"), duckdb:::expr_reference("event_ts"), duckdb:::expr_reference("event_id")))
302+
order <- duckdb:::rel_order(rel_proj, list(duckdb:::expr_reference("ts")))
303+
rel_df <- duckdb:::rel_to_altrep(order)
304+
expected_result <- data.frame(ts=c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9), event_ts=c(NA, NA,2, 2, 4, 4, 6, 6, 8, 8), event_id=c(NA, NA, 0, 0, 1, 1, 2, 2, 3, 3))
305+
expect_equal(expected_result, rel_df)
306+
})
307+
308+
test_that("Positional cross join works", {
309+
test_df1 <- duckdb:::rel_from_df(con, data.frame(a=c(11, 12, 13), b=c(1, 2, 3)))
310+
test_df2 <- duckdb:::rel_from_df(con, data.frame(c=c(11, 12), d=c(1, 2)))
311+
rel <- duckdb:::rel_join(test_df1, test_df2, list(), join="cross", ref_type="positional")
312+
rel_df <- duckdb:::rel_to_altrep(rel)
313+
expected_result <- data.frame(a=c(11, 12, 13), b=c(1, 2, 3), c=c(11, 12, NA), d=c(1, 2, NA))
314+
expect_equal(expected_result, rel_df)
315+
})
316+
317+
test_that("regular positional join works", {
318+
dbExecute(con, "CREATE OR REPLACE MACRO eq(a, b) AS a = b")
319+
test_df1 <- duckdb:::rel_from_df(con, data.frame(a=c(11, 12, 13), b=c(1, 2, 3)))
320+
test_df2 <- duckdb:::rel_from_df(con, data.frame(c=c(11, 12, 14, 11), d=c(4, 5, 6, 8)))
321+
cond <- duckdb:::expr_function("eq", list(duckdb:::expr_reference("a"), duckdb:::expr_reference("c")))
322+
rel <- duckdb:::rel_join(test_df1, test_df2, list(cond), ref_type="positional")
323+
rel_df <- duckdb:::rel_to_altrep(rel)
324+
expected_result <- data.frame(a=c(11, 12), b=c(1, 2), c=c(11, 12), d=c(4, 5))
325+
expect_equal(expected_result, rel_df)
326+
})
327+
328+
test_that("Invalid asof join condition throws error", {
329+
dbExecute(con, "CREATE OR REPLACE MACRO neq(a, b) AS a <> b")
330+
test_df1 <- rel_from_df(con, data.frame(ts=c(1, 2, 3, 4, 5, 6, 7, 8, 9)))
331+
test_df2 <- rel_from_df(con, data.frame(begin=c(1, 3, 6, 8), value=c(0, 1, 2, 3)))
332+
cond <- list(expr_function("neq", list(expr_reference("ts"), expr_reference("begin"))))
333+
expect_error(rel_join(test_df1, test_df2, cond, ref_type="asof"), "Binder Error")
334+
})
335+
336+
test_that("multiple inequality conditions for asof join throws error", {
337+
dbExecute(con, "CREATE OR REPLACE MACRO gte(a, b) AS a >= b")
338+
test_df1 <- duckdb:::rel_from_df(con, data.frame(ts=c(1, 2, 3, 4, 5, 6, 7, 8, 9)))
339+
test_df2 <- duckdb:::rel_from_df(con, data.frame(begin=c(1, 3, 6, 8), value=c(0, 1, 2, 3)))
340+
cond1 <- duckdb:::expr_function("gte", list(duckdb:::expr_reference("ts"), duckdb:::expr_reference("begin")))
341+
cond2 <- duckdb:::expr_function("gte", list(duckdb:::expr_reference("ts"), duckdb:::expr_reference("value")))
342+
conds <- list(cond1, cond2)
343+
expect_error(rel_join(test_df1, test_df2, conds, ref_type="asof"), "Binder Error")
344+
})
345+
346+
347+
test_that("Inequality joins work", {
348+
dbExecute(con, "CREATE OR REPLACE MACRO gte(a, b) AS a >= b")
349+
timing_df <- rel_from_df(con, data.frame(ts=c(1, 2, 3, 4, 5, 6)))
350+
events_df <- rel_from_df(con, data.frame(event_ts=c(1, 3, 6, 8), event_id=c(0, 1, 2, 3)))
351+
cond <- list(expr_function("gte", list(expr_reference("ts"), expr_reference("event_ts"))))
352+
rel <- rel_inner_join(timing_df, events_df, cond)
353+
rel_proj <- rel_project(rel, list(expr_reference("ts"), expr_reference("event_ts")))
354+
rel_order <- rel_order(rel_proj, list(expr_reference("ts"), expr_reference("event_ts")))
355+
rel_df <- rel_to_altrep(rel_order)
356+
expected_result <- data.frame(ts=c(1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6), event_ts=c(1, 1, 1, 3, 1, 3, 1, 3, 1, 3, 6))
357+
expect_equal(expected_result, rel_df)
358+
})
359+
360+
361+
test_that("Inequality join works to perform between operation", {
362+
dbExecute(con, "CREATE OR REPLACE MACRO gt(a, b) AS a > b")
363+
dbExecute(con, "CREATE OR REPLACE MACRO lt(a, b) AS a < b")
364+
timing_df <- rel_from_df(con, data.frame(ts=c(1, 2, 3, 4, 5, 6, 7, 8, 9)))
365+
events_df <- rel_from_df(con, data.frame(event_ts=c(1, 3, 6, 8), event_id=c(0, 1, 2, 3)))
366+
lead <- expr_function("lead", list(expr_reference("event_ts")))
367+
window_lead <- expr_window(lead, offset_expr=expr_constant(1))
368+
expr_set_alias(window_lead, "lead")
369+
proj_window <- rel_project(events_df, list(expr_reference("event_ts"), window_lead, expr_reference("event_id")))
370+
cond1 <- expr_function("gt", list(expr_reference("ts"), expr_reference("event_ts")))
371+
cond2 <- expr_function("lt", list(expr_reference("ts"), expr_reference("lead")))
372+
conds <- list(cond1, cond2)
373+
rel <- rel_inner_join(timing_df, proj_window, conds)
374+
rel_proj <- rel_project(rel, list(expr_reference("ts")))
375+
rel_order <- rel_order(rel_proj, list(expr_reference("ts")))
376+
rel_df <- rel_to_altrep(rel_order)
377+
expected_result <- data.frame(ts=c(2, 4, 5, 7))
378+
expect_equal(expected_result, rel_df)
379+
})
380+
381+
271382
# nobody should do this in reality. It's a pretty dumb idea
272383
test_that("we can union the same relation to itself", {
273384
test_df_a2 <- rel_from_df(con, data.frame(a=c('1', '2'), b=c('3', '4')))
@@ -285,10 +396,10 @@ test_that("we throw an error when attempting to union all relations that are not
285396
})
286397

287398
test_that("A union with different column types casts to the richer type", {
288-
test_df_a1 <- duckdb:::rel_from_df(con, data.frame(a=c(1)))
289-
test_df_a2 <- duckdb:::rel_from_df(con, data.frame(a=c('1')))
290-
rel <- duckdb:::rel_union_all(test_df_a1, test_df_a2)
291-
res <- duckdb:::rapi_rel_to_df(rel)
399+
test_df_a1 <- rel_from_df(con, data.frame(a=c(1)))
400+
test_df_a2 <- rel_from_df(con, data.frame(a=c('1')))
401+
rel <- rel_union_all(test_df_a1, test_df_a2)
402+
res <- rapi_rel_to_df(rel)
292403
expected <- data.frame(a=c('1.0', '1'))
293404
expect_equal(class(res$a), class(expected$a))
294405
expect_equal(res$a[1], expected$a[1])
@@ -644,9 +755,8 @@ test_that("rel_to_sql works for row_number", {
644755
expect_equal(sub_str_sql, "SELECT row_number() OVER () AS ___row_number")
645756
})
646757

647-
648758
test_that("rel_from_table_function works", {
649-
rel <- duckdb:::rel_from_table_function(duckdb:::default_connection(), 'generate_series', list(1L, 10L, 2L))
759+
rel <- rel_from_table_function(default_connection(), 'generate_series', list(1L, 10L, 2L))
650760
df <- as.data.frame(rel)
651761
expect_equal(df$generate_series, c(1,3,5,7,9))
652762
})

0 commit comments

Comments
 (0)