Skip to content

Commit 7cca028

Browse files
feat: add with_columns (#909)
* feat: add with_columns * chore: add doc * Format docstring to render in online documentation --------- Co-authored-by: Tim Saucer <[email protected]>
1 parent fc7e3e5 commit 7cca028

File tree

3 files changed

+87
-1
lines changed

3 files changed

+87
-1
lines changed

python/datafusion/dataframe.py

+46-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
from __future__ import annotations
2323

24-
from typing import Any, List, TYPE_CHECKING, Literal
24+
from typing import Any, Iterable, List, Literal, TYPE_CHECKING
2525
from datafusion.record_batch import RecordBatchStream
2626
from typing_extensions import deprecated
2727
from datafusion.plan import LogicalPlan, ExecutionPlan
@@ -171,6 +171,51 @@ def with_column(self, name: str, expr: Expr) -> DataFrame:
171171
"""
172172
return DataFrame(self.df.with_column(name, expr.expr))
173173

174+
def with_columns(
    self, *exprs: Expr | Iterable[Expr], **named_exprs: Expr
) -> DataFrame:
    """Add columns to the DataFrame.

    Columns may be given as expressions, iterables of expressions, or named
    expressions. To pass named expressions use the form ``name=Expr``.

    Example usage: The following will add 4 columns labeled a, b, c, and d::

        df = df.with_columns(
            lit(0).alias('a'),
            [lit(1).alias('b'), lit(2).alias('c')],
            d=lit(3)
        )

    Args:
        exprs: Either a single expression or an iterable of expressions to add.
        named_exprs: Named expressions in the form of ``name=expr``

    Returns:
        DataFrame with the new columns added.

    Raises:
        NotImplementedError: If a positional argument is neither an
            expression nor an iterable.
    """

    def _simplify_expression(
        *exprs: Expr | Iterable[Expr], **named_exprs: Expr
    ) -> list[Any]:
        # Flatten every argument into one list of the raw ``.expr`` objects
        # that the underlying ``self.df.with_columns`` call receives.
        raw_exprs = []
        for expr in exprs:
            # Check for Expr first so a single expression is never iterated.
            if isinstance(expr, Expr):
                raw_exprs.append(expr.expr)
            elif isinstance(expr, Iterable):
                raw_exprs.extend(inner_expr.expr for inner_expr in expr)
            else:
                raise NotImplementedError(
                    "with_columns expects Expr or iterable of Expr arguments, "
                    f"got {type(expr).__name__}"
                )
        # Keyword arguments come last; the keyword becomes the column alias.
        raw_exprs.extend(
            expr.alias(alias).expr for alias, expr in named_exprs.items()
        )
        return raw_exprs

    expressions = _simplify_expression(*exprs, **named_exprs)

    return DataFrame(self.df.with_columns(expressions))
218+
174219
def with_column_renamed(self, old_name: str, new_name: str) -> DataFrame:
175220
r"""Rename one column by applying a new projection.
176221

python/tests/test_dataframe.py

+31
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,37 @@ def test_with_column(df):
216216
assert result.column(2) == pa.array([5, 7, 9])
217217

218218

219+
def test_with_columns(df):
    """``with_columns`` accepts positional, list, and keyword expressions."""

    def a_plus_b():
        # Build a fresh ``a + b`` expression for each new column.
        return column("a") + column("b")

    df = df.with_columns(
        a_plus_b().alias("c"),
        a_plus_b().alias("d"),
        [
            a_plus_b().alias("e"),
            a_plus_b().alias("f"),
        ],
        g=a_plus_b(),
    )

    # execute and collect the first (and only) batch
    batch = df.collect()[0]

    # Original columns come first, followed by the added ones in call order.
    expected_names = ["a", "b", "c", "d", "e", "f", "g"]
    for position, expected_name in enumerate(expected_names):
        assert batch.schema.field(position).name == expected_name

    # Columns a and b are unchanged; every added column holds a + b.
    expected_values = [[1, 2, 3], [4, 5, 6]] + [[5, 7, 9] for _ in range(5)]
    for position, values in enumerate(expected_values):
        assert batch.column(position) == pa.array(values)
249+
219250
def test_with_column_renamed(df):
220251
df = df.with_column("c", column("a") + column("b")).with_column_renamed("c", "sum")
221252

src/dataframe.rs

+10
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,16 @@ impl PyDataFrame {
187187
Ok(Self::new(df))
188188
}
189189

190+
fn with_columns(&self, exprs: Vec<PyExpr>) -> PyResult<Self> {
191+
let mut df = self.df.as_ref().clone();
192+
for expr in exprs {
193+
let expr: Expr = expr.into();
194+
let name = format!("{}", expr.schema_name());
195+
df = df.with_column(name.as_str(), expr)?
196+
}
197+
Ok(Self::new(df))
198+
}
199+
190200
/// Rename one column by applying a new projection. This is a no-op if the column to be
191201
/// renamed does not exist.
192202
fn with_column_renamed(&self, old_name: &str, new_name: &str) -> PyResult<Self> {

0 commit comments

Comments
 (0)