Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit edb1ada

Browse files
committed Aug 24, 2024
Add rank window function
1 parent 3d20b82 commit edb1ada

File tree

3 files changed

+43
-4
lines changed

3 files changed

+43
-4
lines changed
 

‎python/datafusion/functions.py‎

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1753,6 +1753,9 @@ def lead(arg: Expr, shift_offset: int = 1, default_value: Optional[Any] = None)
17531753
return the 3rd following value in column ``b``. At the end of the partition, where
17541754
no further values can be returned it will return the default value of 5.
17551755
1756+
To set window function parameters use the window builder approach described in the
1757+
ref:`_window_functions` online documentation.
1758+
17561759
Args:
17571760
arg: Value to return
17581761
shift_offset: Number of rows following the current row.
@@ -1772,6 +1775,9 @@ def lag(arg: Expr, shift_offset: int = 1, default_value: Optional[Any] = None) -
17721775
will return the 3rd previous value in column ``b``. At the beginning of the
17731776
partition, where no values can be returned it will return the default value of 5.
17741777
1778+
To set window function parameters use the window builder approach described in the
1779+
ref:`_window_functions` online documentation.
1780+
17751781
Args:
17761782
arg: Value to return
17771783
shift_offset: Number of rows before the current row.
@@ -1786,8 +1792,35 @@ def lag(arg: Expr, shift_offset: int = 1, default_value: Optional[Any] = None) -
17861792
def row_number() -> Expr:
17871793
"""Create a row number window function.
17881794
1789-
Returns the row number of the window function. To set window function parameters
1790-
use the window builder approach described in the ref:`_window_functions` online
1791-
documentation.
1795+
Returns the row number of the window function.
1796+
1797+
To set window function parameters use the window builder approach described in the
1798+
ref:`_window_functions` online documentation.
17921799
"""
17931800
return Expr(f.row_number())
1801+
1802+
1803+
def rank() -> Expr:
1804+
"""Create a rank window function.
1805+
1806+
Returns the rank based upon the window order. Consecutive equal values will receive
1807+
the same rank, but the next different value will not be consecutive but rather the
1808+
number of rows that precede it plus one. This is similar to Olympic medals. If two
1809+
people tie for gold, the next place is bronze. There would be no silver medal. Here
1810+
is an example of a dataframe with a window ordered by descending ``points`` and the
1811+
associated rank.
1812+
1813+
```
1814+
+--------+------+
1815+
| points | rank |
1816+
+--------+------+
1817+
| 100 | 1 |
1818+
| 100 | 1 |
1819+
| 50 | 3 |
1820+
+--------+------+
1821+
```
1822+
1823+
To set window function parameters use the window builder approach described in the
1824+
ref:`_window_functions` online documentation.
1825+
"""
1826+
return Expr(f.rank())

‎python/datafusion/tests/test_dataframe.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ def test_distinct():
280280

281281
data_test_window_functions = [
282282
("row", f.row_number().order_by(column("c").sort()).build(), [2, 1, 3]),
283-
("rank", f.window("rank", [], order_by=[f.order_by(column("c"))]), [2, 1, 2]),
283+
("rank", f.rank().order_by(column("c").sort()).build(), [2, 1, 2]),
284284
(
285285
"dense_rank",
286286
f.window("dense_rank", [], order_by=[f.order_by(column("c"))]),

‎src/functions.rs‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -874,6 +874,11 @@ pub fn row_number() -> PyExpr {
874874
window_function::row_number().into()
875875
}
876876

877+
#[pyfunction]
878+
pub fn rank() -> PyExpr {
879+
window_function::rank().into()
880+
}
881+
877882
pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
878883
m.add_wrapped(wrap_pyfunction!(abs))?;
879884
m.add_wrapped(wrap_pyfunction!(acos))?;
@@ -1061,6 +1066,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
10611066
m.add_wrapped(wrap_pyfunction!(lead))?;
10621067
m.add_wrapped(wrap_pyfunction!(lag))?;
10631068
m.add_wrapped(wrap_pyfunction!(row_number))?;
1069+
m.add_wrapped(wrap_pyfunction!(rank))?;
10641070

10651071
Ok(())
10661072
}

0 commit comments

Comments
 (0)
Please sign in to comment.