Skip to content

Commit d5bc309

Browse files
committed
Add cume_dist
1 parent f9e37b2 commit d5bc309

File tree

3 files changed

+47
-14
lines changed

3 files changed

+47
-14
lines changed

python/datafusion/functions.py

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@
258258
"rank",
259259
"dense_rank",
260260
"percent_rank",
261+
"cume_dist",
261262
]
262263

263264

@@ -1855,28 +1856,54 @@ def dense_rank() -> Expr:
18551856
To set window function parameters use the window builder approach described in the
18561857
:ref:`window_functions` online documentation.
18571858
"""
1858-
return Expr(f.rank())
1859+
return Expr(f.dense_rank())
18591860

18601861

18611862
def percent_rank() -> Expr:
18621863
"""Create a percent_rank window function.
18631864
18641865
This window function is similar to :py:func:`rank` except that the returned values
1865-
will be consecutive. Here is an example of a dataframe with a window ordered by
1866-
descending ``points`` and the associated dense rank.
1866+
are relative ranks scaled from 0.0 (first) to 1.0 (last). Here is an example of a
1867+
dataframe with a window ordered by descending ``points`` and the associated percent
1868+
rank.
18671869
18681870
```
1869-
+--------+------+
1870-
| points | rank |
1871-
+--------+------+
1872-
| 100 | 1 |
1873-
| 100 | 1 |
1874-
| 50 | 2 |
1875-
| 25 | 3 |
1876-
+--------+------+
1871+
+--------+--------------+
1872+
| points | percent_rank |
1873+
+--------+--------------+
1874+
| 100 | 0.0 |
1875+
| 100 | 0.0 |
1876+
| 50 | 0.666667 |
1877+
| 25 | 1.0 |
1878+
+--------+--------------+
18771879
```
18781880
18791881
To set window function parameters use the window builder approach described in the
18801882
:ref:`window_functions` online documentation.
18811883
"""
1882-
return Expr(f.rank())
1884+
return Expr(f.percent_rank())
1885+
1886+
1887+
def cume_dist() -> Expr:
1888+
"""Create a cumulative distribution window function.
1889+
1890+
This window function is similar to :py:func:`rank` except that the returned values
1891+
are the ratio of the number of rows at or before the current row (ties included) to the total number of rows. Here is an example of a
1892+
dataframe with a window ordered by descending ``points`` and the associated
1893+
cumulative distribution.
1894+
1895+
```
1896+
+--------+-----------+
1897+
| points | cume_dist |
1898+
+--------+-----------+
1899+
| 100 | 0.5 |
1900+
| 100 | 0.5 |
1901+
| 50 | 0.75 |
1902+
| 25 | 1.0 |
1903+
+--------+-----------+
1904+
```
1905+
1906+
To set window function parameters use the window builder approach described in the
1907+
:ref:`window_functions` online documentation.
1908+
"""
1909+
return Expr(f.cume_dist())

python/datafusion/tests/test_dataframe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -288,12 +288,12 @@ def test_distinct():
288288
),
289289
(
290290
"percent_rank",
291-
f.window("percent_rank", [], order_by=[f.order_by(column("c"))]),
291+
f.percent_rank().order_by(column("c").sort()).build(),
292292
[0.5, 0, 0.5],
293293
),
294294
(
295295
"cume_dist",
296-
f.window("cume_dist", [], order_by=[f.order_by(column("b"))]),
296+
f.cume_dist().order_by(column("b").sort()).build(),
297297
[0.3333333333333333, 0.6666666666666666, 1.0],
298298
),
299299
(

src/functions.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,11 @@ pub fn percent_rank() -> PyExpr {
889889
window_function::percent_rank().into()
890890
}
891891

892+
#[pyfunction]
893+
pub fn cume_dist() -> PyExpr {
894+
window_function::cume_dist().into()
895+
}
896+
892897
pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
893898
m.add_wrapped(wrap_pyfunction!(abs))?;
894899
m.add_wrapped(wrap_pyfunction!(acos))?;
@@ -1079,6 +1084,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
10791084
m.add_wrapped(wrap_pyfunction!(rank))?;
10801085
m.add_wrapped(wrap_pyfunction!(dense_rank))?;
10811086
m.add_wrapped(wrap_pyfunction!(percent_rank))?;
1087+
m.add_wrapped(wrap_pyfunction!(cume_dist))?;
10821088

10831089
Ok(())
10841090
}

0 commit comments

Comments
 (0)