Skip to content

Commit 7c1c08f

Browse files
nirnayroyNirnay Roy
and
Nirnay Roy
authored
feat: expose regexp_count function (#1066)
* Added wrapper for regexp_count function * fix comment --------- Co-authored-by: Nirnay Roy <[email protected]>
1 parent 2f52688 commit 7c1c08f

File tree

3 files changed

+42
-0
lines changed

3 files changed

+42
-0
lines changed

python/datafusion/functions.py

+18
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@
217217
"random",
218218
"range",
219219
"rank",
220+
"regexp_count",
220221
"regexp_like",
221222
"regexp_match",
222223
"regexp_replace",
@@ -779,6 +780,23 @@ def regexp_replace(
779780
return Expr(f.regexp_replace(string.expr, pattern.expr, replacement.expr, flags))
780781

781782

783+
def regexp_count(
    string: Expr, pattern: Expr, start: Expr | None = None, flags: Expr | None = None
) -> Expr:
    """Returns the number of matches of a regular expression in a string.

    Args:
        string: Expression yielding the string to search.
        pattern: Expression yielding the regular expression to count.
        start: Optional start position (the first position is 1) to search for
            the regular expression. Forwarded as ``None`` when omitted.
        flags: Optional regular expression flags. Forwarded as ``None`` when
            omitted.
    """
    # Unwrap the optional arguments to their raw expressions. A missing
    # argument must be passed through as ``None``; falling back to the
    # ``Expr.expr`` class attribute would hand the binding a non-expression.
    start_expr = start.expr if start is not None else None
    flags_expr = flags.expr if flags is not None else None
    return Expr(f.regexp_count(string.expr, pattern.expr, start_expr, flags_expr))
798+
799+
782800
def repeat(string: Expr, n: Expr) -> Expr:
    """Builds an expression that repeats ``string`` ``n`` times."""
    source, count = string.expr, n.expr
    return Expr(f.repeat(source, count))

python/tests/test_functions.py

+4
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,10 @@ def test_array_function_obj_tests(stmt, py_expr):
740740
f.regexp_replace(column("a"), literal("(ell|orl)"), literal("-")),
741741
pa.array(["H-o", "W-d", "!"]),
742742
),
743+
(
744+
f.regexp_count(column("a"), literal("(ell|orl)"), literal(1)),
745+
pa.array([1, 1, 0], type=pa.int64()),
746+
),
743747
],
744748
)
745749
def test_string_functions(df, function, expected_result):

src/functions.rs

+20
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,25 @@ fn regexp_replace(
173173
)
174174
.into())
175175
}
176+
177+
#[pyfunction]
178+
#[pyo3(signature = (string, pattern, start, flags=None))]
179+
/// Returns the number of matches found in the string.
180+
fn regexp_count(
181+
string: PyExpr,
182+
pattern: PyExpr,
183+
start: Option<PyExpr>,
184+
flags: Option<PyExpr>,
185+
) -> PyResult<PyExpr> {
186+
Ok(functions::expr_fn::regexp_count(
187+
string.expr,
188+
pattern.expr,
189+
start.map(|x| x.expr),
190+
flags.map(|x| x.expr),
191+
)
192+
.into())
193+
}
194+
176195
/// Creates a new Sort Expr
177196
#[pyfunction]
178197
fn order_by(expr: PyExpr, asc: bool, nulls_first: bool) -> PyResult<PySortExpr> {
@@ -943,6 +962,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
943962
m.add_wrapped(wrap_pyfunction!(power))?;
944963
m.add_wrapped(wrap_pyfunction!(radians))?;
945964
m.add_wrapped(wrap_pyfunction!(random))?;
965+
m.add_wrapped(wrap_pyfunction!(regexp_count))?;
946966
m.add_wrapped(wrap_pyfunction!(regexp_like))?;
947967
m.add_wrapped(wrap_pyfunction!(regexp_match))?;
948968
m.add_wrapped(wrap_pyfunction!(regexp_replace))?;

0 commit comments

Comments
 (0)