Skip to content

Commit 55141ba

Browse files
Spaarshtimsaucer
andauthored
Renaming Internal Structs (#1059)
* Renamed Expr to RawExpr * Fixed CI test for exported classes to include RawExpr as well * Fixed CI test for exported classes to check if Expr class covers RawExpr * Generalized Raw* class checking * fixes * fixes * fixed the CI test to not look for Raw classes in the datafusion module * Add additional text to unit test describing operation and ensure wrapped Raw classes are checked * New ruff rule on main * Resolve ruff errors --------- Co-authored-by: Tim Saucer <[email protected]>
1 parent 3dcf7c7 commit 55141ba

File tree

3 files changed

+45
-20
lines changed

3 files changed

+45
-20
lines changed

python/datafusion/expr.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ class Expr:
193193
:ref:`Expressions` in the online documentation for more information.
194194
"""
195195

196-
def __init__(self, expr: expr_internal.Expr) -> None:
196+
def __init__(self, expr: expr_internal.RawExpr) -> None:
197197
"""This constructor should not be called by the end user."""
198198
self.expr = expr
199199

@@ -383,7 +383,7 @@ def literal(value: Any) -> Expr:
383383
value = pa.scalar(value, type=pa.string_view())
384384
if not isinstance(value, pa.Scalar):
385385
value = pa.scalar(value)
386-
return Expr(expr_internal.Expr.literal(value))
386+
return Expr(expr_internal.RawExpr.literal(value))
387387

388388
@staticmethod
389389
def string_literal(value: str) -> Expr:
@@ -398,13 +398,13 @@ def string_literal(value: str) -> Expr:
398398
"""
399399
if isinstance(value, str):
400400
value = pa.scalar(value, type=pa.string())
401-
return Expr(expr_internal.Expr.literal(value))
401+
return Expr(expr_internal.RawExpr.literal(value))
402402
return Expr.literal(value)
403403

404404
@staticmethod
405405
def column(value: str) -> Expr:
406406
"""Creates a new expression representing a column."""
407-
return Expr(expr_internal.Expr.column(value))
407+
return Expr(expr_internal.RawExpr.column(value))
408408

409409
def alias(self, name: str) -> Expr:
410410
"""Assign a name to the expression."""

python/tests/test_wrapper_coverage.py

+40-15
Original file line numberDiff line numberDiff line change
@@ -28,37 +28,62 @@
2828
from enum import EnumMeta as EnumType
2929

3030

31-
def missing_exports(internal_obj, wrapped_obj) -> None:
32-
# Special case enums - just make sure they exist since dir()
33-
# and other functions get overridden.
31+
def missing_exports(internal_obj, wrapped_obj) -> None: # noqa: C901
32+
"""
33+
Identify if any of the rust exposted structs or functions do not have wrappers.
34+
35+
Special handling for:
36+
- Raw* classes: Internal implementation details that shouldn't be exposed
37+
- _global_ctx: Internal implementation detail
38+
- __self__, __class__: Python special attributes
39+
"""
40+
# Special case enums - EnumType overrides a some of the internal functions,
41+
# so check all of the values exist and move on
3442
if isinstance(wrapped_obj, EnumType):
43+
expected_values = [v for v in dir(internal_obj) if not v.startswith("__")]
44+
for value in expected_values:
45+
assert value in dir(wrapped_obj)
3546
return
3647

37-
for attr in dir(internal_obj):
38-
if attr in ["_global_ctx"]:
39-
continue
40-
assert attr in dir(wrapped_obj)
48+
for internal_attr_name in dir(internal_obj):
49+
wrapped_attr_name = internal_attr_name.removeprefix("Raw")
50+
assert wrapped_attr_name in dir(wrapped_obj)
4151

42-
internal_attr = getattr(internal_obj, attr)
43-
wrapped_attr = getattr(wrapped_obj, attr)
52+
internal_attr = getattr(internal_obj, internal_attr_name)
53+
wrapped_attr = getattr(wrapped_obj, wrapped_attr_name)
4454

45-
if internal_attr is not None and wrapped_attr is None:
46-
pytest.fail(f"Missing attribute: {attr}")
55+
# There are some auto generated attributes that can be None, such as
56+
# __kwdefaults__ and __doc__. As long as these are None on the internal
57+
# object, it's okay to skip them. However if they do exist on the internal
58+
# object they must also exist on the wrapped object.
59+
if internal_attr is not None:
60+
if wrapped_attr is None:
61+
pytest.fail(f"Missing attribute: {internal_attr_name}")
4762

48-
if attr in ["__self__", "__class__"]:
63+
if internal_attr_name in ["__self__", "__class__"]:
4964
continue
65+
5066
if isinstance(internal_attr, list):
5167
assert isinstance(wrapped_attr, list)
68+
69+
# We have cases like __all__ that are a list and we want to be certain that
70+
# every value in the list in the internal object is also in the wrapper list
5271
for val in internal_attr:
53-
assert val in wrapped_attr
72+
if isinstance(val, str) and val.startswith("Raw"):
73+
assert val[3:] in wrapped_attr
74+
else:
75+
assert val in wrapped_attr
5476
elif hasattr(internal_attr, "__dict__"):
77+
# Check all submodules recursively
5578
missing_exports(internal_attr, wrapped_attr)
5679

5780

5881
def test_datafusion_missing_exports() -> None:
5982
"""Check for any missing python exports.
6083
61-
This test verifies that every exposed class, attribute, and function in
62-
the internal (pyo3) module is also exposed in our python wrappers.
84+
This test verifies that every exposed class, attribute,
85+
and function in the internal (pyo3) module - datafusion._internal
86+
is also exposed in our python wrappers - datafusion -
87+
i.e., the ones exposed to the public.
6388
"""
6489
missing_exports(datafusion._internal, datafusion)

src/expr.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ pub mod window;
101101
use sort_expr::{to_sort_expressions, PySortExpr};
102102

103103
/// A PyExpr that can be used on a DataFrame
104-
#[pyclass(name = "Expr", module = "datafusion.expr", subclass)]
104+
#[pyclass(name = "RawExpr", module = "datafusion.expr", subclass)]
105105
#[derive(Debug, Clone)]
106106
pub struct PyExpr {
107107
pub expr: Expr,

0 commit comments

Comments
 (0)