Skip to content

Commit 948486e

Browse files
authored
Remove parsing table name in row_filter (#1689)
This PR removes one of the three deprecated items that were scheduled for removal in the 0.9.0 release. All items marked for removal: - [x] Table name reference in scan expression <https://github.com/apache/iceberg-python/blob/efc8b5ac0f16717f776e034ecf9a9e9bdabd8424/pyiceberg/expressions/parser.py#L95> - [x] REST catalog client AUTH_URL (#1691) <https://github.com/apache/iceberg-python/blob/efc8b5ac0f16717f776e034ecf9a9e9bdabd8424/pyiceberg/catalog/rest.py#L324> - [x] botocore session (#1692) <https://github.com/apache/iceberg-python/blob/efc8b5ac0f16717f776e034ecf9a9e9bdabd8424/pyiceberg/catalog/__init__.py#L790> Currently there are three items marked for removal. However, based on the ongoing [discussion](https://lists.apache.org/thread/rr8lcf96jl6079dz6vfkwr5spbvlxzpm), it appears that the other two items have not yet been replaced with a proper solution. As a result, this PR only addresses the removal of `Table name reference in scan expression` while we await further resolution on the others.
1 parent 4e9c66d commit 948486e

File tree

3 files changed

+49
-10
lines changed

3 files changed

+49
-10
lines changed

pyiceberg/expressions/parser.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@
6666
)
6767
from pyiceberg.typedef import L
6868
from pyiceberg.types import strtobool
69-
from pyiceberg.utils.deprecated import deprecation_message
7069

7170
ParserElement.enablePackrat()
7271

@@ -89,15 +88,7 @@
8988

9089
@column.set_parse_action
9190
def _(result: ParseResults) -> Reference:
92-
if len(result.column) > 1:
93-
deprecation_message(
94-
deprecated_in="0.8.0",
95-
removed_in="0.9.0",
96-
help_message="Parsing expressions with table name is deprecated. Only provide field names in the row_filter.",
97-
)
98-
# TODO: Once this is removed, we will no longer take just the last index of parsed column result
99-
# And introduce support for parsing filter expressions with nested fields.
100-
return Reference(result.column[-1])
91+
return Reference(".".join(result.column))
10192

10293

10394
boolean = one_of(["true", "false"], caseless=True).set_results_name("boolean")

tests/expressions/test_expressions.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,6 +1160,34 @@ def test_eq_bound_expression(bound_reference_str: BoundReference[str]) -> None:
11601160
)
11611161

11621162

1163+
def test_nested_bind() -> None:
1164+
schema = Schema(NestedField(1, "foo", StructType(NestedField(2, "bar", StringType()))), schema_id=1)
1165+
bound = BoundIsNull(BoundReference(schema.find_field(2), schema.accessor_for_field(2)))
1166+
assert IsNull(Reference("foo.bar")).bind(schema) == bound
1167+
1168+
1169+
def test_bind_dot_name() -> None:
1170+
schema = Schema(NestedField(1, "foo.bar", StringType()), schema_id=1)
1171+
bound = BoundIsNull(BoundReference(schema.find_field(1), schema.accessor_for_field(1)))
1172+
assert IsNull(Reference("foo.bar")).bind(schema) == bound
1173+
1174+
1175+
def test_nested_bind_with_dot_name() -> None:
1176+
schema = Schema(NestedField(1, "foo.bar", StructType(NestedField(2, "baz", StringType()))), schema_id=1)
1177+
bound = BoundIsNull(BoundReference(schema.find_field(2), schema.accessor_for_field(2)))
1178+
assert IsNull(Reference("foo.bar.baz")).bind(schema) == bound
1179+
1180+
1181+
def test_bind_ambiguous_name() -> None:
1182+
with pytest.raises(ValueError) as exc_info:
1183+
Schema(
1184+
NestedField(1, "foo", StructType(NestedField(2, "bar", StringType()))),
1185+
NestedField(3, "foo.bar", StringType()),
1186+
schema_id=1,
1187+
)
1188+
assert "Invalid schema, multiple fields for name foo.bar: 2 and 3" in str(exc_info)
1189+
1190+
11631191
# __ __ ___
11641192
# | \/ |_ _| _ \_ _
11651193
# | |\/| | || | _/ || |

tests/expressions/test_parser.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
# KIND, either express or implied. See the License for the
1515
# specific language governing permissions and limitations
1616
# under the License.
17+
from decimal import Decimal
18+
1719
import pytest
1820
from pyparsing import ParseException
1921

@@ -39,6 +41,7 @@
3941
Or,
4042
StartsWith,
4143
)
44+
from pyiceberg.expressions.literals import DecimalLiteral
4245

4346

4447
def test_always_true() -> None:
@@ -216,3 +219,20 @@ def test_with_function() -> None:
216219
parser.parse("foo = 1 and lower(bar) = '2'")
217220

218221
assert "Expected end of text, found 'and'" in str(exc_info)
222+
223+
224+
def test_nested_fields() -> None:
225+
assert EqualTo("foo.bar", "data") == parser.parse("foo.bar = 'data'")
226+
assert LessThan("location.x", DecimalLiteral(Decimal(52.00))) == parser.parse("location.x < 52.00")
227+
228+
229+
def test_quoted_column_with_dots() -> None:
230+
with pytest.raises(ParseException) as exc_info:
231+
parser.parse("\"foo.bar\".baz = 'data'")
232+
233+
assert "Expected '\"', found '.'" in str(exc_info.value)
234+
235+
with pytest.raises(ParseException) as exc_info:
236+
parser.parse("'foo.bar'.baz = 'data'")
237+
238+
assert "Expected <= | <> | < | >= | > | == | = | !=, found '.'" in str(exc_info.value)

0 commit comments

Comments
 (0)