Skip to content

Commit 4f982fa

Browse files
pr comments
1 parent dcac8e0 commit 4f982fa

File tree

5 files changed

+15
-14
lines changed

5 files changed

+15
-14
lines changed

bigframes/functions/_function_client.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -229,6 +229,7 @@ def provision_bq_managed_function(
229229

230230
# TODO(b/406283812): Expose the capability to pass down
231231
# capture_references=True in the public udf API.
232+
# TODO(b/495508827): Include all config in the value hash.
232233
if (
233234
capture_references
234235
and (python_version := _utils.get_python_version())

bigframes/functions/_utils.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -191,7 +191,6 @@ def get_managed_function_name(
191191
session_id: str | None = None,
192192
):
193193
"""Get a name for the bigframes managed function for the given user defined function."""
194-
# TODO: Move over to logic used by remote functions
195194
parts = [_BIGFRAMES_FUNCTION_PREFIX]
196195
if session_id:
197196
parts.append(session_id)

bigframes/functions/udf_def.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@
1515

1616
import dataclasses
1717
import functools
18-
import hashlib
1918
import inspect
2019
import io
2120
import os
@@ -25,6 +24,7 @@
2524

2625
import cloudpickle
2726
from google.cloud import bigquery
27+
import google_crc32c
2828
import pandas as pd
2929

3030
import bigframes.dtypes
@@ -84,7 +84,7 @@ def sql_type(self) -> str:
8484
return self.dtype.sql_type
8585

8686
def stable_hash(self) -> bytes:
87-
hash_val = hashlib.md5()
87+
hash_val = google_crc32c.Checksum()
8888
hash_val.update(self.name.encode())
8989
hash_val.update(self.dtype.stable_hash())
9090
return hash_val.digest()
@@ -116,7 +116,7 @@ def sql_type(self) -> str:
116116
return function_typing.sdk_type_to_sql_string(sdk_type)
117117

118118
def stable_hash(self) -> bytes:
119-
hash_val = hashlib.md5()
119+
hash_val = google_crc32c.Checksum()
120120
hash_val.update(self._py_type.__name__.encode())
121121
return hash_val.digest()
122122

@@ -145,7 +145,6 @@ class VirtualListTypeV1:
145145
def py_type(self) -> Type[list[Any]]:
146146
return list[self.inner_dtype.py_type] # type: ignore
147147

148-
# TODO: Specify emulating type and mapping expressions between said types
149148
@property
150149
def bf_type(self) -> bigframes.dtypes.Dtype:
151150
return bigframes.dtypes.list_type(self.inner_dtype.bf_type)
@@ -165,7 +164,8 @@ def out_expr(
165164
if self.inner_dtype.py_type is str:
166165
return as_str_list
167166
elif self.inner_dtype.py_type is bool:
168-
# TODO: hack so we don't need to make ArrayMap support general expressions yet
167+
# hack so we don't need to make ArrayMap support general expressions yet
168+
# with b/495513753 we can map the equality operator instead
169169
return ops.ArrayMapOp(ops.IsInOp(values=("true",))).as_expr(as_str_list)
170170
else:
171171
return ops.ArrayMapOp(ops.AsTypeOp(self.inner_dtype.bf_type)).as_expr(
@@ -177,7 +177,7 @@ def sql_type(self) -> str:
177177
return f"ARRAY<{self.inner_dtype.sql_type}>"
178178

179179
def stable_hash(self) -> bytes:
180-
hash_val = hashlib.md5()
180+
hash_val = google_crc32c.Checksum()
181181
hash_val.update(self._PROTOCOL_ID.encode())
182182
hash_val.update(self.inner_dtype.stable_hash())
183183
return hash_val.digest()
@@ -212,7 +212,7 @@ def emulating_type(self) -> DirectScalarType:
212212
return DirectScalarType(str)
213213

214214
def stable_hash(self) -> bytes:
215-
hash_val = hashlib.md5()
215+
hash_val = google_crc32c.Checksum()
216216
hash_val.update(self._PROTOCOL_ID.encode())
217217
return hash_val.digest()
218218

@@ -227,6 +227,7 @@ class UdfSignature:
227227
output: DirectScalarType | VirtualListTypeV1
228228

229229
def __post_init__(self):
230+
# Validate inputs and outputs are of the correct types.
230231
assert all(isinstance(arg, UdfArg) for arg in self.inputs)
231232
assert isinstance(self.output, (DirectScalarType, VirtualListTypeV1))
232233

@@ -240,7 +241,6 @@ def to_sql_input_signature(self) -> str:
240241
def protocol_metadata(self) -> str | None:
241242
import bigframes.functions._utils
242243

243-
# TODO: The output field itself should handle this, to handle protocol versioning.
244244
if isinstance(self.output, VirtualListTypeV1):
245245
return bigframes.functions._utils.get_bigframes_metadata(
246246
python_output_type=self.output.py_type
@@ -362,7 +362,7 @@ def to_remote_function_compatible(self) -> UdfSignature:
362362
return self
363363

364364
def stable_hash(self) -> bytes:
365-
hash_val = hashlib.md5()
365+
hash_val = google_crc32c.Checksum()
366366
for input_type in self.inputs:
367367
hash_val.update(input_type.stable_hash())
368368
hash_val.update(self.output.stable_hash())
@@ -438,7 +438,7 @@ def stable_hash(self) -> bytes:
438438
def_copy, protocol=_pickle_protocol_version
439439
)
440440

441-
hash_val = hashlib.md5()
441+
hash_val = google_crc32c.Checksum()
442442
hash_val.update(normalized_pickled_code)
443443

444444
if self.package_requirements:
@@ -464,7 +464,7 @@ class CloudRunFunctionConfig:
464464
concurrency: int | None
465465

466466
def stable_hash(self) -> bytes:
467-
hash_val = hashlib.md5()
467+
hash_val = google_crc32c.Checksum()
468468
hash_val.update(self.code.stable_hash())
469469
hash_val.update(self.signature.stable_hash())
470470
hash_val.update(str(self.timeout_seconds).encode())
@@ -503,7 +503,7 @@ def from_bq_routine(cls, routine: bigquery.Routine) -> RemoteFunctionConfig:
503503
)
504504

505505
def stable_hash(self) -> bytes:
506-
hash_val = hashlib.md5()
506+
hash_val = google_crc32c.Checksum()
507507
hash_val.update(self.endpoint.encode())
508508
hash_val.update(self.signature.stable_hash())
509509
hash_val.update(self.connection_id.encode())

bigframes/operations/array_ops.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ def output_type(self, *input_types):
9393
@dataclasses.dataclass(frozen=True)
9494
class ArrayMapOp(base_ops.UnaryOp):
9595
name: typing.ClassVar[str] = "array_map"
96-
# TODO: Generalize to chained expressions
96+
# TODO(b/495513753): Generalize to chained expressions
9797
map_op: base_ops.UnaryOp
9898

9999
def output_type(self, *input_types):

setup.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@
4646
"google-cloud-bigquery-connection >=1.12.0",
4747
"google-cloud-resource-manager >=1.10.3",
4848
"google-cloud-storage >=2.0.0",
49+
"google-crc32c >=1.0.0,<2.0.0",
4950
"grpc-google-iam-v1 >= 0.14.2",
5051
"numpy >=1.24.0",
5152
"pandas >=1.5.3",

0 commit comments

Comments
 (0)