1515
1616import dataclasses
1717import functools
18- import hashlib
1918import inspect
2019import io
2120import os
2524
2625import cloudpickle
2726from google .cloud import bigquery
27+ import google_crc32c
2828import pandas as pd
2929
3030import bigframes .dtypes
@@ -84,7 +84,7 @@ def sql_type(self) -> str:
8484 return self .dtype .sql_type
8585
8686 def stable_hash (self ) -> bytes :
87- hash_val = hashlib . md5 ()
87+ hash_val = google_crc32c . Checksum ()
8888 hash_val .update (self .name .encode ())
8989 hash_val .update (self .dtype .stable_hash ())
9090 return hash_val .digest ()
@@ -116,7 +116,7 @@ def sql_type(self) -> str:
116116 return function_typing .sdk_type_to_sql_string (sdk_type )
117117
118118 def stable_hash (self ) -> bytes :
119- hash_val = hashlib . md5 ()
119+ hash_val = google_crc32c . Checksum ()
120120 hash_val .update (self ._py_type .__name__ .encode ())
121121 return hash_val .digest ()
122122
@@ -145,7 +145,6 @@ class VirtualListTypeV1:
145145 def py_type (self ) -> Type [list [Any ]]:
146146 return list [self .inner_dtype .py_type ] # type: ignore
147147
148- # TODO: Specify emulating type and mapping expressions between said types
149148 @property
150149 def bf_type (self ) -> bigframes .dtypes .Dtype :
151150 return bigframes .dtypes .list_type (self .inner_dtype .bf_type )
@@ -165,7 +164,8 @@ def out_expr(
165164 if self .inner_dtype .py_type is str :
166165 return as_str_list
167166 elif self .inner_dtype .py_type is bool :
168- # TODO: hack so we don't need to make ArrayMap support general expressions yet
167+ # hack so we don't need to make ArrayMap support general expressions yet
168+ # with b/495513753 we can map the equality operator instead
169169 return ops .ArrayMapOp (ops .IsInOp (values = ("true" ,))).as_expr (as_str_list )
170170 else :
171171 return ops .ArrayMapOp (ops .AsTypeOp (self .inner_dtype .bf_type )).as_expr (
@@ -177,7 +177,7 @@ def sql_type(self) -> str:
177177 return f"ARRAY<{ self .inner_dtype .sql_type } >"
178178
179179 def stable_hash (self ) -> bytes :
180- hash_val = hashlib . md5 ()
180+ hash_val = google_crc32c . Checksum ()
181181 hash_val .update (self ._PROTOCOL_ID .encode ())
182182 hash_val .update (self .inner_dtype .stable_hash ())
183183 return hash_val .digest ()
@@ -212,7 +212,7 @@ def emulating_type(self) -> DirectScalarType:
212212 return DirectScalarType (str )
213213
214214 def stable_hash (self ) -> bytes :
215- hash_val = hashlib . md5 ()
215+ hash_val = google_crc32c . Checksum ()
216216 hash_val .update (self ._PROTOCOL_ID .encode ())
217217 return hash_val .digest ()
218218
@@ -227,6 +227,7 @@ class UdfSignature:
227227 output : DirectScalarType | VirtualListTypeV1
228228
229229 def __post_init__ (self ):
230+ # Validate inputs and outputs are of the correct types.
230231 assert all (isinstance (arg , UdfArg ) for arg in self .inputs )
231232 assert isinstance (self .output , (DirectScalarType , VirtualListTypeV1 ))
232233
@@ -240,7 +241,6 @@ def to_sql_input_signature(self) -> str:
240241 def protocol_metadata (self ) -> str | None :
241242 import bigframes .functions ._utils
242243
243- # TODO: The output field itself should handle this, to handle protocol versioning.
244244 if isinstance (self .output , VirtualListTypeV1 ):
245245 return bigframes .functions ._utils .get_bigframes_metadata (
246246 python_output_type = self .output .py_type
@@ -362,7 +362,7 @@ def to_remote_function_compatible(self) -> UdfSignature:
362362 return self
363363
364364 def stable_hash (self ) -> bytes :
365- hash_val = hashlib . md5 ()
365+ hash_val = google_crc32c . Checksum ()
366366 for input_type in self .inputs :
367367 hash_val .update (input_type .stable_hash ())
368368 hash_val .update (self .output .stable_hash ())
@@ -438,7 +438,7 @@ def stable_hash(self) -> bytes:
438438 def_copy , protocol = _pickle_protocol_version
439439 )
440440
441- hash_val = hashlib . md5 ()
441+ hash_val = google_crc32c . Checksum ()
442442 hash_val .update (normalized_pickled_code )
443443
444444 if self .package_requirements :
@@ -464,7 +464,7 @@ class CloudRunFunctionConfig:
464464 concurrency : int | None
465465
466466 def stable_hash (self ) -> bytes :
467- hash_val = hashlib . md5 ()
467+ hash_val = google_crc32c . Checksum ()
468468 hash_val .update (self .code .stable_hash ())
469469 hash_val .update (self .signature .stable_hash ())
470470 hash_val .update (str (self .timeout_seconds ).encode ())
@@ -503,7 +503,7 @@ def from_bq_routine(cls, routine: bigquery.Routine) -> RemoteFunctionConfig:
503503 )
504504
505505 def stable_hash (self ) -> bytes :
506- hash_val = hashlib . md5 ()
506+ hash_val = google_crc32c . Checksum ()
507507 hash_val .update (self .endpoint .encode ())
508508 hash_val .update (self .signature .stable_hash ())
509509 hash_val .update (self .connection_id .encode ())
0 commit comments