@@ -42,12 +42,13 @@ def main(self):
42
42
43
43
date_format = "%Y-%m-%dT%H:%M:%S.%fZ"
44
44
45
+ # total_retries is no longer a constructor option for now; the retry count is hard-coded in SchemaValidator._download_schema
45
46
def __init__ (
46
47
self ,
47
48
staging_area : str ,
48
49
ignore_dangling_inputs : bool ,
49
50
validate_json : bool ,
50
- total_retries ,
51
+ # total_retries,
51
52
) -> None :
52
53
super ().__init__ ()
53
54
self .staging_area = staging_area
@@ -57,7 +58,7 @@ def __init__(
57
58
self .gcs = gcs .Client ()
58
59
59
60
# Number of retries for validation
60
- self .total_retries = total_retries
61
+ # self.total_retries = total_retries
61
62
# A boolean to tell us if this is a delta or non-delta staging area
62
63
self .is_delta = None
63
64
# A mapping of data file name to metadata id
@@ -298,7 +299,8 @@ def validate_file_json(
298
299
if self .validate_json :
299
300
print (f"Validating JSON of { file_name } " )
300
301
try :
301
- self .validator .validate_json (file_json , self .total_retries , schema )
302
+ # self.validator.validate_json(file_json, self.total_retries, schema)
303
+ self .validator .validate_json (file_json , schema )
302
304
except Exception as e :
303
305
log .error ("File %s failed json validation." , file_name )
304
306
self .file_errors [file_name ] = e
@@ -369,27 +371,31 @@ class SchemaValidator:
369
371
def validate_json(
    cls,
    file_json: JSON,
    schema: Optional[JSON] = None,
) -> None:
    """Validate ``file_json`` against a JSON schema.

    When no ``schema`` is passed, the schema is obtained by handing the
    document's ``describedBy`` value to ``cls._download_schema``.

    Args:
        file_json: The parsed JSON document to validate.
        schema: Schema to validate against. If ``None``, it is downloaded
            from the URL stored under ``file_json["describedBy"]``.

    Raises:
        Exception: If downloading the schema fails with a
            ``json.decoder.JSONDecodeError``; the args are the message
            and the schema URL, and the decode error is chained as the
            cause.
        KeyError: If ``schema`` is ``None`` and ``file_json`` has no
            ``describedBy`` entry.

    Any error raised by ``validate`` itself propagates unchanged.
    """
    effective_schema = schema
    if effective_schema is None:
        try:
            effective_schema = cls._download_schema(file_json["describedBy"])
        except json.decoder.JSONDecodeError as decode_error:
            # Surface the offending URL alongside the message so the caller
            # can tell which schema could not be parsed.
            failed_url = file_json["describedBy"]
            raise Exception("Failed to parse schema JSON", failed_url) from decode_error

    validate(file_json, effective_schema, format_checker=FormatChecker())
382
385
383
386
@classmethod
384
387
# maxsize=None keeps the cache unbounded so previously downloaded schemas are never evicted, which may also help avoid connectivity issues (DI-22)
388
+ # Could also have used the dagster.RetryPolicy for this
385
389
@lru_cache (maxsize = None )
386
- def _download_schema (cls , schema_url : str , total_retries : int ) -> JSON :
390
+ # def _download_schema(cls, schema_url: str, total_retries: int) -> JSON:
391
+ def _download_schema (cls , schema_url : str ) -> JSON :
387
392
log .debug ("Downloading schema %s" , schema_url )
388
393
389
394
s = requests .Session ()
390
- log .debug (f"total_retries = { total_retries } " )
395
+ # log.debug(f"total_retries = {total_retries}")
391
396
retries = Retry (
392
- total = total_retries ,
397
+ # total=total_retries,
398
+ total = 12 ,
393
399
backoff_factor = 0.2 ,
394
400
status_forcelist = [500 , 502 , 503 , 504 ],
395
401
)
0 commit comments