Skip to content

Commit 78c2dab

Browse files
committed
Reverted the command-line option for retries, due to issues with passing it to solids in the ingest pipeline. The retry count is hard-coded at 12 for now.
1 parent 5b340b6 commit 78c2dab

File tree

3 files changed

+25
-18
lines changed

3 files changed

+25
-18
lines changed

hca/staging_area_validator.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,13 @@ def main(self):
4242

4343
date_format = "%Y-%m-%dT%H:%M:%S.%fZ"
4444

45+
# removing total_retries as a config for now
4546
def __init__(
4647
self,
4748
staging_area: str,
4849
ignore_dangling_inputs: bool,
4950
validate_json: bool,
50-
total_retries,
51+
# total_retries,
5152
) -> None:
5253
super().__init__()
5354
self.staging_area = staging_area
@@ -57,7 +58,7 @@ def __init__(
5758
self.gcs = gcs.Client()
5859

5960
# Number of retries for validation
60-
self.total_retries = total_retries
61+
# self.total_retries = total_retries
6162
# A boolean to tell us if this is a delta or non-delta staging area
6263
self.is_delta = None
6364
# A mapping of data file name to metadata id
@@ -298,7 +299,8 @@ def validate_file_json(
298299
if self.validate_json:
299300
print(f"Validating JSON of {file_name}")
300301
try:
301-
self.validator.validate_json(file_json, self.total_retries, schema)
302+
# self.validator.validate_json(file_json, self.total_retries, schema)
303+
self.validator.validate_json(file_json, schema)
302304
except Exception as e:
303305
log.error("File %s failed json validation.", file_name)
304306
self.file_errors[file_name] = e
@@ -369,27 +371,31 @@ class SchemaValidator:
369371
def validate_json(
370372
cls,
371373
file_json: JSON,
372-
total_retries: int,
374+
# total_retries: int,
373375
schema: Optional[JSON] = None,
374376
) -> None:
375377
if schema is None:
376378
try:
377-
schema = cls._download_schema(file_json["describedBy"], total_retries)
379+
# schema = cls._download_schema(file_json["describedBy"], total_retries)
380+
schema = cls._download_schema(file_json["describedBy"])
378381
except json.decoder.JSONDecodeError as e:
379382
schema_url = file_json["describedBy"]
380383
raise Exception("Failed to parse schema JSON", schema_url) from e
381384
validate(file_json, schema, format_checker=FormatChecker())
382385

383386
@classmethod
384387
# setting to maxsize=None so as not to evict old values, and maybe help avoid connectivity issues (DI-22)
388+
# Could also have used the dagster.RetryPolicy for this
385389
@lru_cache(maxsize=None)
386-
def _download_schema(cls, schema_url: str, total_retries: int) -> JSON:
390+
# def _download_schema(cls, schema_url: str, total_retries: int) -> JSON:
391+
def _download_schema(cls, schema_url: str) -> JSON:
387392
log.debug("Downloading schema %s", schema_url)
388393

389394
s = requests.Session()
390-
log.debug(f"total_retries = {total_retries}")
395+
# log.debug(f"total_retries = {total_retries}")
391396
retries = Retry(
392-
total=total_retries,
397+
# total=total_retries,
398+
total=12,
393399
backoff_factor=0.2,
394400
status_forcelist=[500, 502, 503, 504],
395401
)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
55

66
[project]
77
name = "hca-import-validation"
8-
version = "0.0.11"
8+
version = "0.0.12"
99
description = "HCA Staging Import Validation"
1010
urls = {"Source" = "https://github.com/dataBiosphere/hca-import-validation"}
1111
classifiers = [

validate_staging_area.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,15 @@ def _parse_args(argv):
3333
dest="validate_json",
3434
help="Do not validate JSON documents against their schema.",
3535
)
36-
parser.add_argument(
37-
"--total-retries",
38-
"-t",
39-
type=int,
40-
default=10,
41-
dest="total_retries",
42-
help="The number of times to retry the validation process.",
43-
)
36+
# May add this back later if we really need to set this in the config
37+
# parser.add_argument(
38+
# "--total-retries",
39+
# "-t",
40+
# type=int,
41+
# default=10,
42+
# dest="total_retries",
43+
# help="The number of times to retry the validation process.",
44+
# )
4445
return parser.parse_args(argv)
4546

4647

@@ -50,6 +51,6 @@ def _parse_args(argv):
5051
staging_area=args.staging_area,
5152
ignore_dangling_inputs=args.ignore_dangling_inputs,
5253
validate_json=args.validate_json,
53-
total_retries=args.total_retries,
54+
# total_retries=args.total_retries,
5455
)
5556
sys.exit(adapter.main())

0 commit comments

Comments
 (0)