diff --git a/README.md b/README.md index f7255e3..a663d2d 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ pip install git+https://gitlab.com/meltano/tap-gitlab.git Create a JSON file called `config.json` containing: - Access token you just created - API URL for your GitLab account. If you are using the public gitlab.com this will be `https://gitlab.com/api/v4` - - Groups to track (space separated) + - Groups to track (space separated) - Projects to track (space separated) Notes on group and project options: @@ -79,6 +79,7 @@ pip install git+https://gitlab.com/meltano/tap-gitlab.git "ultimate_license": true, "fetch_merge_request_commits": false, "fetch_pipelines_extended": false, + "fetch_retried_jobs": false, "fetch_group_variables": false, "fetch_project_variables": false } @@ -92,6 +93,8 @@ pip install git+https://gitlab.com/meltano/tap-gitlab.git If `fetch_pipelines_extended` is true (defaults to false), then for every Pipeline fetched with `sync_pipelines` (which returns N pages containing all pipelines per project), also fetch extended details of each of these pipelines with `sync_pipelines_extended`. Similar concerns as those related to `fetch_merge_request_commits` apply here - every pipeline fetched with `sync_pipelines_extended` requires a separate API call. + If `fetch_retried_jobs` is true (defaults to false), then jobs that were retried are also included when the jobs of each pipeline are fetched (the `include_retried` parameter is passed to the GitLab pipeline jobs endpoint). + If `fetch_group_variables` is true (defaults to false), then Group-level CI/CD variables will be retrieved for each available / specified group. This feature is treated as an opt-in to prevent users from accidentally extracting any potential secrets stored as Group-level CI/CD variables. If `fetch_project_variables` is true (defaults to false), then Project-level CI/CD variables will be retrieved for each available / specified project. This feature is treated as an opt-in to prevent users from accidentally extracting any potential secrets stored as Project-level CI/CD variables. 
diff --git a/tap_gitlab/__init__.py b/tap_gitlab/__init__.py index 1e98095..8303361 100644 --- a/tap_gitlab/__init__.py +++ b/tap_gitlab/__init__.py @@ -24,6 +24,7 @@ 'ultimate_license': False, 'fetch_merge_request_commits': False, 'fetch_pipelines_extended': False, + 'fetch_retried_jobs': False, 'fetch_group_variables': False, 'fetch_project_variables': False, } @@ -71,7 +72,7 @@ def load_schema(entity): 'replication_keys': ['updated_at'], }, 'jobs': { - 'url': '/projects/{id}/pipelines/{secondary_id}/jobs', + 'url': '/projects/{id}/pipelines/{secondary_id}/jobs?include_retried={fetch_retried_jobs}', 'schema': load_schema('jobs'), 'key_properties': ['id'], 'replication_method': 'FULL_TABLE', @@ -229,7 +230,7 @@ class ResourceInaccessible(Exception): def truthy(val) -> bool: return str(val).lower() in TRUTHY -def get_url(entity, id, secondary_id=None, start_date=None): +def get_url(entity, id, secondary_id=None, start_date=None, fetch_retried_jobs=False): if not isinstance(id, int): id = id.replace("/", "%2F") @@ -239,7 +240,8 @@ def get_url(entity, id, secondary_id=None, start_date=None): return CONFIG['api_url'] + RESOURCES[entity]['url'].format( id=id, secondary_id=secondary_id, - start_date=start_date + start_date=start_date, + fetch_retried_jobs=fetch_retried_jobs, ) @@ -250,8 +252,8 @@ def get_start(entity): @backoff.on_predicate(backoff.runtime, predicate=lambda r: r.status_code == 429, - max_tries=5, - value=lambda r: int(r.headers.get("Retry-After")), + max_tries=5, + value=lambda r: int(r.headers.get("Retry-After")), jitter=None) @backoff.on_exception(backoff.expo, (requests.exceptions.RequestException), @@ -770,8 +772,9 @@ def sync_jobs(project, pipeline): if stream is None or not stream.is_selected(): return mdata = metadata.to_map(stream.metadata) + fetch_retried_jobs = CONFIG['fetch_retried_jobs'] - url = get_url(entity=entity, id=project['id'], secondary_id=pipeline['id']) + url = get_url(entity=entity, id=project['id'], secondary_id=pipeline['id'], 
fetch_retried_jobs=fetch_retried_jobs) with Transformer(pre_hook=format_timestamp) as transformer: for row in gen_request(url): row['project_id'] = project['id'] @@ -932,6 +935,7 @@ def main_impl(): CONFIG['ultimate_license'] = truthy(CONFIG['ultimate_license']) CONFIG['fetch_merge_request_commits'] = truthy(CONFIG['fetch_merge_request_commits']) CONFIG['fetch_pipelines_extended'] = truthy(CONFIG['fetch_pipelines_extended']) + CONFIG['fetch_retried_jobs'] = truthy(CONFIG['fetch_retried_jobs']) CONFIG['fetch_group_variables'] = truthy(CONFIG['fetch_group_variables']) CONFIG['fetch_project_variables'] = truthy(CONFIG['fetch_project_variables'])