Skip to content

Commit

Permalink
feat: enable fetching retried jobs
Browse files Browse the repository at this point in the history
  • Loading branch information
a-meynard committed Nov 18, 2024
1 parent 44e9410 commit 08ad14e
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 7 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ pip install git+https://gitlab.com/meltano/tap-gitlab.git
Create a JSON file called `config.json` containing:
- Access token you just created
- API URL for your GitLab account. If you are using the public gitlab.com this will be `https://gitlab.com/api/v4`
- Groups to track (space separated)
- Groups to track (space separated)
- Projects to track (space separated)

Notes on group and project options:
Expand All @@ -79,6 +79,7 @@ pip install git+https://gitlab.com/meltano/tap-gitlab.git
"ultimate_license": true,
"fetch_merge_request_commits": false,
"fetch_pipelines_extended": false,
"fetch_retried_jobs": false,
"fetch_group_variables": false,
"fetch_project_variables": false
}
Expand All @@ -92,6 +93,8 @@ pip install git+https://gitlab.com/meltano/tap-gitlab.git

If `fetch_pipelines_extended` is true (defaults to false), then for every Pipeline fetched with `sync_pipelines` (which returns N pages containing all pipelines per project), also fetch extended details of each of these pipelines with `sync_pipelines_extended`. Similar concerns as those related to `fetch_merge_request_commits` apply here - every pipeline fetched with `sync_pipelines_extended` requires a separate API call.

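If `fetch_retried_jobs` is true (defaults to false), then the `jobs` stream also includes jobs that were retried, by passing the value through as the `include_retried` parameter of the pipeline jobs API request. When it is false, retried jobs are left out.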

If `fetch_group_variables` is true (defaults to false), then Group-level CI/CD variables will be retrieved for each available / specified group. This feature is treated as an opt-in to prevent users from accidentally extracting any potential secrets stored as Group-level CI/CD variables.

If `fetch_project_variables` is true (defaults to false), then Project-level CI/CD variables will be retrieved for each available / specified project. This feature is treated as an opt-in to prevent users from accidentally extracting any potential secrets stored as Project-level CI/CD variables.
Expand Down
16 changes: 10 additions & 6 deletions tap_gitlab/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
'ultimate_license': False,
'fetch_merge_request_commits': False,
'fetch_pipelines_extended': False,
'fetch_retried_jobs': False,
'fetch_group_variables': False,
'fetch_project_variables': False,
}
Expand Down Expand Up @@ -71,7 +72,7 @@ def load_schema(entity):
'replication_keys': ['updated_at'],
},
'jobs': {
'url': '/projects/{id}/pipelines/{secondary_id}/jobs',
'url': '/projects/{id}/pipelines/{secondary_id}/jobs?include_retried={fetch_retried_jobs}',
'schema': load_schema('jobs'),
'key_properties': ['id'],
'replication_method': 'FULL_TABLE',
Expand Down Expand Up @@ -229,7 +230,7 @@ class ResourceInaccessible(Exception):
def truthy(val) -> bool:
    """Return True when the lowercased string form of *val* is in TRUTHY.

    The value is passed through ``str()`` first, so non-string inputs
    are compared by their text representation.
    """
    normalized = str(val).lower()
    return normalized in TRUTHY

def get_url(entity, id, secondary_id=None, start_date=None):
def get_url(entity, id, secondary_id=None, start_date=None, fetch_retried_jobs=False):
if not isinstance(id, int):
id = id.replace("/", "%2F")

Expand All @@ -239,7 +240,8 @@ def get_url(entity, id, secondary_id=None, start_date=None):
return CONFIG['api_url'] + RESOURCES[entity]['url'].format(
id=id,
secondary_id=secondary_id,
start_date=start_date
start_date=start_date,
fetch_retried_jobs=fetch_retried_jobs,
)


Expand All @@ -250,8 +252,8 @@ def get_start(entity):

@backoff.on_predicate(backoff.runtime,
predicate=lambda r: r.status_code == 429,
max_tries=5,
value=lambda r: int(r.headers.get("Retry-After")),
max_tries=5,
value=lambda r: int(r.headers.get("Retry-After")),
jitter=None)
@backoff.on_exception(backoff.expo,
(requests.exceptions.RequestException),
Expand Down Expand Up @@ -770,8 +772,9 @@ def sync_jobs(project, pipeline):
if stream is None or not stream.is_selected():
return
mdata = metadata.to_map(stream.metadata)
fetch_retried_jobs = CONFIG['fetch_retried_jobs']

url = get_url(entity=entity, id=project['id'], secondary_id=pipeline['id'])
url = get_url(entity=entity, id=project['id'], secondary_id=pipeline['id'], fetch_retried_jobs=fetch_retried_jobs)
with Transformer(pre_hook=format_timestamp) as transformer:
for row in gen_request(url):
row['project_id'] = project['id']
Expand Down Expand Up @@ -932,6 +935,7 @@ def main_impl():
CONFIG['ultimate_license'] = truthy(CONFIG['ultimate_license'])
CONFIG['fetch_merge_request_commits'] = truthy(CONFIG['fetch_merge_request_commits'])
CONFIG['fetch_pipelines_extended'] = truthy(CONFIG['fetch_pipelines_extended'])
CONFIG['fetch_retried_jobs'] = truthy(CONFIG['fetch_retried_jobs'])
CONFIG['fetch_group_variables'] = truthy(CONFIG['fetch_group_variables'])
CONFIG['fetch_project_variables'] = truthy(CONFIG['fetch_project_variables'])

Expand Down

0 comments on commit 08ad14e

Please sign in to comment.