diff --git a/.circleci/config.yml b/.circleci/config.yml index 5440653d9b939..7b6a0f75a9313 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -288,7 +288,8 @@ jobs: source .venv/bin/activate mkdir parity_metrics && mv target/metric_reports/metric-report-raw-data-*amd64*.csv parity_metrics METRIC_REPORT_DIR_PATH=parity_metrics \ - COMMUNITY_IMPL_COV_PATH=scripts/implementation_coverage_full.csv \ + IMPLEMENTATION_COVERAGE_FILE=scripts/implementation_coverage_full.csv \ + SOURCE_TYPE=community \ python -m scripts.tinybird.upload_raw_test_metrics_and_coverage - store_artifacts: path: parity_metrics/ diff --git a/scripts/tinybird/retrieve_legacy_data_from_circleci.py b/scripts/tinybird/retrieve_legacy_data_from_circleci.py index 4fcfe074be362..acaefd0f1e166 100644 --- a/scripts/tinybird/retrieve_legacy_data_from_circleci.py +++ b/scripts/tinybird/retrieve_legacy_data_from_circleci.py @@ -223,7 +223,9 @@ def collect_workflows_past_30_days(): os.environ["CIRCLE_WORKFLOW_ID"] = str(workflow_id) # trigger the tinybird_upload - send_metric_report(metric_report_file_path, timestamp) + send_metric_report( + metric_report_file_path, source_type="community", timestamp=timestamp + ) send_implemented_coverage( community_coverage_file_path, timestamp=timestamp, type="community" ) diff --git a/scripts/tinybird/upload_raw_test_metrics_and_coverage.py b/scripts/tinybird/upload_raw_test_metrics_and_coverage.py index c3f3dc63213db..a51b6eb5a2040 100644 --- a/scripts/tinybird/upload_raw_test_metrics_and_coverage.py +++ b/scripts/tinybird/upload_raw_test_metrics_and_coverage.py @@ -55,18 +55,18 @@ service, operation, status_code, error_code, is_implemented* Additionally, we add the following information: -- *build_id*: the workflow-id (for CircleCI) +- *build_id*: the workflow-id (for CircleCI) or run-id (for GitHub) - *timestamp*: a timestamp as string which will be the same for the CircleCI run - *ls_source*: “community” for the CircleCI run, “pro” for the Github action In order to get more metadata from the build, we also send some general information to tests_raw_builds.datasource: -- *build_id:* the workflow-id (for CircleCI) -- *timestamp:* a timestamp as string which will be the same for the CircleCI run -- *branch:* env value from *`CIRCLE_BRANCH`* -- *build_url:* env value from *`CIRCLE_BUILD_URL`* -- *pull_requests:* env value from *`CIRCLE_PULL_REQUESTS`* -- *build_num:* env value from *`CIRCLE_BUILD_NUM`* -- *workflow_id:* env value from *`CIRCLE_WORKFLOW_ID`* +- *build_id:* the workflow-id (for CircleCI) or run-id (for GitHub) +- *timestamp:* a timestamp as string which will be the same for the CI run +- *branch:* env value from *`CIRCLE_BRANCH`* or *`GITHUB_HEAD_REF`* (only set for pull_requests) or *`GITHUB_REF_NAME`* +- *build_url:* env value from *`CIRCLE_BUILD_URL`* or *$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID* +- *pull_requests:* env value from *`CIRCLE_PULL_REQUESTS`* or *`GITHUB_REF`* +- *build_num:* env value from *`CIRCLE_BUILD_NUM`* (empty for GitHub, there seems to be no equivalent) +- *workflow_id:* env value from *`CIRCLE_WORKFLOW_ID`* or *`GITHUB_RUN_ID`* """ import csv @@ -138,21 +138,53 @@ def send_metadata_for_build(build_id: str, timestamp: str): CIRCLE_BUILD_NUM=78206 CIRCLE_BUILD_URL=https://circleci.com/gh/localstack/localstack/78206 CIRCLE_WORKFLOW_ID=b86a4bc4-bcd1-4170-94d6-4af66846c1c1 + + GitHub env examples: + GITHUB_REF=ref/heads/master or ref/pull//merge (will be used for 'pull_requests') + GITHUB_HEAD_REF=tinybird_data (used for 'branch', set only for pull_requests) + GITHUB_REF_NAME=feature-branch-1 (will be used for 'branch' if GITHUB_HEAD_REF is not set) + GITHUB_RUN_ID=1658821493 (will be used for 'workflow_id') + + workflow run's URL (will be used for 'build_url'): + $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID + + could not find anything that corresponds to "build_num" (number of current job in CircleCI) + -> leaving it blank for Github """ + # on GitHub the GITHUB_HEAD_REF is only set for pull_request, else we use the GITHUB_REF_NAME + branch = ( + os.environ.get("CIRCLE_BRANCH", "") + or os.environ.get("GITHUB_HEAD_REF", "") + or os.environ.get("GITHUB_REF_NAME", "") + ) + workflow_id = os.environ.get("CIRCLE_WORKFLOW_ID", "") or os.environ.get("GITHUB_RUN_ID", "") + + build_url = os.environ.get("CIRCLE_BUILD_URL", "") + if not build_url and os.environ.get("GITHUB_SERVER_URL"): + # construct the build-url for Github + server = os.environ.get("GITHUB_SERVER_URL", "") + repo = os.environ.get("GITHUB_REPOSITORY", "") + build_url = f"{server}/{repo}/actions/runs/{workflow_id}" + + pull_requests = os.environ.get("CIRCLE_PULL_REQUESTS", "") or os.environ.get("GITHUB_REF", "") + build_num = os.environ.get( + "CIRCLE_BUILD_NUM", "" + ) # TODO could not find equivalent job-id ENV in github + data = { "build_id": build_id, "timestamp": timestamp, - "branch": os.environ.get("CIRCLE_BRANCH", ""), - "build_url": os.environ.get("CIRCLE_BUILD_URL", ""), - "pull_requests": os.environ.get("CIRCLE_PULL_REQUESTS", ""), - "build_num": os.environ.get("CIRCLE_BUILD_NUM", ""), - "workflow_id": os.environ.get("CIRCLE_WORKFLOW_ID", ""), + "branch": branch, + "build_url": build_url, + "pull_requests": pull_requests, + "build_num": build_num, + "workflow_id": workflow_id, } data_to_send = [json.dumps(data)] send_data_to_tinybird(data_to_send, data_name=DATA_SOURCE_RAW_BUILDS) -def send_metric_report(metric_path: str, timestamp: str): +def send_metric_report(metric_path: str, source_type: str, timestamp: str): """ SCHEMA > @@ -172,7 +204,7 @@ def send_metric_report(metric_path: str, timestamp: str): """ tmp: list[str] = [] count: int = 0 - build_id = os.environ.get("CIRCLE_WORKFLOW_ID", "") + build_id = os.environ.get("CIRCLE_WORKFLOW_ID", "") or os.environ.get("GITHUB_RUN_ID", "") send_metadata_for_build(build_id, timestamp) pathlist = Path(metric_path).rglob("metric-report-raw-data-*.csv") @@ -187,7 +219,7 @@ def send_metric_report(metric_path: str, timestamp: str): # add timestamp, build_id, ls_source row["timestamp"] = timestamp row["build_id"] = build_id - row["ls_source"] = "community" + row["ls_source"] = source_type # remove data we are currently not interested in for field in data_to_remove: @@ -226,6 +258,7 @@ def send_implemented_coverage(file: str, timestamp: str, type: str): tmp: list[str] = [] count: int = 0 + build_id = os.environ.get("CIRCLE_WORKFLOW_ID", "") or os.environ.get("GITHUB_RUN_ID", "") with open(file, "r") as csv_obj: reader_obj = csv.DictReader(csv_obj) for row in reader_obj: @@ -240,7 +273,7 @@ def send_implemented_coverage(file: str, timestamp: str, type: str): # add timestamp and source row["timestamp"] = timestamp row["ls_source"] = type - row["build_id"] = os.environ.get("CIRCLE_WORKFLOW_ID", "") + row["build_id"] = build_id tmp.append(json.dumps(row)) if len(tmp) == 500: @@ -257,37 +290,37 @@ def send_implemented_coverage(file: str, timestamp: str, type: str): def main(): token = os.environ.get("TINYBIRD_PARITY_ANALYTICS_TOKEN", "") metric_report_dir = os.environ.get("METRIC_REPORT_DIR_PATH", "") - community_impl_coverage = os.environ.get("COMMUNITY_IMPL_COV_PATH", "") - + impl_coverage_file = os.environ.get("IMPLEMENTATION_COVERAGE_FILE", "") + source_type = os.environ.get("SOURCE_TYPE", "") missing_info = ( "missing data, please check the available ENVs that are required to run the script" ) - if not token: - print(missing_info) - print("missing TINYBIRD_PARITY_ANALYTICS_TOKEN") - return + print( + f"METRIC_REPORT_DIR_PATH={metric_report_dir}, IMPLEMENTATION_COVERAGE_FILE={impl_coverage_file}, " + f"SOURCE_TYPE={source_type}" + ) if not metric_report_dir: print(missing_info) print("missing METRIC_REPORT_DIR_PATH") return - if not community_impl_coverage: + if not impl_coverage_file: + print(missing_info) + print("missing IMPLEMENTATION_COVERAGE_FILE") + return + if not source_type: print(missing_info) - print("missing COMMUNITY_IMPL_COV_PATH") + print("missing SOURCE_TYPE") + return + if not token: + print(missing_info) + print("missing TINYBIRD_PARITY_ANALYTICS_TOKEN") return # create one timestamp that will be used for all the data sent timestamp: str = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") - # TODO re-enable pro tests (but these should be sent by the pro pipeline) - # pro_impl_coverage = os.environ.get("PRO_IMPL_COV_PATH", "") - # if not pro_impl_coverage: - # print(missing_info) - # print("missing PRO_IMPL_COV_PATH") - # return - # send_implemented_coverage(pro_impl_coverage, timestamp=timestamp, type="pro") - - send_metric_report(metric_report_dir, timestamp) - send_implemented_coverage(community_impl_coverage, timestamp=timestamp, type="community") + send_metric_report(metric_report_dir, source_type=source_type, timestamp=timestamp) + send_implemented_coverage(impl_coverage_file, timestamp=timestamp, type=source_type) if __name__ == "__main__":