diff --git a/bin/pants/README.md b/bin/pants/README.md new file mode 100644 index 0000000..46c523d --- /dev/null +++ b/bin/pants/README.md @@ -0,0 +1,10 @@ +This is the SCIE pants executable that is downloaded +by get-pants.sh from [the official site](https://www.pantsbuild.org/stable/docs/getting-started/installing-pants). + +It is in the repository so it is available to the CI/CD process. +We still need to add an automated way of checking for updates. +For now, someone should run +```bash +SCIE_BOOT=update ./pants +``` +occasionally and check in the resulting changes. diff --git a/bin/pants/pants b/bin/pants/pants new file mode 100755 index 0000000..b50a88f Binary files /dev/null and b/bin/pants/pants differ diff --git a/build_support/pants/README.pants.md b/build_support/pants/README.pants.md new file mode 100644 index 0000000..023c267 --- /dev/null +++ b/build_support/pants/README.pants.md @@ -0,0 +1,59 @@ +# `pants` + +## Setup + +When the current version of `pants` starts failing, we may need to download a +new `pants` launch script. Please see the official installation instructions: + +https://www.pantsbuild.org/stable/docs/getting-started/installing-pants + +## flake8 task + +The flake8 task has been developed in-house. When you upgrade `pants`, you may need to update +this file too: + +```none +build_support/pants/pants-plugins/flake8_task/flake8_task.py +``` + +## Useful commands + +- `./pants goals` + + Lists all goals. + +- `./pants help` + + Prints generic help. + +- `./pants clean-all` + + Deletes all build products, creating a clean workspace. + +## Additional information + +A pretty good starting resource to enable additional debugging information, +improve caching, or track down any dependency issues is the pants +troubleshooting page: + + + +## Pants caching + +Pants can store local intermediates to make builds consistent and +repeatable. + +The `~/.cache` directory can grow so large that it fills up NCBI accounts mounted on +`\\snowman`. 
We recommend adding the following to your +`~/.bash_profile` so that each new `bash` session routes cache products to a +directory under `/tmp`: + +``` +export XDG_CACHE_HOME=/tmp/${USER}/.cache +export PANTS_SETUP_CACHE=${XDG_CACHE_HOME}/pants/setup + +mkdir -p $XDG_CACHE_HOME $PANTS_SETUP_CACHE + +export PANTS_NAMED_CACHES_DIR=${XDG_CACHE_HOME}/named_caches +export PANTS_LOCAL_STORE_DIR=${XDG_CACHE_HOME}/local_stores +``` diff --git a/build_support/pants/coverage_py.ini b/build_support/pants/coverage_py.ini new file mode 100644 index 0000000..200d791 --- /dev/null +++ b/build_support/pants/coverage_py.ini @@ -0,0 +1,11 @@ +[report] +skip_covered=True +show_missing=True +sort=cover +; Regexes for lines to exclude from consideration +exclude_also = + ; TYPE_CHECKING lines are not executed + if TYPE_CHECKING: + + ; These lines are usually untestable setup code anyway + if __name__ == "__main__": diff --git a/build_support/pants/flake8.ini b/build_support/pants/flake8.ini new file mode 100644 index 0000000..3c91c3f --- /dev/null +++ b/build_support/pants/flake8.ini @@ -0,0 +1,16 @@ +[flake8] +max-line-length = 80 +count = True +statistics = True +show-source = True +max-complexity = 11 +; Reasons for globally ignoring these flake8 warnings: +; E203 is not PEP8 compliant; see https://github.com/psf/black/issues/1859 +; W503 is the opposite of the PEP8 recommendation "For new code Knuth's +; style is suggested." ... 
where Knuth's style is to break before the
+; operator
+ignore = E203,W503
+per_file_ignores =
+    ; Make flake8 ignore late module imports just like Ruff
+    utils/tests/*.py: E402
+    analyze_har_files/tests/*.py: E402
diff --git a/build_support/pants/mypy.ini b/build_support/pants/mypy.ini
new file mode 100644
index 0000000..fa4cbe5
--- /dev/null
+++ b/build_support/pants/mypy.ini
@@ -0,0 +1,11 @@
+[mypy]
+check_untyped_defs = True
+disallow_untyped_defs = True
+incremental = True
+show_column_numbers = True
+show_error_context = True
+scripts_are_modules = True
+ignore_missing_imports = True
+follow_imports = skip
+strict_optional = True
+warn_unused_ignores = True
diff --git a/build_support/pants/pants-plugins/flake8_task/.gitignore b/build_support/pants/pants-plugins/flake8_task/.gitignore
new file mode 100644
index 0000000..225fc6f
--- /dev/null
+++ b/build_support/pants/pants-plugins/flake8_task/.gitignore
@@ -0,0 +1 @@
+/__pycache__
diff --git a/build_support/pants/pants-plugins/flake8_task/__init__.py b/build_support/pants/pants-plugins/flake8_task/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/build_support/pants/pants-plugins/flake8_task/flake8_task.py b/build_support/pants/pants-plugins/flake8_task/flake8_task.py
new file mode 100644
index 0000000..25e4c61
--- /dev/null
+++ b/build_support/pants/pants-plugins/flake8_task/flake8_task.py
@@ -0,0 +1,221 @@
+import os
+import subprocess
+
+from pants.backend.python.interpreter_cache import PythonInterpreterCache
+from pants.backend.python.targets.python_binary import PythonBinary
+from pants.backend.python.targets.python_library import PythonLibrary
+from pants.backend.python.targets.python_target import PythonTarget
+from pants.backend.python.targets.python_tests import PythonTests
+from pants.backend.python.tasks.resolve_requirements_task_base import (
+    ResolveRequirementsTaskBase,
+)
+from pants.base.build_environment import get_buildroot
+from pants.base.exceptions import TaskError
+from pants.base.workunit import WorkUnit, WorkUnitLabel
+from pants.util.memo import memoized_property
+from pex.interpreter import PythonInterpreter
+from pex.pex import PEX
+from pex.pex_info import PexInfo
+
+
+class Flake8Task(ResolveRequirementsTaskBase):
+    """Invoke the flake8 PEP-8 checker for Python.
+
+    The task builds a flake8 PEX for a compatible interpreter and runs it
+    on the ``.py`` sources of the non-synthetic Python target roots.
+
+    NOTE(review): the base class and imports look like the Pants v1 task
+    API, while ``pants.toml`` pins ``pants_version = "2.23.0"`` and enables
+    the built-in ``pants.backend.python.lint.flake8`` backend -- confirm
+    that this plugin is still loaded by the current configuration.
+    """
+
+    _FLAKE8_COMPATIBLE_INTERPRETER_CONSTRAINT = ">=3.11"
+    # Alias kept so references to the original, misspelled name still work.
+    _FLAKE8_COMPATIBLE_INTERPETER_CONSTRAINT = (
+        _FLAKE8_COMPATIBLE_INTERPRETER_CONSTRAINT
+    )
+    _PYTHON_SOURCE_EXTENSION = ".py"
+
+    @classmethod
+    def prepare(cls, options, round_manager):
+        """Require the ``PythonInterpreter`` product for this task."""
+        super().prepare(options, round_manager)
+        round_manager.require_data(PythonInterpreter)
+
+    @classmethod
+    def register_options(cls, register):
+        """Register the ``--flake8-version`` and ``--config-file`` options."""
+        register(
+            "--flake8-version",
+            default="7.1.1",
+            help="The version of flake8 to use.",
+        )
+        register(
+            "--config-file",
+            default=None,
+            help=(
+                "Path to the flake8 configuration file, relative to "
+                "buildroot."
+            ),
+        )
+
+    @classmethod
+    def supports_passthru_args(cls):
+        """Tell Pants that this task accepts passthrough arguments."""
+        return True
+
+    @classmethod
+    def subsystem_dependencies(cls):
+        """Add ``PythonInterpreterCache`` to the inherited subsystems."""
+        return super().subsystem_dependencies() + (PythonInterpreterCache,)
+
+    def find_flake8_interpreter(self):
+        """Return the smallest compatible interpreter, or ``None``.
+
+        Candidates come from the interpreter cache, filtered by
+        ``_FLAKE8_COMPATIBLE_INTERPRETER_CONSTRAINT``; ``min`` picks one.
+        """
+        interpreters = self._interpreter_cache.setup(
+            filters=[self._FLAKE8_COMPATIBLE_INTERPRETER_CONSTRAINT]
+        )
+        return min(interpreters) if interpreters else None
+
+    @staticmethod
+    def is_non_synthetic_python_target(target):
+        """Return True for non-synthetic library, binary or tests targets."""
+        return not target.is_synthetic and isinstance(
+            target, (PythonLibrary, PythonBinary, PythonTests)
+        )
+
+    @staticmethod
+    def is_python_target(target):
+        """Return True if ``target`` is a ``PythonTarget``."""
+        return isinstance(target, PythonTarget)
+
+    def _calculate_python_sources(self, targets):
+        """Return the sorted, de-duplicated ``.py`` sources of ``targets``.
+
+        Only non-synthetic Python targets contribute. Paths are relative
+        to the buildroot; sorting keeps the flake8 command line stable
+        from run to run.
+        """
+        sources = set()
+        for target in targets:
+            if not self.is_non_synthetic_python_target(target):
+                continue
+            sources.update(
+                source
+                for source in target.sources_relative_to_buildroot()
+                if os.path.splitext(source)[1] == self._PYTHON_SOURCE_EXTENSION
+            )
+        return sorted(sources)
+
+    def _collect_source_roots(self):
+        """Return the ``target_base`` of each target that has ``.py`` sources.
+
+        All Python targets in the context are considered, whether they are
+        target roots or transitive dependencies.
+        """
+        return {
+            target.target_base
+            for target in self.context.targets(self.is_python_target)
+            if target.has_sources(self._PYTHON_SOURCE_EXTENSION)
+        }
+
+    @memoized_property
+    def _interpreter_cache(self):
+        """Return the global ``PythonInterpreterCache`` instance."""
+        return PythonInterpreterCache.global_instance()
+
+    def _run_flake8(self, py3_interpreter, flake8_args, **kwargs):
+        """Run flake8 from a PEX and return the result of ``PEX.run``.
+
+        The PEX lives in a work directory keyed by interpreter identity and
+        flake8 version and is merged only when that directory is missing.
+        ``kwargs`` are forwarded to ``PEX.run``.
+        """
+        pex_info = PexInfo.default()
+        pex_info.entry_point = "flake8"
+        flake8_version = self.get_options().flake8_version
+
+        flake8_requirement_pex = self.resolve_requirement_strings(
+            py3_interpreter,
+            [f"flake8=={flake8_version}", "teamcity-messages"],
+        )
+
+        path = os.path.realpath(
+            os.path.join(
+                self.workdir, str(py3_interpreter.identity), flake8_version
+            )
+        )
+        if not os.path.isdir(path):
+            self.merge_pexes(
+                path, pex_info, py3_interpreter, [flake8_requirement_pex]
+            )
+        pex = PEX(path, py3_interpreter)
+        return pex.run(flake8_args, **kwargs)
+
+    def execute(self):
+        """Run flake8 on the Python sources of the target roots.
+
+        Logs a warning and returns early when there is nothing to check.
+
+        Raises:
+            TaskError: if no compatible interpreter is found, or if flake8
+                exits with a non-zero return code.
+        """
+        flake8_interpreter = self.find_flake8_interpreter()
+        if not flake8_interpreter:
+            constraint = self._FLAKE8_COMPATIBLE_INTERPRETER_CONSTRAINT
+            raise TaskError(
+                f"Unable to find a Python {constraint} interpreter "
+                "(required for flake8)."
+            )
+
+        sources = self._calculate_python_sources(self.context.target_roots)
+        if not sources:
+            self.context.log.warning("No Python sources to check.")
+            return
+
+        options = self.get_options()
+
+        # Construct the flake8 command line.
+        cmd = []
+        if options.config_file:
+            config_path = os.path.join(get_buildroot(), options.config_file)
+            cmd.append(f"--config={config_path}")
+        cmd.extend(self.get_passthru_args())
+        cmd.extend(sources)
+        cmdline = " ".join(cmd)
+        self.context.log.debug(f"flake8 command: {cmdline}")
+
+        # Collect source roots for the targets being checked; sort them so
+        # the joined path is stable from run to run.
+        buildroot = get_buildroot()
+        flake8_path = os.pathsep.join(
+            os.path.join(buildroot, root)
+            for root in sorted(self._collect_source_roots())
+        )
+
+        # Execute flake8.
+        with self.context.new_workunit(
+            name="check",
+            labels=[WorkUnitLabel.TOOL, WorkUnitLabel.RUN],
+            log_config=WorkUnit.LogConfig(
+                log_level=options.level,
+                colors=options.colors,
+            ),
+            cmd=cmdline,
+        ) as workunit:
+            returncode = self._run_flake8(
+                flake8_interpreter,
+                cmd,
+                # NOTE(review): nothing in this file reads FLAKE8PATH;
+                # confirm that flake8 itself uses it before relying on it.
+                env={"FLAKE8PATH": flake8_path},
+                stdout=workunit.output("stdout"),
+                stderr=subprocess.STDOUT,
+            )
+            if returncode != 0:
+                raise TaskError(f"flake8 failed: code={returncode}")
diff --git a/build_support/pants/pants-plugins/flake8_task/register.py b/build_support/pants/pants-plugins/flake8_task/register.py
new file mode 100644
index 0000000..e411c05
--- /dev/null
+++ b/build_support/pants/pants-plugins/flake8_task/register.py
@@ -0,0 +1,8 @@
+from pants.goal.task_registrar import TaskRegistrar as task
+
+from flake8_task.flake8_task import Flake8Task
+
+
+def register_goals():
+    """Register ``Flake8Task`` under the name ``flake8``."""
+    task(name="flake8", action=Flake8Task).install("flake8")
diff --git a/jupyter/BUILD b/jupyter/BUILD
new file mode 100644
index 0000000..e33424e
--- /dev/null
+++ b/jupyter/BUILD
@@ -0,0 +1,4 @@
+python_sources(
+    name = "dbgap_fhir_src",
+    sources = ["dbgap_fhir.py"],
+)
diff --git a/jupyter/dbgap_fhir.py b/jupyter/dbgap_fhir.py
index 2cd19b4..2fd1032 100644
--- a/jupyter/dbgap_fhir.py
+++ b/jupyter/dbgap_fhir.py
@@ -2,12 +2,8 @@
 import sys
 import json
 import requests
-import pandas as pd
-import numpy as np
 from pathlib import Path
-from datetime import datetime
 import time
-import pprint
 
 
 class DbGapFHIR:
diff --git a/pants b/pants
new file mode 100755
index 0000000..9c33c95
--- /dev/null
+++ b/pants
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+# Strict mode
+set -euo pipefail
+IFS=$'\n\t'
+
+script_path="$(readlink -f "${BASH_SOURCE[0]}")"
+script_dir="$(dirname "$script_path")"
+pants_path="$script_dir/bin/pants/pants"
+
+# Point pants at the system CA bundle when it exists; otherwise run without.
+cert_path=/etc/pki/tls/certs/ca-bundle.crt
+if [ -e "$cert_path" ]; then
+    SSL_CERT_FILE="$cert_path" "$pants_path" "$@"
+else
+    echo "Warning: 
certificate bundle is not at $cert_path" >&2 + echo "Attempting to run pants without it" >&2 + "$pants_path" "$@" +fi + diff --git a/pants.toml b/pants.toml new file mode 100644 index 0000000..1a5da66 --- /dev/null +++ b/pants.toml @@ -0,0 +1,53 @@ +[GLOBAL] +pants_version = "2.23.0" + +pythonpath = [ + "%(buildroot)s/build_support/pants/pants-plugins", + ] + +backend_packages = [ + 'pants.backend.python', + 'pants.backend.python.lint.flake8', + 'pants.backend.python.typecheck.mypy', + 'pants.backend.python.mixed_interpreter_constraints', + ] + +# Path patterns to ignore for filesystem operations on top of the builtin patterns. +pants_ignore.add = [ + 'cpp', + 've', + '.ve', + 'ncbi-toolkit-root', + ] + +#[python-bootstrap] +#search_path = [ +# "", +# "%(buildroot)s/bin/pypy/bin", +# ] + +[python] +interpreter_constraints = ["CPython>=3.11"] + +[python-repos] +indexes= [ + 'https://artifactory.ncbi.nlm.nih.gov/artifactory/api/pypi/python-virtual-repo/simple/'] + +[mypy] +config = "build_support/pants/mypy.ini" +interpreter_constraints = ["CPython>=3.11"] + +[flake8] +config = "build_support/pants/flake8.ini" + +[coverage-py] +config = "build_support/pants/coverage_py.ini" +interpreter_constraints = ["CPython>=3.11"] +report = [ + "console", + "html", +] + +[anonymous-telemetry] +enabled=true +repo_id="7f9f781c-2f0c-4a2e-b5bb-92b38713f564" diff --git a/pilot/jupyter/BUILD b/pilot/jupyter/BUILD new file mode 100644 index 0000000..a8d41ce --- /dev/null +++ b/pilot/jupyter/BUILD @@ -0,0 +1,4 @@ +python_sources( + name = "fhir_fetcher_src", + sources = ["fhir_fetcher.py"], +)