
Initial commit
ostrokach committed Sep 11, 2020
0 parents commit d0d9620
Showing 34 changed files with 1,826 additions and 0 deletions.
26 changes: 26 additions & 0 deletions .ci/conda/meta.yaml
@@ -0,0 +1,26 @@
package:
name: elaspic-v2
version: 0.1.0

source:
path: ../../

build:
script: {{ PYTHON }} -m pip install . --no-deps --ignore-installed -vv
number: 0
noarch: python

requirements:
build:
- python
run:
- python

test:
imports:
- elaspic_v2

about:
home: https://gitlab.com/ostrokach/elaspic-v2
license: MIT License
summary: "Predicting the effect of mutations on protein folding and protein-protein interaction."
21 changes: 21 additions & 0 deletions .ci/pages/README.md
@@ -0,0 +1,21 @@
# Pages

The scripts in this directory create a `./public` folder containing documentation built for multiple versions (tags) of this repository.

When the repository is public, our job is easy: we simply download the `artifact.zip` file from a publicly-accessible URL (see: [downloading the latest artifacts]). However, when the repository is private, using the above-mentioned URL does not work (see: [gitlab-org/gitlab-ce#22957]). In that case, we resort to using the GitLab API instead.
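In the meantime, the API-based route (the approach taken by `download_docs.py`, using the `python-gitlab` package) looks roughly like the following sketch; the project ID, job ID, and token are placeholders:

```python
import gitlab

# Placeholder values: substitute the real project ID, job ID, and private token.
gl = gitlab.Gitlab("https://gitlab.com", private_token="XXXXX")
project = gl.projects.get(12345)
job = project.jobs.get(67890, lazy=True)  # lazy=True avoids an extra API call

# Stream the artifact archive (artifact.zip) to disk.
with open("artifact.zip", "wb") as fout:
    job.artifacts(streamed=True, action=fout.write)
```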

If [gitlab-org/gitlab-ce#22957] is ever fixed, we will be able to specify
`--header "Private-Token: XXXXX"` or attach `&private_token=XXXXX` to the query string
and keep using the original URL:

```bash
curl --header "Private-Token: XXXXX" \
"https://gitlab.com/user/repo/-/jobs/artifacts/ref/download?job=job_name"
```

Good resource: <https://docs.gitlab.com/ee/api/jobs.html#download-the-artifacts-archive>.

<!-- Links -->

[downloading the latest artifacts]: https://docs.gitlab.com/ee/user/project/pipelines/job_artifacts.html#downloading-the-latest-artifacts
[gitlab-org/gitlab-ce#22957]: https://gitlab.com/gitlab-org/gitlab-ce/issues/22957
249 changes: 249 additions & 0 deletions .ci/pages/download_docs.py
@@ -0,0 +1,249 @@
import argparse
import concurrent.futures
import functools
import itertools
import json
import logging
import os
import re
import shutil
import string
import subprocess
import sys
import tempfile
import zipfile
from datetime import datetime
from pathlib import Path
from typing import AbstractSet, Any, Dict, List, NamedTuple, Optional, Set, Union

import gitlab
from jinja2 import Template

logger = logging.getLogger()
PROJECT_ROOT = Path(__file__).parent
URL = "ttps://ostrokach.gitlab.io"


class JobInfo(NamedTuple):
job_id: int
tag_name: str
finished_at: datetime
folder: Optional[Path] = None


def main(args):
"""
Args:
job_name: Name of the job building desired artifacts.
project_id: ID of the project.
private_token: Token for accessing GitLab API.
output_path: Location where pages should be extracted
"""
output_path = Path(args.output_dir).expanduser().resolve(strict=True)

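    # Folders already present in the output directory count as previously
    # downloaded versions; their creation time stands in for `finished_at`.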
existing_jobs = [
JobInfo(0, p.name, datetime.fromtimestamp(p.stat().st_ctime), p)
for p in output_path.glob("*")
if p.is_dir()
]
existing_tag_names = {j.tag_name for j in existing_jobs}
logger.info("Existing tag names: %s.", existing_tag_names)

if args.project_id is not None:
        previous_jobs = download_previous_versions(
args.project_id, args.job_name, args.private_token, output_path, existing_tag_names
)
else:
previous_jobs = []

all_jobs = sort_jobs_by_tag(existing_jobs + previous_jobs)
all_jobs = [j for j in all_jobs if j.folder is not None]
version_lst = jobs_to_version_lst(all_jobs)
with output_path.joinpath("versions.json").open("wt") as fout:
json.dump(version_lst, fout, indent=4)
index_source = render_html(all_jobs)
write_index_files(index_source, output_path)


def download_previous_versions(
project_id: Union[str, int],
job_name: str,
private_token: str,
output_path: Path,
    existing_tag_names: AbstractSet[str] = frozenset(),
):
gl = gitlab.Gitlab("https://gitlab.com", private_token=private_token)
project = gl.projects.get(project_id)
refs = {t.name for t in project.tags.list(all=True, as_list=False)}

job_list = get_job_list(project, job_name=job_name, refs=refs)
job_list = remove_duplicate_tags(job_list)
job_list = [j for j in job_list if j.tag_name not in existing_tag_names]

with tempfile.TemporaryDirectory() as temp_dir:
artifact_files = download_artifacts(project, job_list, Path(temp_dir))
job_paths = extract_artifacts(job_list, artifact_files, output_path)
job_list = [j._replace(folder=jp) for j, jp in zip(job_list, job_paths)]
return job_list


def get_job_list(project, job_name=None, refs=None) -> List[JobInfo]:
job_list: List[JobInfo] = []
for pipeline in project.pipelines.list(all=True, as_list=False):
if refs is None or pipeline.attributes["ref"] in refs:
for job in pipeline.jobs.list(all=True, as_list=False):
if job.attributes["status"] == "success" and (
job_name is None or job.attributes["name"] == job_name
):
finished_at = datetime.strptime(
job.attributes["finished_at"], "%Y-%m-%dT%H:%M:%S.%fZ"
)
job_list.append(JobInfo(job.id, job.attributes["ref"], finished_at))
return job_list


def remove_duplicate_tags(job_list: List[JobInfo]) -> List[JobInfo]:
job_list = sort_jobs_by_date(job_list)

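    # `set.add` always returns None, so `not _seen.add(...)` is always true and
    # serves only to record the tag as seen; because the list is sorted newest
    # first, only the most recent job per tag survives the filter.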
_seen: Set[str] = set()
job_list = [
j for j in job_list if j.tag_name not in _seen and not _seen.add(j.tag_name) # type: ignore
]
return job_list


def sort_jobs_by_date(job_list: List[JobInfo]) -> List[JobInfo]:
job_list = sorted(job_list, key=lambda x: x.finished_at, reverse=True)
assert len(job_list) == 0 or job_list[0].finished_at >= job_list[-1].finished_at
return job_list


def sort_jobs_by_tag(job_list: List[JobInfo]) -> List[JobInfo]:
def _str_to_float(s):
s = s.strip(string.ascii_letters)
try:
return float(s)
except ValueError:
return float("inf")

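    # Compare tags component-wise as numbers after splitting on whitespace,
    # ".", "-", and "_": e.g. "v1.10.0" -> (1.0, 10.0, 0.0) sorts above
    # "v1.9.0" -> (1.0, 9.0, 0.0); non-numeric components compare as infinity.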
job_list = sorted(
job_list,
key=lambda j: tuple(_str_to_float(s) for s in re.split(r"\s|\.|-|_", j.tag_name)),
reverse=True,
)

return job_list


def download_artifacts(project, job_list: List[JobInfo], temp_dir: Path) -> List[Path]:
artifact_files = []
for j in job_list:
job = project.jobs.get(j.job_id, lazy=True)
artifact_file = Path(temp_dir, j.tag_name + ".zip")
with artifact_file.open("wb") as fout:
job.artifacts(streamed=True, action=fout.write)
artifact_files.append(artifact_file)
return artifact_files


def extract_artifacts(job_list: List[JobInfo], artifact_files: List[Path], output_path: Path):
"""
This may be faster than using :any:`zipfile.ZipFile`.
"""
unzip = shutil.which("unzip")
Pool: object
if unzip is not None:
Pool = concurrent.futures.ThreadPoolExecutor
fn = functools.partial(_extract_artifact, _extract_fn=_extract1)
else:
Pool = concurrent.futures.ProcessPoolExecutor
fn = functools.partial(_extract_artifact, _extract_fn=_extract2)
with Pool() as pool:
futures = pool.map(fn, job_list, artifact_files, itertools.repeat(output_path))
results = list(futures)
return results


def _extract1(artifact_file: Path, temp_dir: str):
proc = subprocess.run(
["unzip", "-o", "-q", "-d", temp_dir, artifact_file],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
)
output = proc.stdout + proc.stderr
if output.strip():
logger.info(output.strip())


def _extract2(artifact_file: Path, temp_dir: str):
with zipfile.ZipFile(artifact_file, "r") as zip_file:
zip_file.extractall(temp_dir)


def _extract_artifact(
job_info: JobInfo, artifact_file: Path, output_path: Path, _extract_fn=_extract1
) -> Optional[Path]:
"""
Args:
artifact_file: Zip archive containing artifact data. Should have name '{tag}.zip'.
output_path: Location where the 'public' artifact folder will be extracted.
"""
version_path = Path(output_path, artifact_file.stem)
if version_path.exists():
msg = f"Version path '{version_path}' already exists! Skipping."
logger.warning(msg)
return None
with tempfile.TemporaryDirectory() as temp_dir:
_extract_fn(artifact_file, temp_dir)
folders = os.listdir(temp_dir)
if "public" not in folders:
msg = f"The artifact archive should contain a 'public' folder! Found {folders}."
logger.info(msg)
return None
else:
shutil.move(Path(temp_dir, "public"), version_path)
    # datetime.timestamp() is portable, unlike the glibc-only strftime("%s").
    finished_at_seconds = int(job_info.finished_at.timestamp())
os.utime(version_path, (finished_at_seconds, finished_at_seconds))
return version_path


def render_html(job_list: List[JobInfo]) -> str:
with PROJECT_ROOT.joinpath("templates", "index.html").open("rt") as fin:
template_src = fin.read()
template = Template(template_src)
source = template.render(URL=URL, items=job_list)
return source


def jobs_to_version_lst(all_jobs: List[JobInfo]) -> List[Dict[str, Any]]:
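    # Shape of the resulting versions.json payload (hypothetical tag "v0.1.0"):
    #     [{"latest": true, "version": "0.1.0", "display": "0.1.0",
    #       "url": "https://ostrokach.gitlab.io/v0.1.0/"}]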
version_lst = []
for i, job_info in enumerate(all_jobs):
version = {
"latest": i == 0,
"version": job_info.tag_name.strip("v"),
"display": job_info.tag_name.strip("v"),
"url": f"{URL}/{job_info.tag_name}/",
}
version_lst.append(version)
return version_lst


def write_index_files(index_source: str, output_path: Path):
with output_path.joinpath("index.html").open("wt") as fout:
fout.write(index_source)
for path in PROJECT_ROOT.joinpath("static").glob("*"):
shutil.copy2(path, output_path.joinpath(path.name))


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--output-dir")
parser.add_argument("--project-id", default=None)
parser.add_argument("--job-name", default=None)
parser.add_argument("--private-token", default=None)
args = parser.parse_args()

logging.basicConfig(level=logging.INFO)

sys.exit(main(args))
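# Example invocation (placeholder values):
#     python download_docs.py --output-dir ./public --project-id 12345 \
#         --job-name docs --private-token XXXXX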
42 changes: 42 additions & 0 deletions .ci/pages/download_docs.sh
@@ -0,0 +1,42 @@
#!/bin/bash

set -ev

# Make sure required parameters have been set
REQUIRED_VARS=(CI_PROJECT_ID CI_DOCS_TOKEN OUTPUT_DIR)
for var in "${REQUIRED_VARS[@]}" ; do
if [[ -z "${!var}" ]] ; then
echo "Environment variable '${var}' has not been set!"
        exit 1
fi
done

# Parameters
TEMP_DIR=temp

# Create a folder for temporary work
mkdir -p ${TEMP_DIR}
pushd ${TEMP_DIR}

# Create a list of tags
git tag -l --sort="-v:refname" | tee tags.txt

# Remove tags that we wish to ignore
if [[ "x${TAGS_TO_IGNORE}" != "x" ]] ; then
rg -v "${TAGS_TO_IGNORE}" tags.txt > tags_filtered.txt ;
mv tags_filtered.txt tags.txt ;
fi

# Download docs artifacts and rename the public folder to ${OUTPUT_DIR}/${tag}
while read -r tag ; do
echo "Downloading artifacts for ${tag}..."
    curl --header "PRIVATE-TOKEN: $CI_DOCS_TOKEN" -L -s -o artifact.zip \
        "https://gitlab.com/api/v4/projects/${CI_PROJECT_ID}/jobs/artifacts/${tag}/download?job=docs"
file artifact.zip
unzip -o -q artifact.zip || true
mv -f public "${OUTPUT_DIR}/${tag}" || true
done <tags.txt

# Clean up temporary files
popd
rm -rf ${TEMP_DIR}
1 change: 1 addition & 0 deletions .ci/pages/static/folder-solid.svg
(SVG image; diff not rendered)
9 changes: 9 additions & 0 deletions .ci/pages/static/style.css
@@ -0,0 +1,9 @@
body {
background-color: powderblue;
}
h1 {
color: blue;
}
p {
color: red;
}
27 changes: 27 additions & 0 deletions .ci/pages/templates/index.html
@@ -0,0 +1,27 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Refresh" content="0; url={{ URL }}/{{ items[0].tag_name }}/" />
<meta charset="utf-8">
<link rel="stylesheet" href="styles.css">
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.6.3/css/all.css" integrity="sha384-UHRtZLI+pbxtHCWp1t77Bi1L4ZtiqrqD80Kn4Z8NTSRyMA2Fd33n5dQ8lWUE00s/" crossorigin="anonymous">
</head>
<body>
<table align="center">
{% for item in items %}
<tr>
<td class="c1"><i class="far fa-folder" style="color:grey"></i></td>
<td class="c2"><a href="./{{ item.tag_name }}">{{ item.tag_name }}</a></td>
<td class="c3">{{ item.finished_at.strftime("%Y-%m-%d %H:%M:%S") }}</td>
<td class="c4>{{ item.finished_at.strftime("%H:%M:%S") }}</td>
<td class="c5">
<span>{{ item.position }}</SPAN>
</td>
<td class="c6">
<span>{{ item.status }}</SPAN>
</td>
</tr>
{% endfor %}
</table>
</body>
</html>