Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: fast initializer for conda environments #2226

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 8 additions & 36 deletions metaflow/plugins/pypi/fast_bootstrap.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,19 @@
import concurrent.futures
import gzip
import io
import json
import os
import shutil
import subprocess
import sys
import tarfile
import time
from urllib.error import URLError
from urllib.request import urlopen
from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
from metaflow.metaflow_config import DATASTORE_LOCAL_DIR, CONDA_FAST_INIT_BIN_URL
from metaflow.plugins import DATASTORES
from metaflow.plugins.pypi.utils import MICROMAMBA_MIRROR_URL, MICROMAMBA_URL
from metaflow.util import which
from urllib.request import Request
import warnings

from . import MAGIC_FILE, _datastore_packageroot

URL = os.environ.get("FAST_INIT_URL")

# Bootstraps a valid conda virtual environment composed of conda and pypi packages


Expand All @@ -37,29 +30,6 @@ def wrapper(*args, **kwargs):

if __name__ == "__main__":
# TODO: Detect architecture on the fly when dealing with arm architectures.
# ARCH=$(uname -m)
# OS=$(uname)

# if [[ "$OS" == "Linux" ]]; then
# PLATFORM="linux"
# if [[ "$ARCH" == "aarch64" ]]; then
# ARCH="aarch64";
# elif [[ $ARCH == "ppc64le" ]]; then
# ARCH="ppc64le";
# else
# ARCH="64";
# fi
# fi

# if [[ "$OS" == "Darwin" ]]; then
# PLATFORM="osx";
# if [[ "$ARCH" == "arm64" ]]; then
# ARCH="arm64";
# else
# ARCH="64"
# fi
# fi

def run_cmd(cmd, stdin_str):
result = subprocess.run(
cmd,
Expand Down Expand Up @@ -88,6 +58,9 @@ def install_fast_initializer(architecture):
return fast_initializer_path

# TODO: take architecture into account
url = CONDA_FAST_INIT_BIN_URL
if url is None:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With this change, does the TODO above still make sense? Or are we going to handle the architecture at a different level, by setting CONDA_FAST_INIT_BIN_URL to an architecture-specific URL?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair point. The client needs to know the target architecture either way, as it is the one solving for packages. The client also provides the architecture input for the bootstrap script, so we might as well keep the URL-defining logic completely client-side.

raise Exception("URL for Binary is unset.")

# Prepare directory once
os.makedirs(os.path.dirname(fast_initializer_path), exist_ok=True)
Expand Down Expand Up @@ -116,10 +89,7 @@ def _download_and_extract(url):
)
time.sleep(2**attempt)

if URL is None:
raise Exception("URL for Binary is unset.")

_download_and_extract(URL)
_download_and_extract(url)

# Set executable permission
os.chmod(fast_initializer_path, 0o755)
Expand Down Expand Up @@ -151,7 +121,9 @@ def setup_environment(architecture, storage, env, prefix, pkgs_dir):
run_cmd(cmd, all_package_urls)

if len(sys.argv) != 5:
print("Usage: bootstrap.py <flow_name> <id> <datastore_type> <architecture>")
print(
"Usage: fast_bootstrap.py <flow_name> <id> <datastore_type> <architecture>"
)
sys.exit(1)

try:
Expand Down
Loading