Skip to content

Commit a54ddc4

Browse files
aivanou authored and facebook-github-bot committed
Move examples under torchx module (#224)
Summary: Pull Request resolved: #224. This diff moves `examples` under the `torchx` namespace, removes the examples Dockerfile, and makes the torchx image use dev-requirements. Reviewed By: kiukchung. Differential Revision: D31464358. fbshipit-source-id: c56a92ce1fbf5b2881156e57152aa9d40af9eac8
1 parent f6d16cd commit a54ddc4

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

45 files changed

+45
-84
lines changed

.coveragerc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ source =
44

55
omit =
66
*test*
7+
torchx/examples/*

dev-requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,5 @@ torchserve>=0.4.0
1414
captum>=0.3.1
1515
importlib-metadata
1616
ax-platform[mysql]>=0.2.2
17-
fsspec>=2021.09.0
17+
fsspec[s3]>=2021.09.0
18+
torch-model-archiver>=0.4.2

docs/source/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -324,8 +324,8 @@ def handle_item(fieldarg, content):
324324

325325
sphinx_gallery_conf = {
326326
"examples_dirs": [
327-
"../../examples/apps",
328-
"../../examples/pipelines",
327+
"../../torchx/examples/apps",
328+
"../../torchx/examples/pipelines",
329329
],
330330
"gallery_dirs": [
331331
"examples_apps",

examples/apps/Dockerfile

Lines changed: 0 additions & 12 deletions
This file was deleted.

scripts/component_integration_tests.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,10 @@ def get_local_docker_sched_info(image: str) -> SchedulerInfo:
5252
def main() -> None:
5353
print("Starting components integration tests")
5454
torchx_image = "dummy_image"
55-
examples_image = "dummy_image"
5655
dryrun: bool = False
5756
try:
5857
build = build_and_push_image()
5958
torchx_image = build.torchx_image
60-
examples_image = build.examples_image
6159
except MissingEnvError:
6260
dryrun = True
6361
print("Skip runnig tests, executed only docker buid step")
@@ -75,8 +73,8 @@ def main() -> None:
7573
test_suite.run_components(
7674
examples_app_defs_providers,
7775
scheduler_infos=[
78-
get_local_docker_sched_info(examples_image),
79-
get_k8s_sched_info(examples_image),
76+
get_local_docker_sched_info(torchx_image),
77+
get_k8s_sched_info(torchx_image),
8078
],
8179
dryrun=dryrun,
8280
)

scripts/example_app_defs.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010
"""
1111

1212

13-
import examples.apps.datapreproc.component as dp_component
14-
import examples.apps.dist_cifar.component as dist_cifar_component
15-
import examples.apps.lightning_classy_vision.component as cv_component
13+
import torchx.examples.apps.datapreproc.component as dp_component
14+
import torchx.examples.apps.dist_cifar.component as dist_cifar_component
15+
import torchx.examples.apps.lightning_classy_vision.component as cv_component
1616
from torchx.components.integration_tests.component_provider import ComponentProvider
1717
from torchx.specs import AppDef
1818

scripts/integ_test_utils.py

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
class BuildInfo:
1818
id: str
1919
torchx_image: str
20-
examples_image: str
2120

2221

2322
class MissingEnvError(AssertionError):
@@ -41,15 +40,6 @@ def run_in_bg(*args: str) -> "subprocess.Popen[str]":
4140
return subprocess.Popen(args)
4241

4342

44-
def build_examples_canary(id: str) -> str:
45-
examples_tag = "torchx_examples_canary"
46-
47-
print(f"building {examples_tag}")
48-
run("docker", "build", "-t", examples_tag, "examples/apps/")
49-
50-
return examples_tag
51-
52-
5343
def build_torchx_canary(id: str) -> str:
5444
torchx_tag = "torchx_canary"
5545

@@ -65,30 +55,18 @@ def torchx_container_tag(id: str) -> str:
6555
return f"{CONTAINER_REPO}:canary_{id}_torchx"
6656

6757

68-
def examples_container_tag(id: str) -> str:
69-
CONTAINER_REPO = getenv_asserts("CONTAINER_REPO")
70-
return f"{CONTAINER_REPO}:canary_{id}_examples"
71-
72-
7358
def build_images() -> BuildInfo:
7459
id = f"{getuser()}_{random_id()}"
75-
examples_image = build_examples_canary(id)
7660
torchx_image = build_torchx_canary(id)
7761
return BuildInfo(
7862
id=id,
7963
torchx_image=torchx_image,
80-
examples_image=examples_image,
8164
)
8265

8366

8467
def push_images(build: BuildInfo) -> None:
85-
examples_tag = examples_container_tag(build.id)
86-
run("docker", "tag", build.examples_image, examples_tag)
87-
build.examples_image = examples_tag
88-
8968
torchx_tag = torchx_container_tag(build.id)
9069
run("docker", "tag", build.torchx_image, torchx_tag)
9170
build.torchx_image = torchx_tag
9271

93-
run("docker", "push", examples_tag)
9472
run("docker", "push", torchx_tag)

scripts/kfpint.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
```
2525
export KFP_NAMESPACE=<kfp namespace>
2626
export INTEGRATION_TEST_STORAGE=<cloud storage path>
27-
export EXAMPLES_CONTAINER_REPO=<docker repo>
2827
export TORCHX_CONTAINER_REPO=<docker repo>
2928
```
3029
@@ -141,7 +140,6 @@ def save_advanced_pipeline_spec(path: str, build: BuildInfo) -> None:
141140

142141
id = build.id
143142
torchx_image = build.torchx_image
144-
examples_image = build.examples_image
145143

146144
STORAGE_PATH = os.getenv("INTEGRATION_TEST_STORAGE", "/tmp/storage")
147145
root = os.path.join(STORAGE_PATH, id)
@@ -153,8 +151,6 @@ def save_advanced_pipeline_spec(path: str, build: BuildInfo) -> None:
153151
"--output_path",
154152
output,
155153
"--image",
156-
examples_image,
157-
"--torchx_image",
158154
torchx_image,
159155
"--model_name",
160156
f"tiny_image_net_{id}",
@@ -165,7 +161,7 @@ def save_pipeline_spec(path: str, pipeline_file: str, *args: str) -> None:
165161
print(f"generating pipeline spec for {pipeline_file}")
166162

167163
with tempfile.TemporaryDirectory() as tmpdir:
168-
run(os.path.join("examples/pipelines/kfp", pipeline_file), *args)
164+
run(os.path.join("torchx/examples/pipelines/kfp", pipeline_file), *args)
169165
shutil.copy("pipeline.yaml", path)
170166

171167

@@ -202,7 +198,6 @@ def run_pipeline(build: BuildInfo, pipeline_file: str) -> object:
202198
except MaxRetryError:
203199
print(_connection_error_message())
204200
raise
205-
namespace = getenv_asserts("KFP_NAMESPACE")
206201
resp = client.create_run_from_pipeline_package(
207202
pipeline_file,
208203
arguments={},

scripts/kube_dist_trainer.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
import argparse
1313
import os
1414

15-
from examples.apps.dist_cifar.component import trainer
16-
1715
# pyre-ignore-all-errors[21] # Cannot find module utils
1816
# pyre-ignore-all-errors[11]
1917
from integ_test_utils import (
@@ -23,6 +21,7 @@
2321
BuildInfo,
2422
)
2523
from pyre_extensions import none_throws
24+
from torchx.examples.apps.dist_cifar.component import trainer
2625
from torchx.runner import get_runner
2726
from torchx.specs import Resource, named_resources, RunConfig, AppState
2827

@@ -48,7 +47,7 @@ def build_and_push_image() -> BuildInfo:
4847
def run_job(dryrun: bool = False) -> None:
4948
register_gpu_resource()
5049
build = build_and_push_image()
51-
image = build.examples_image
50+
image = build.torchx_image
5251
runner = get_runner("kubeflow-dist-runner")
5352

5453
storage_path = os.getenv("INTEGRATION_TEST_STORAGE", "/tmp/storage")

torchx/cli/cmd_run.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@ def _run(self, runner: Runner, args: argparse.Namespace) -> Optional[str]:
140140
return
141141
else:
142142
app_handle = cast(specs.AppHandle, result)
143+
# do not delete this line. It is used by slurm tests to retrieve the app id
144+
print(app_handle)
143145

144146
if args.scheduler.startswith("local"):
145147
self._wait_and_exit(runner, app_handle)

0 commit comments

Comments
 (0)