Skip to content

Commit f7ab10a

Browse files
clumsy and azzhipa authored
feat: expose run_name via env in dist/spmd (#1035) (#1036)
Co-authored-by: Alexander Zhipa <[email protected]>
1 parent cb2407f commit f7ab10a

File tree

2 files changed

+29
-0
lines changed

2 files changed

+29
-0
lines changed

torchx/components/dist.py

+1
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ def ddp(
248248
)
249249

250250
env["TORCHX_TRACKING_EXPERIMENT_NAME"] = argname.experiment_name
251+
env["TORCHX_TRACKING_RUN_NAME"] = argname.run_name
251252

252253
env.setdefault("LOGLEVEL", os.getenv("LOGLEVEL", "WARNING"))
253254
if debug:

torchx/components/test/dist_test.py

+28
Original file line numberDiff line numberDiff line change
@@ -56,27 +56,31 @@ def test_validate_spmd(self) -> None:
5656
def test_spmd_call_by_module_or_script_no_name(self) -> None:
5757
appdef = spmd(script="foo/bar.py")
5858
self.assertEqual("bar", appdef.name)
59+
self.assertEqual("bar", appdef.roles[0].env["TORCHX_TRACKING_RUN_NAME"])
5960
self.assertEqual(
6061
"default-experiment",
6162
appdef.roles[0].env["TORCHX_TRACKING_EXPERIMENT_NAME"],
6263
)
6364

6465
appdef = spmd("-a", "b", script="foo/bar.py")
6566
self.assertEqual("bar", appdef.name)
67+
self.assertEqual("bar", appdef.roles[0].env["TORCHX_TRACKING_RUN_NAME"])
6668
self.assertEqual(
6769
"default-experiment",
6870
appdef.roles[0].env["TORCHX_TRACKING_EXPERIMENT_NAME"],
6971
)
7072

7173
appdef = spmd(m="foo.bar")
7274
self.assertEqual("bar", appdef.name)
75+
self.assertEqual("bar", appdef.roles[0].env["TORCHX_TRACKING_RUN_NAME"])
7376
self.assertEqual(
7477
"default-experiment",
7578
appdef.roles[0].env["TORCHX_TRACKING_EXPERIMENT_NAME"],
7679
)
7780

7881
appdef = spmd("-a", "b", m="foo.bar")
7982
self.assertEqual("bar", appdef.name)
83+
self.assertEqual("bar", appdef.roles[0].env["TORCHX_TRACKING_RUN_NAME"])
8084
self.assertEqual(
8185
"default-experiment",
8286
appdef.roles[0].env["TORCHX_TRACKING_EXPERIMENT_NAME"],
@@ -91,35 +95,43 @@ def test_spmd_call_by_module_or_script_no_name(self) -> None:
9195
def test_spmd_call_by_module_or_script_with_name(self) -> None:
9296
appdef = spmd(script="foo/bar.py", name="baz/trial_1")
9397
self.assertEqual("trial_1", appdef.name)
98+
self.assertEqual("trial_1", appdef.roles[0].env["TORCHX_TRACKING_RUN_NAME"])
9499
self.assertEqual("baz", appdef.roles[0].env["TORCHX_TRACKING_EXPERIMENT_NAME"])
95100

96101
appdef = spmd("-a", "b", script="foo/bar.py", name="baz/trial_1")
97102
self.assertEqual("trial_1", appdef.name)
98103
self.assertEqual("baz", appdef.roles[0].env["TORCHX_TRACKING_EXPERIMENT_NAME"])
104+
self.assertEqual("trial_1", appdef.roles[0].env["TORCHX_TRACKING_RUN_NAME"])
99105

100106
appdef = spmd(m="foo.bar", name="baz/trial_1")
101107
self.assertEqual("trial_1", appdef.name)
108+
self.assertEqual("trial_1", appdef.roles[0].env["TORCHX_TRACKING_RUN_NAME"])
102109
self.assertEqual("baz", appdef.roles[0].env["TORCHX_TRACKING_EXPERIMENT_NAME"])
103110

104111
appdef = spmd("-a", "b", m="foo.bar", name="baz/trial_1")
105112
self.assertEqual("trial_1", appdef.name)
113+
self.assertEqual("trial_1", appdef.roles[0].env["TORCHX_TRACKING_RUN_NAME"])
106114
self.assertEqual("baz", appdef.roles[0].env["TORCHX_TRACKING_EXPERIMENT_NAME"])
107115

108116
def test_spmd_call_by_module_or_script_with_experiment_name(self) -> None:
109117
appdef = spmd(script="foo/bar.py", name="baz/")
110118
self.assertEqual("bar", appdef.name)
119+
self.assertEqual("bar", appdef.roles[0].env["TORCHX_TRACKING_RUN_NAME"])
111120
self.assertEqual("baz", appdef.roles[0].env["TORCHX_TRACKING_EXPERIMENT_NAME"])
112121

113122
appdef = spmd("-a", "b", script="foo/bar.py", name="baz/")
114123
self.assertEqual("bar", appdef.name)
124+
self.assertEqual("bar", appdef.roles[0].env["TORCHX_TRACKING_RUN_NAME"])
115125
self.assertEqual("baz", appdef.roles[0].env["TORCHX_TRACKING_EXPERIMENT_NAME"])
116126

117127
appdef = spmd(m="foo.bar", name="baz/")
118128
self.assertEqual("bar", appdef.name)
129+
self.assertEqual("bar", appdef.roles[0].env["TORCHX_TRACKING_RUN_NAME"])
119130
self.assertEqual("baz", appdef.roles[0].env["TORCHX_TRACKING_EXPERIMENT_NAME"])
120131

121132
appdef = spmd("-a", "b", m="foo.bar", name="baz/")
122133
self.assertEqual("bar", appdef.name)
134+
self.assertEqual("bar", appdef.roles[0].env["TORCHX_TRACKING_RUN_NAME"])
123135
self.assertEqual("baz", appdef.roles[0].env["TORCHX_TRACKING_EXPERIMENT_NAME"])
124136

125137
def test_spmd_call_by_module_or_script_with_run_name(self) -> None:
@@ -129,24 +141,40 @@ def test_spmd_call_by_module_or_script_with_run_name(self) -> None:
129141
"default-experiment",
130142
appdef.roles[0].env["TORCHX_TRACKING_EXPERIMENT_NAME"],
131143
)
144+
self.assertEqual(
145+
"trial_1",
146+
appdef.roles[0].env["TORCHX_TRACKING_RUN_NAME"],
147+
)
132148

133149
appdef = spmd("-a", "b", script="foo/bar.py", name="/trial_1")
134150
self.assertEqual("trial_1", appdef.name)
135151
self.assertEqual(
136152
"default-experiment",
137153
appdef.roles[0].env["TORCHX_TRACKING_EXPERIMENT_NAME"],
138154
)
155+
self.assertEqual(
156+
"trial_1",
157+
appdef.roles[0].env["TORCHX_TRACKING_RUN_NAME"],
158+
)
139159

140160
appdef = spmd(m="foo.bar", name="/trial_1")
141161
self.assertEqual("trial_1", appdef.name)
142162
self.assertEqual(
143163
"default-experiment",
144164
appdef.roles[0].env["TORCHX_TRACKING_EXPERIMENT_NAME"],
145165
)
166+
self.assertEqual(
167+
"trial_1",
168+
appdef.roles[0].env["TORCHX_TRACKING_RUN_NAME"],
169+
)
146170

147171
appdef = spmd("-a", "b", m="foo.bar", name="/trial_1")
148172
self.assertEqual("trial_1", appdef.name)
149173
self.assertEqual(
150174
"default-experiment",
151175
appdef.roles[0].env["TORCHX_TRACKING_EXPERIMENT_NAME"],
152176
)
177+
self.assertEqual(
178+
"trial_1",
179+
appdef.roles[0].env["TORCHX_TRACKING_RUN_NAME"],
180+
)

0 commit comments

Comments
 (0)