Skip to content

Commit 9ca90fa

Browse files
DUPRAT, JULIEN authored and EmmanuelBRELLE committed
[UBCL] No bxi endpoints if we don't need them
Signed-off-by: Brelle Emmanuel <[email protected]>
1 parent 7c5a405 commit 9ca90fa

File tree

1 file changed

+28
-27
lines changed

1 file changed

+28
-27
lines changed

ompi/mca/pml/ubcl/pml_ubcl_endpoint.c

Lines changed: 28 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,7 @@ static int mca_pml_ubcl_export_local_endpoint_handle(const int type)
111111

112112
err = ubcl_export_local_endpoint_handle(type, endpoint_h, &remote_rank_u64);
113113
if (UBCL_SUCCESS != err) {
114-
return OMPI_ERROR;
114+
return ubcl_error_to_ompi(err);
115115
}
116116

117117
mca_pml_ubcl_endpoint_modex_put(type, (void *) endpoint_h, size);
@@ -120,10 +120,10 @@ static int mca_pml_ubcl_export_local_endpoint_handle(const int type)
120120
* The actual recv rank will be allocated during add_procs calls */
121121
err = ubcl_close_local_endpoint_channel(type, remote_rank_u64);
122122
if (UBCL_SUCCESS != err) {
123-
mca_pml_ubcl_warn(OMPI_ERROR,
123+
mca_pml_ubcl_warn(ubcl_error_to_ompi(err),
124124
"PML/UBCL failed to clean local endpoint (very unlikely error)."
125125
" For safety reason PML will be disabled.");
126-
return OMPI_ERROR;
126+
return ubcl_error_to_ompi(err);
127127
}
128128

129129
return OMPI_SUCCESS;
@@ -133,35 +133,31 @@ int mca_pml_ubcl_create_local_endpoint(void)
133133
{
134134
int type;
135135
ubcl_error_t err;
136-
int ompi_error;
137136

138137
type = UBCL_ENDPOINT_TYPE_SELF;
139138
err = ubcl_create_local_endpoint(type);
140139
if (UBCL_SUCCESS != err) {
141-
mca_pml_ubcl_error(OMPI_ERROR, "Failed ubcl_create_local_endpoint %d (%d)", type, err);
140+
mca_pml_ubcl_warn(ubcl_error_to_ompi(err), "Failed ubcl_create_local_endpoint %d (%d)", type, err);
142141
}
143142

144-
/* UBCL_ENDPOINT_SHM */
145143
if (!mca_pml_ubcl_component.force_intranode_bxi) {
146144
type = UBCL_ENDPOINT_TYPE_SHMEM;
147145
err = ubcl_create_local_endpoint(type);
148-
if (UBCL_SUCCESS != err) {
149-
mca_pml_ubcl_error(OMPI_ERROR, "Failed ubcl_create_local_endpoint %d (%d)", type, err);
146+
if (UBCL_SUCCESS == err) {
147+
err = mca_pml_ubcl_export_local_endpoint_handle(type);
150148
}
151-
ompi_error = mca_pml_ubcl_export_local_endpoint_handle(type);
152-
if (OMPI_SUCCESS != ompi_error) {
153-
return ompi_error;
149+
if (UBCL_SUCCESS != err) {
150+
mca_pml_ubcl_warn(ubcl_error_to_ompi(err), "Failed ubcl_create_local_endpoint %d (%d)", type, err);
154151
}
155152
}
156153

157154
type = UBCL_ENDPOINT_TYPE_BXI;
158155
err = ubcl_create_local_endpoint(type);
159-
if (UBCL_SUCCESS != err) {
160-
mca_pml_ubcl_error(OMPI_ERROR, "Failed ubcl_create_local_endpoint %d (%d)", type, err);
156+
if (UBCL_SUCCESS == err) {
157+
err = mca_pml_ubcl_export_local_endpoint_handle(type);
161158
}
162-
ompi_error = mca_pml_ubcl_export_local_endpoint_handle(type);
163-
if (OMPI_SUCCESS != ompi_error) {
164-
return ompi_error;
159+
if (UBCL_SUCCESS != err) {
160+
mca_pml_ubcl_warn(ubcl_error_to_ompi(err), "Failed ubcl_create_local_endpoint %d (%d)", type, err);
165161
}
166162

167163
return OMPI_SUCCESS;
@@ -170,20 +166,23 @@ int mca_pml_ubcl_create_local_endpoint(void)
170166
int mca_pml_ubcl_free_local_endpoints()
171167
{
172168
int ret;
173-
/* Finalize BXI */
174169
ret = ubcl_free_local_endpoint(UBCL_ENDPOINT_TYPE_BXI);
175-
if (UBCL_SUCCESS != ret) {
176-
return OMPI_ERROR;
170+
if (UBCL_SUCCESS != ret && UBCL_ERR_NOT_AVAILABLE != ret) {
171+
/* If the transport was unavailable we silence the error,
172+
* we're closing it anyway */
173+
return ubcl_error_to_ompi(ret);
177174
}
175+
178176
if (!mca_pml_ubcl_component.force_intranode_bxi) {
179177
ret = ubcl_free_local_endpoint(UBCL_ENDPOINT_TYPE_SHMEM);
180-
if (UBCL_SUCCESS != ret) {
181-
return OMPI_ERROR;
178+
if (UBCL_SUCCESS != ret && UBCL_ERR_NOT_AVAILABLE != ret) {
179+
return ubcl_error_to_ompi(ret);
182180
}
183181
}
182+
184183
ret = ubcl_free_local_endpoint(UBCL_ENDPOINT_TYPE_SELF);
185-
if (UBCL_SUCCESS != ret) {
186-
return OMPI_ERROR;
184+
if (UBCL_SUCCESS != ret && UBCL_ERR_NOT_AVAILABLE != ret) {
185+
return ubcl_error_to_ompi(ret);
187186
}
188187

189188
return OMPI_SUCCESS;
@@ -331,14 +330,16 @@ static int mca_pml_ubcl_create_endpoints(ompi_proc_t *proc)
331330

332331
err = mca_pml_ubcl_create_recv_endpoint(new_endpoint->rank, new_endpoint->type);
333332
if (OMPI_SUCCESS != err) {
334-
mca_pml_ubcl_error(err, "Failed to create recv endpoint for rank %zu\n",
335-
new_endpoint->rank);
333+
mca_pml_ubcl_warn(err, "Failed to create recv endpoint for rank %zu\n",
334+
new_endpoint->rank);
335+
return err;
336336
}
337337

338338
err = mca_pml_ubcl_create_send_endpoint(proc, new_endpoint->rank, new_endpoint->type);
339339
if (OMPI_SUCCESS != err) {
340-
mca_pml_ubcl_error(err, "Failed to create send endpoint for rank %zu\n",
341-
new_endpoint->rank);
340+
mca_pml_ubcl_warn(err, "Failed to create send endpoint for rank %zu\n",
341+
new_endpoint->rank);
342+
return err;
342343
}
343344

344345
end:

0 commit comments

Comments
 (0)