Skip to content

Commit a5a4075

Browse files
committed
TEST: Add a test that registers different portions of DRAM with different backends
This test fails. It creates two backends that both support DRAM. It then registers one data buffer with the first backend and a different data buffer with the second backend. Finally, it calls prepXferDlist on a descriptor list that covers both data buffers, and that call fails. This could be fixed by having prepXferDlist attempt to populate each segment with each backend, tolerating a failure in an individual backend call without failing the entire operation. However, that just kicks the can down the road, because the user can then get failures in makeXferReq depending on which indices of the nixlXferDlistH they choose. I suggest fixing this with a different approach. First, we observe that almost all real users only have one backend type anyway. Second, if some user really has two separate pools of DRAM that are used by different backends with NIXL, they could just as easily solve that problem by creating two agents, each with one backend, rather than creating multiple backends in one agent. So I propose that the agent require every backend to support all registered memory of a given type. In other words, do not allow registerMem to register on specific backends. Instead, registerMem should force the registration on all backends that support the memory type. Then the end user knows that, for a given agent, all backends that can access DRAM (for example) can interoperate via all registered DRAM. Even with the above change, NIXL can still support non-overlapping memory regions that are registered with different backends. The user would just need to use multiple agents. I think that's far more intuitive anyway. Signed-off-by: Ben Walker <[email protected]>
1 parent e35a8f4 commit a5a4075

File tree

1 file changed

+126
-0
lines changed

1 file changed

+126
-0
lines changed

test/gtest/unit/agent/agent.cpp

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,132 @@ namespace agent {
254254
EXPECT_NE(std::find(mem2.begin(), mem2.end(), FILE_SEG), mem2.end());
255255
}
256256

257+
// Registers two separate DRAM buffers, each with a different mock backend, then
// prepares one descriptor list spanning both buffers and checks which transfer
// requests can be created from it: blob1 -> blob2 is expected to be rejected,
// while blob1 -> blob1 and blob2 -> blob2 are expected to succeed.
// NOTE(review): the commit message that introduced this test states that it
// currently fails at prepXferDlist -- confirm before relying on it in CI.
TEST_F(singleAgentSessionFixture, PrepDlistWithMultipleBackendsSameMemoTypeTest) {
258+
// First mock backend: getSupportedMems() is stubbed to report DRAM_SEG only
259+
testing::NiceMock<mocks::GMockBackendEngine> gmock_engine1;
260+
nixl_mem_list_t mem_types1 = {DRAM_SEG};
261+
ON_CALL(gmock_engine1, getSupportedMems()).WillByDefault(testing::Return(mem_types1));
262+
263+
nixl_b_params_t params1;
264+
// presumably hands the mock engine to createBackend through the params -- TODO confirm
gmock_engine1.SetToParams(params1);
265+
// NOTE(review): backend1 has no initializer; if createBackend fails without
// writing it, the EXPECT_NE below reads an indeterminate pointer.
nixlBackendH *backend1;
266+
EXPECT_EQ(agent_->createBackend(GetMockBackendName(), params1, backend1), NIXL_SUCCESS);
267+
EXPECT_NE(backend1, nullptr);
268+
269+
// Second mock backend: also DRAM_SEG only, created under GetMockBackendName2()
testing::NiceMock<mocks::GMockBackendEngine> gmock_engine2;
270+
nixl_mem_list_t mem_types2 = {DRAM_SEG};
271+
ON_CALL(gmock_engine2, getSupportedMems()).WillByDefault(testing::Return(mem_types2));
272+
273+
nixl_b_params_t params2;
274+
gmock_engine2.SetToParams(params2);
275+
// NOTE(review): same uninitialized out-parameter caveat as backend1.
nixlBackendH *backend2;
276+
EXPECT_EQ(agent_->createBackend(GetMockBackendName2(), params2, backend2), NIXL_SUCCESS);
277+
EXPECT_NE(backend2, nullptr);
278+
279+
// Allocate two separate heap buffers of buf_size bytes each
280+
constexpr size_t buf_size = 256;
281+
std::unique_ptr<char[]> buf1 = std::make_unique<char[]>(buf_size);
282+
std::unique_ptr<char[]> buf2 = std::make_unique<char[]>(buf_size);
283+
284+
// Take the buffers' addresses as integers for use in the descriptors
285+
uintptr_t addr1 = reinterpret_cast<uintptr_t>(buf1.get());
286+
uintptr_t addr2 = reinterpret_cast<uintptr_t>(buf2.get());
287+
288+
// Describe each buffer as a blob (address, length, 0);
// the third argument is presumably the device ID -- TODO confirm
289+
nixlBlobDesc dram_desc1(addr1, buf_size, 0);
290+
nixlBlobDesc dram_desc2(addr2, buf_size, 0);
291+
292+
// Sanity check: the two descriptors refer to different addresses
293+
EXPECT_NE(addr1, addr2);
294+
EXPECT_NE(dram_desc1.addr, dram_desc2.addr);
295+
296+
// Register the first DRAM blob, listing only backend1 in the optional args
297+
nixl_reg_dlist_t dram_list1(DRAM_SEG);
298+
dram_list1.addDesc(dram_desc1);
299+
nixl_opt_args_t reg_params1;
300+
reg_params1.backends.push_back(backend1);
301+
EXPECT_EQ(agent_->registerMem(dram_list1, &reg_params1), NIXL_SUCCESS);
302+
303+
// Register the second DRAM blob, listing only backend2 in the optional args
304+
nixl_reg_dlist_t dram_list2(DRAM_SEG);
305+
dram_list2.addDesc(dram_desc2);
306+
nixl_opt_args_t reg_params2;
307+
reg_params2.backends.push_back(backend2);
308+
EXPECT_EQ(agent_->registerMem(dram_list2, &reg_params2), NIXL_SUCCESS);
309+
310+
// Build one transfer descriptor list holding both ranges: index 0 = blob1, index 1 = blob2
311+
nixl_xfer_dlist_t combined_dram_list(DRAM_SEG);
312+
combined_dram_list.addDesc(nixlBasicDesc(dram_desc1));
313+
combined_dram_list.addDesc(nixlBasicDesc(dram_desc2));
314+
315+
// Call prepXferDlist once with both ranges (local/initiator side)
316+
// This should result in a nixlDlistH with multiple backend keys internally
// NOTE(review): EXPECT_* is non-fatal, so the calls further down still run
// with a nullptr handle if this preparation fails.
317+
nixlDlistH *combined_dlist_hndl = nullptr;
318+
EXPECT_EQ(agent_->prepXferDlist(NIXL_INIT_AGENT, combined_dram_list, combined_dlist_hndl),
319+
NIXL_SUCCESS);
320+
EXPECT_NE(combined_dlist_hndl, nullptr);
321+
322+
// Prepare the same list again as the remote side, under local_agent_name, for a loopback transfer
323+
nixlDlistH *combined_remote_hndl = nullptr;
324+
EXPECT_EQ(agent_->prepXferDlist(local_agent_name, combined_dram_list, combined_remote_hndl),
325+
NIXL_SUCCESS);
326+
EXPECT_NE(combined_remote_hndl, nullptr);
327+
328+
// Index lists passed to makeXferReq to pick descriptors from the prepared lists:
329+
// src_indices selects blob1 (index 0) and dst_indices selects blob2 (index 1)
330+
std::vector<int> src_indices = {0};
331+
std::vector<int> dst_indices = {1};
332+
333+
// This transfer should fail because blob1 (backend1) and blob2 (backend2)
334+
// are on different backends - NIXL cannot find a common backend
// NOTE(review): xfer_req is never released in this test; if this call
// unexpectedly succeeds, the request handle is not cleaned up.
335+
nixlXferReqH *xfer_req = nullptr;
336+
EXPECT_NE(agent_->makeXferReq(NIXL_WRITE,
337+
combined_dlist_hndl,
338+
src_indices,
339+
combined_remote_hndl,
340+
dst_indices,
341+
xfer_req,
342+
nullptr),
343+
NIXL_SUCCESS)
344+
<< "Transfer from backend1-registered memory to backend2-registered memory should fail";
345+
346+
// Verify we can transfer within same backend (blob1 to blob1 on backend1):
// src_indices is used for both the local and the remote side
347+
nixlXferReqH *xfer_req2 = nullptr;
348+
nixl_opt_args_t xfer_params1;
349+
EXPECT_EQ(agent_->makeXferReq(NIXL_WRITE,
350+
combined_dlist_hndl,
351+
src_indices,
352+
combined_remote_hndl,
353+
src_indices,
354+
xfer_req2,
355+
&xfer_params1),
356+
NIXL_SUCCESS)
357+
<< "Transfer within backend1 should succeed";
358+
EXPECT_NE(xfer_req2, nullptr);
359+
EXPECT_EQ(agent_->releaseXferReq(xfer_req2), NIXL_SUCCESS);
360+
361+
// Verify we can create transfer within same backend (blob2 to blob2 on backend2):
// dst_indices is used for both the local and the remote side
362+
nixlXferReqH *xfer_req3 = nullptr;
363+
nixl_opt_args_t xfer_params2;
364+
EXPECT_EQ(agent_->makeXferReq(NIXL_WRITE,
365+
combined_dlist_hndl,
366+
dst_indices,
367+
combined_remote_hndl,
368+
dst_indices,
369+
xfer_req3,
370+
&xfer_params2),
371+
NIXL_SUCCESS)
372+
<< "Transfer within backend2 should succeed";
373+
EXPECT_NE(xfer_req3, nullptr);
374+
EXPECT_EQ(agent_->releaseXferReq(xfer_req3), NIXL_SUCCESS);
375+
376+
// Cleanup: release both prepared list handles, then deregister both buffers
377+
EXPECT_EQ(agent_->releasedDlistH(combined_dlist_hndl), NIXL_SUCCESS);
378+
EXPECT_EQ(agent_->releasedDlistH(combined_remote_hndl), NIXL_SUCCESS);
379+
EXPECT_EQ(agent_->deregisterMem(dram_list1), NIXL_SUCCESS);
380+
EXPECT_EQ(agent_->deregisterMem(dram_list2), NIXL_SUCCESS);
381+
}
382+
257383
TEST_F(singleAgentSessionFixture, MultipleBackendsMemoryAndTransferTest) {
258384
// Create first mock backend that supports DRAM_SEG and VRAM_SEG
259385
testing::NiceMock<mocks::GMockBackendEngine> gmock_engine1;

0 commit comments

Comments
 (0)