@@ -2358,33 +2358,35 @@ GGML_CALL static void ggml_backend_cuda_get_tensor_async(ggml_backend_t backend,
 }
 
 GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor * src, ggml_tensor * dst) {
-    GGML_ASSERT(ggml_backend_is_cuda(backend_src) || ggml_backend_is_cuda(backend_dst));
-
     ggml_backend_buffer_t buf_src = src->view_src ? src->view_src->buffer : src->buffer;
     ggml_backend_buffer_t buf_dst = dst->view_src ? dst->view_src->buffer : dst->buffer;
 
-    if (!ggml_backend_buffer_is_cuda(src->buffer)) {
+    if (!ggml_backend_is_cuda(backend_src) || !ggml_backend_is_cuda(backend_dst)) {
         return false;
     }
 
-    if (!ggml_backend_buffer_is_cuda(dst->buffer)) {
+    if (!ggml_backend_buffer_is_cuda(src->buffer) || !ggml_backend_buffer_is_cuda(dst->buffer)) {
         return false;
     }
 
-    // device -> device
+    // device -> device copy
     ggml_backend_cuda_context * cuda_ctx_src = (ggml_backend_cuda_context *)backend_src->context;
     ggml_backend_cuda_context * cuda_ctx_dst = (ggml_backend_cuda_context *)backend_dst->context;
 
-    if (backend_src != backend_dst) {
-        ggml_backend_cuda_buffer_context * buf_ctx_src = (ggml_backend_cuda_buffer_context *)buf_src->context;
-        ggml_backend_cuda_buffer_context * buf_ctx_dst = (ggml_backend_cuda_buffer_context *)buf_dst->context;
+    ggml_backend_cuda_buffer_context * buf_ctx_src = (ggml_backend_cuda_buffer_context *)buf_src->context;
+    ggml_backend_cuda_buffer_context * buf_ctx_dst = (ggml_backend_cuda_buffer_context *)buf_dst->context;
 
-        GGML_ASSERT(cuda_ctx_src->device == buf_ctx_src->device);
-        GGML_ASSERT(cuda_ctx_dst->device == buf_ctx_dst->device);
+    if (cuda_ctx_src->device != buf_ctx_src->device || cuda_ctx_dst->device != buf_ctx_dst->device) {
+#ifndef NDEBUG
+        GGML_CUDA_LOG_WARN("%s: backend and buffer devices do not match\n", __func__);
+#endif
+        return false;
+    }
 
+    if (backend_src != backend_dst) {
         // copy on src stream
         if (cuda_ctx_src->device == cuda_ctx_dst->device) {
-            CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, cuda_ctx_dst->stream()));
+            CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, cuda_ctx_src->stream()));
         } else {
 #ifdef GGML_CUDA_NO_PEER_COPY
             return false;
@@ -2393,7 +2395,7 @@ GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_
 #endif
         }
 
-        // record event on src stream
+        // record event on src stream after the copy
         if (!cuda_ctx_src->copy_event) {
             ggml_cuda_set_device(cuda_ctx_src->device);
             CUDA_CHECK(cudaEventCreateWithFlags(&cuda_ctx_src->copy_event, cudaEventDisableTiming));
@@ -2405,7 +2407,7 @@ GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_
         CUDA_CHECK(cudaStreamWaitEvent(cuda_ctx_dst->stream(), cuda_ctx_src->copy_event, 0));
     } else {
         // src and dst are on the same backend
-        CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, cuda_ctx_dst->stream()));
+        CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, cuda_ctx_src->stream()));
     }
     return true;
 }
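Editor's note: for readers outside the PR, the cross-backend branch above relies on a standard CUDA pattern: issue the copy on the source stream, record an event behind it, and make the destination stream wait on that event before touching the data. Below is a minimal, self-contained sketch of that record-and-wait pattern using only the plain CUDA runtime API. The buffer and stream names are invented for illustration and a single device is assumed; this is a sketch of the pattern, not the ggml implementation.

#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>

// local error-check helper for the sketch (not the ggml CUDA_CHECK)
#define CHECK(call)                                                   \
    do {                                                              \
        cudaError_t err_ = (call);                                    \
        if (err_ != cudaSuccess) {                                    \
            fprintf(stderr, "CUDA error %s at %s:%d\n",               \
                    cudaGetErrorString(err_), __FILE__, __LINE__);    \
            exit(1);                                                  \
        }                                                             \
    } while (0)

int main() {
    const size_t nbytes = 1 << 20;
    void *src_buf, *dst_buf;
    CHECK(cudaMalloc(&src_buf, nbytes));
    CHECK(cudaMalloc(&dst_buf, nbytes));

    cudaStream_t stream_src, stream_dst;
    CHECK(cudaStreamCreate(&stream_src));
    CHECK(cudaStreamCreate(&stream_dst));

    // event without timing, matching the patch's cudaEventDisableTiming
    cudaEvent_t copy_event;
    CHECK(cudaEventCreateWithFlags(&copy_event, cudaEventDisableTiming));

    // 1. copy on the *source* stream, as the patched code now does
    CHECK(cudaMemcpyAsync(dst_buf, src_buf, nbytes,
                          cudaMemcpyDeviceToDevice, stream_src));
    // 2. record the event on the source stream, after the copy
    CHECK(cudaEventRecord(copy_event, stream_src));
    // 3. the destination stream waits until the copy has finished
    CHECK(cudaStreamWaitEvent(stream_dst, copy_event, 0));

    // anything enqueued on stream_dst after this point sees the copied data

    CHECK(cudaStreamSynchronize(stream_dst));
    CHECK(cudaEventDestroy(copy_event));
    CHECK(cudaStreamDestroy(stream_src));
    CHECK(cudaStreamDestroy(stream_dst));
    CHECK(cudaFree(src_buf));
    CHECK(cudaFree(dst_buf));
    return 0;
}

Recording the event on the source stream immediately after cudaMemcpyAsync is what lets the function return without blocking while still guaranteeing that work later enqueued on the destination stream observes the completed copy.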