Skip to content

Commit 64ff9d3

Browse files
Fixes for misaligned hostPtr enqueueReadWrite
- use getGpuAddress for BuiltinOpParams
- fix read/writeImage

Change-Id: I2e6e9a1d91871fa9f22851f31eb5a7b337b5aecc
1 parent 3c59bae commit 64ff9d3

11 files changed

+275 -25 lines changed

runtime/built_ins/built_ins.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2018 Intel Corporation
2+
* Copyright (C) 2017-2019 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -476,6 +476,7 @@ class BuiltInOp<HWFamily, EBuiltInOps::CopyBufferToImage3d> : public BuiltinDisp
476476

477477
// Determine size of host ptr surface for residency purposes
478478
size_t hostPtrSize = operationParams.srcPtr ? Image::calculateHostPtrSize(region, srcRowPitch, srcSlicePitch, bytesPerPixel, dstImage->getImageDesc().image_type) : 0;
479+
hostPtrSize += operationParams.srcOffset.x;
479480

480481
// Set-up kernel
481482
auto bytesExponent = Math::log2(bytesPerPixel);
@@ -562,6 +563,7 @@ class BuiltInOp<HWFamily, EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDisp
562563

563564
// Determine size of host ptr surface for residency purposes
564565
size_t hostPtrSize = operationParams.dstPtr ? Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, srcImage->getImageDesc().image_type) : 0;
566+
hostPtrSize += operationParams.dstOffset.x;
565567

566568
// Set-up ISA
567569
auto bytesExponent = Math::log2(bytesPerPixel);

runtime/command_queue/enqueue_read_buffer.h

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -88,13 +88,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
8888
BuiltInOwnershipWrapper builtInLock(builder, this->context);
8989

9090
void *dstPtr = ptr;
91-
void *alignedDstPtr = dstPtr;
92-
size_t dstPtrOffset = 0;
93-
94-
if (!isAligned<4>(dstPtr)) {
95-
alignedDstPtr = alignDown(dstPtr, 4);
96-
dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr);
97-
}
9891

9992
MemObjSurface bufferSurf(buffer);
10093
HostPtrSurface hostPtrSurf(dstPtr, size);
@@ -105,8 +98,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
10598
if (!status) {
10699
return CL_OUT_OF_RESOURCES;
107100
}
101+
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
108102
}
109103

104+
void *alignedDstPtr = alignDown(dstPtr, 4);
105+
size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr);
106+
110107
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
111108
dc.dstPtr = alignedDstPtr;
112109
dc.dstOffset = {dstPtrOffset, 0, 0};

runtime/command_queue/enqueue_read_buffer_rect.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
8080
if (!status) {
8181
return CL_OUT_OF_RESOURCES;
8282
}
83-
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch());
83+
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
8484
}
8585

8686
void *alignedDstPtr = alignDown(dstPtr, 4);

runtime/command_queue/enqueue_read_image.h

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2018 Intel Corporation
2+
* Copyright (C) 2017-2019 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -86,12 +86,16 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
8686
if (!status) {
8787
return CL_OUT_OF_RESOURCES;
8888
}
89-
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch());
89+
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
9090
}
9191

92+
void *alignedDstPtr = alignDown(dstPtr, 4);
93+
size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr);
94+
9295
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
9396
dc.srcMemObj = srcImage;
94-
dc.dstPtr = dstPtr;
97+
dc.dstPtr = alignedDstPtr;
98+
dc.dstOffset.x = dstPtrOffset;
9599
dc.srcOffset = origin;
96100
dc.size = region;
97101
dc.srcRowPitch = inputRowPitch;

runtime/command_queue/enqueue_write_buffer.h

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -89,13 +89,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
8989
BuiltInOwnershipWrapper builtInLock(builder, this->context);
9090

9191
void *srcPtr = const_cast<void *>(ptr);
92-
void *alignedSrcPtr = srcPtr;
93-
size_t srcPtrOffset = 0;
94-
95-
if (!isAligned<4>(srcPtr)) {
96-
alignedSrcPtr = alignDown(srcPtr, 4);
97-
srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
98-
}
9992

10093
HostPtrSurface hostPtrSurf(srcPtr, size, true);
10194
MemObjSurface bufferSurf(buffer);
@@ -106,8 +99,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
10699
if (!status) {
107100
return CL_OUT_OF_RESOURCES;
108101
}
102+
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
109103
}
110104

105+
void *alignedSrcPtr = alignDown(srcPtr, 4);
106+
size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
107+
111108
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
112109
dc.srcPtr = alignedSrcPtr;
113110
dc.srcOffset = {srcPtrOffset, 0, 0};

runtime/command_queue/enqueue_write_buffer_rect.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
7979
if (!status) {
8080
return CL_OUT_OF_RESOURCES;
8181
}
82-
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch());
82+
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
8383
}
8484

8585
void *alignedSrcPtr = alignDown(srcPtr, 4);

runtime/command_queue/enqueue_write_image.h

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2018 Intel Corporation
2+
* Copyright (C) 2017-2019 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -80,11 +80,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
8080
if (!status) {
8181
return CL_OUT_OF_RESOURCES;
8282
}
83-
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddressToPatch());
83+
srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
8484
}
8585

86+
void *alignedSrcPtr = alignDown(srcPtr, 4);
87+
size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
88+
8689
BuiltinDispatchInfoBuilder::BuiltinOpParams dc;
87-
dc.srcPtr = srcPtr;
90+
dc.srcPtr = alignedSrcPtr;
91+
dc.srcOffset.x = srcPtrOffset;
8892
dc.dstMemObj = dstImage;
8993
dc.dstOffset = origin;
9094
dc.size = region;

unit_tests/aub_tests/command_queue/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ target_sources(igdrcl_aub_tests PRIVATE
1818
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_aub_tests.cpp
1919
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_rect_aub_tests.cpp
2020
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image_aub_tests.cpp
21+
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_write_image_aub_fixture.h
2122
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_verify_memory_buffer_aub_tests.cpp
2223
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_verify_memory_image_aub_tests.cpp
2324
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_with_timestamp_packet_aub_tests.cpp

unit_tests/aub_tests/command_queue/enqueue_read_image_aub_tests.cpp

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2018 Intel Corporation
2+
* Copyright (C) 2017-2019 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -12,6 +12,7 @@
1212
#include "runtime/mem_obj/image.h"
1313
#include "runtime/memory_manager/os_agnostic_memory_manager.h"
1414
#include "unit_tests/aub_tests/command_queue/command_enqueue_fixture.h"
15+
#include "unit_tests/aub_tests/command_queue/enqueue_read_write_image_aub_fixture.h"
1516
#include "unit_tests/mocks/mock_context.h"
1617
#include "test.h"
1718

@@ -222,3 +223,19 @@ INSTANTIATE_TEST_CASE_P(
222223
::testing::Values( // channels
223224
CL_R, CL_RG, CL_RGBA),
224225
::testing::ValuesIn(readImageParams)));
226+
227+
using AUBReadImageUnaligned = AUBImageUnaligned;
228+
229+
HWTEST_F(AUBReadImageUnaligned, misalignedHostPtr) {
230+
const std::vector<size_t> pixelSizes = {1, 2, 4};
231+
const std::vector<size_t> offsets = {0, 1, 2, 3};
232+
const std::vector<size_t> sizes = {3, 2, 1};
233+
234+
for (auto pixelSize : pixelSizes) {
235+
for (auto offset : offsets) {
236+
for (auto size : sizes) {
237+
testReadImageUnaligned<FamilyType>(offset, size, pixelSize);
238+
}
239+
}
240+
}
241+
}

0 commit comments

Comments
 (0)