Skip to content

Commit

Permalink
Switch OpenCL code to the new vector -> vector dependency model
Browse files Browse the repository at this point in the history
Previously, our functions followed the API of OpenCL itself; that is, they
took a vector of dependencies and returned a single event for the
caller to wait on. The new scheme is to return a vector of events
to wait on, which makes no-op returns trivial to do.
  • Loading branch information
Tulon committed Sep 7, 2015
1 parent e0df1b8 commit aa2b983
Show file tree
Hide file tree
Showing 18 changed files with 275 additions and 318 deletions.
7 changes: 4 additions & 3 deletions acceleration/opencl/Copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ namespace opencl
OpenCLGrid<float> copy(
cl::CommandQueue const& command_queue, cl::Program const& program,
OpenCLGrid<float> const& src_grid, int dst_padding,
std::vector<cl::Event>* wait_for, cl::Event* event)
std::vector<cl::Event> const* dependencies, std::vector<cl::Event>* completion_set)
{
int const width = src_grid.width();
int const height = src_grid.height();
Expand Down Expand Up @@ -56,15 +56,16 @@ OpenCLGrid<float> copy(
kernel.setArg(idx++, dst_grid.offset());
kernel.setArg(idx++, dst_grid.stride());

cl::Event evt;
command_queue.enqueueNDRangeKernel(
kernel,
cl::NullRange,
cl::NDRange(thisOrNextMultipleOf(width, h_wg_size), thisOrNextMultipleOf(height, v_wg_size)),
cl::NDRange(h_wg_size, v_wg_size),
wait_for,
event
dependencies, &evt
);

indicateCompletion(completion_set, evt);
return dst_grid;
}

Expand Down
10 changes: 5 additions & 5 deletions acceleration/opencl/Copy.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,17 @@ namespace opencl
* @param src The source grid.
* @param dst_padding The padding the destination grid will have.
* The padding will be left uninitialized.
* @param wait_for If provided, the kernels enqueued by this function will be
* @param dependencies If provided, the kernels enqueued by this function will be
* made to depend on the events provided.
* @param event If provided, this event will be initialised to enable waiting
* for this operation to complete.
* @param completion_set If provided, used to return a set of events indicating
* the completion of all asynchronous operations initiated by this function.
* @return The copy.
*/
OpenCLGrid<float> copy(
cl::CommandQueue const& command_queue, cl::Program const& program,
OpenCLGrid<float> const& src, int dst_padding,
std::vector<cl::Event>* wait_for = nullptr,
cl::Event* event = nullptr);
std::vector<cl::Event> const* dependencies = nullptr,
std::vector<cl::Event>* completion_set = nullptr);

} // namespace opencl

Expand Down
63 changes: 23 additions & 40 deletions acceleration/opencl/OpenCLAcceleratedOperations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,33 +128,28 @@ OpenCLAcceleratedOperations::gaussBlurUnguarded(
return Grid<float>();
}

std::vector<cl::Event> deps;
std::vector<cl::Event> events;
cl::Event evt;

cl::Buffer const src_buffer(m_context, CL_MEM_READ_ONLY, src.totalBytes());
OpenCLGrid<float> src_grid(src_buffer, src);

m_commandQueue.enqueueWriteBuffer(
src_grid.buffer(), CL_FALSE, 0, src.totalBytes(), src.paddedData(), &deps, &evt
src_grid.buffer(), CL_FALSE, 0, src.totalBytes(), src.paddedData(), &events, &evt
);
deps.clear();
deps.push_back(evt);
indicateCompletion(&events, evt);

auto dst_grid = opencl::gaussBlur(
m_commandQueue, m_program, src_grid, h_sigma, v_sigma, &deps, &evt
m_commandQueue, m_program, src_grid, h_sigma, v_sigma, &events, &events
);
deps.clear();
deps.push_back(evt);

Grid<float> dst(dst_grid.toUninitializedHostGrid());

m_commandQueue.enqueueReadBuffer(
dst_grid.buffer(), CL_FALSE, 0, dst_grid.totalBytes(), dst.paddedData(), &deps, &evt
dst_grid.buffer(), CL_FALSE, 0, dst_grid.totalBytes(), dst.paddedData(), &events, &evt
);
deps.clear();
deps.push_back(evt);
indicateCompletion(&events, evt);

evt.wait();
cl::WaitForEvents(events);

return std::move(dst);
}
Expand Down Expand Up @@ -185,34 +180,29 @@ OpenCLAcceleratedOperations::anisotropicGaussBlurUnguarded(
return Grid<float>();
}

std::vector<cl::Event> deps;
std::vector<cl::Event> events;
cl::Event evt;

cl::Buffer const src_buffer(m_context, CL_MEM_READ_ONLY, src.totalBytes());
OpenCLGrid<float> src_grid(src_buffer, src);

m_commandQueue.enqueueWriteBuffer(
src_grid.buffer(), CL_FALSE, 0, src.totalBytes(), src.paddedData(), &deps, &evt
src_grid.buffer(), CL_FALSE, 0, src.totalBytes(), src.paddedData(), &events, &evt
);
deps.clear();
deps.push_back(evt);
indicateCompletion(&events, evt);

auto dst_grid = opencl::anisotropicGaussBlur(
m_commandQueue, m_program, src_grid,
dir_x, dir_y, dir_sigma, ortho_dir_sigma, &deps, &evt
dir_x, dir_y, dir_sigma, ortho_dir_sigma, &events, &events
);
deps.clear();
deps.push_back(evt);

Grid<float> dst(dst_grid.toUninitializedHostGrid());

m_commandQueue.enqueueReadBuffer(
dst_grid.buffer(), CL_FALSE, 0, dst_grid.totalBytes(), dst.paddedData(), &deps, &evt
dst_grid.buffer(), CL_FALSE, 0, dst_grid.totalBytes(), dst.paddedData(), &events, &evt
);
deps.clear();
deps.push_back(evt);
indicateCompletion(&events, evt);

evt.wait();
cl::WaitForEvents(events);

return std::move(dst);
}
Expand Down Expand Up @@ -243,43 +233,36 @@ OpenCLAcceleratedOperations::textFilterBankUnguarded(
return std::make_pair(Grid<float>(), Grid<uint8_t>());
}

std::vector<cl::Event> deps;
std::vector<cl::Event> events;
cl::Event evt;

cl::Buffer const src_buffer(m_context, CL_MEM_READ_ONLY, src.totalBytes());
OpenCLGrid<float> src_grid(src_buffer, src);

m_commandQueue.enqueueWriteBuffer(
src_grid.buffer(), CL_FALSE, 0, src.totalBytes(), src.paddedData(), &deps, &evt
src_grid.buffer(), CL_FALSE, 0, src.totalBytes(), src.paddedData(), &events, &evt
);
deps.clear();
deps.push_back(evt);
indicateCompletion(&events, evt);

std::pair<OpenCLGrid<float>, OpenCLGrid<uint8_t>> dst = opencl::textFilterBank(
m_commandQueue, m_program, src_grid,
directions, sigmas, shoulder_length, &deps, &evt
directions, sigmas, shoulder_length, &events, &events
);
deps.clear();
deps.push_back(evt);

Grid<float> accum(dst.first.toUninitializedHostGrid());

m_commandQueue.enqueueReadBuffer(
dst.first.buffer(), CL_FALSE, 0, accum.totalBytes(), accum.paddedData(), &deps, &evt
dst.first.buffer(), CL_FALSE, 0, accum.totalBytes(), accum.paddedData(), &events, &evt
);
deps.clear();
deps.push_back(evt);
indicateCompletion(&events, evt);

Grid<uint8_t> direction_map(dst.second.toUninitializedHostGrid());

m_commandQueue.enqueueReadBuffer(
dst.second.buffer(), CL_FALSE, 0, direction_map.totalBytes(),
direction_map.paddedData(), &deps, &evt
direction_map.paddedData(), &events, &evt
);
deps.clear();
deps.push_back(evt);
indicateCompletion(&events, evt);

evt.wait();
cl::WaitForEvents(events);

return std::make_pair(std::move(accum), std::move(direction_map));
}
Expand Down
21 changes: 13 additions & 8 deletions acceleration/opencl/OpenCLAffineTransform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ QImage affineTransform(
auto const adapted = adaptImage(src, outside_pixels);
QSizeF const unit_size(calcSrcUnitSize(inv_xform, min_mapping_area));

std::vector<cl::Event> events;
cl::Event evt;

// Create source and destination images on the device.
cl::Image2D src_image(
context, CL_MEM_READ_ONLY,
Expand All @@ -154,9 +157,10 @@ QImage affineTransform(
region[1] = src.height();
region[2] = 1;
command_queue.enqueueWriteImage(
src_image, CL_TRUE, origin, region, adapted.image.bytesPerLine(), 0,
(void*)adapted.image.bits()
src_image, CL_FALSE, origin, region, adapted.image.bytesPerLine(), 0,
(void*)adapted.image.bits(), &events, &evt
);
indicateCompletion(&events, evt);

cl::Kernel kernel(program, "affine_transform");
int idx = 0;
Expand All @@ -174,19 +178,17 @@ QImage affineTransform(
(float)qAlpha(outside_pixels.rgba()) / 255.f
});

cl::Event evt;

command_queue.enqueueNDRangeKernel(
kernel,
cl::NullRange,
cl::NDRange(
thisOrNextMultipleOf(dst_rect.width(), h_wg_size),
thisOrNextMultipleOf(dst_rect.height(), v_wg_size)
),
cl::NDRange(h_wg_size, v_wg_size), nullptr, &evt
cl::NDRange(h_wg_size, v_wg_size),
&events, &evt
);

evt.wait();
indicateCompletion(&events, evt);

QImage dst(dst_rect.size(), adapted.image.format());
if (dst.format() == QImage::Format_Indexed8) {
Expand All @@ -197,8 +199,11 @@ QImage affineTransform(
region[1] = dst.height();
region[2] = 1;
command_queue.enqueueReadImage(
dst_image, CL_TRUE, origin, region, dst.bytesPerLine(), 0, dst.bits()
dst_image, CL_FALSE, origin, region, dst.bytesPerLine(), 0, dst.bits(), &events, &evt
);
indicateCompletion(&events, evt);

cl::WaitForEvents(events);

return dst;
}
Expand Down
25 changes: 15 additions & 10 deletions acceleration/opencl/OpenCLDewarp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,16 +132,20 @@ QImage dewarp(
adapted.clFormat, dst_size.width(), dst_size.height()
);

std::vector<cl::Event> events;
cl::Event evt;

// Write the source image to device memory.
cl::size_t<3> const origin;
cl::size_t<3> region;
region[0] = src.width();
region[1] = src.height();
region[2] = 1;
command_queue.enqueueWriteImage(
src_image, CL_TRUE, origin, region, adapted.image.bytesPerLine(), 0,
(void*)adapted.image.bits()
src_image, CL_FALSE, origin, region, adapted.image.bytesPerLine(), 0,
(void*)adapted.image.bits(), &events, &evt
);
indicateCompletion(&events, evt);

// Create host and device buffers for storing Generatrix structures.
// Because of constant memory size limitations, we may need several passes.
Expand Down Expand Up @@ -181,9 +185,10 @@ QImage dewarp(

// Copy generatrix_host_buffer to generatrix_device_buffer.
command_queue.enqueueWriteBuffer(
range_device_buffer, CL_TRUE, 0, (range_end + 1 - range_begin)*sizeof(Generatrix),
range_host_buffer.data()
range_device_buffer, CL_FALSE, 0, (range_end + 1 - range_begin)*sizeof(Generatrix),
range_host_buffer.data(), &events, &evt
);
indicateCompletion(&events, evt);

cl::Kernel kernel(program, "dewarp");
int idx = 0;
Expand All @@ -201,19 +206,16 @@ QImage dewarp(
(float)min_mapping_area.width(), (float)min_mapping_area.height()
});

cl::Event evt;

command_queue.enqueueNDRangeKernel(
kernel,
cl::NDRange(range_begin, 0),
cl::NDRange(
thisOrNextMultipleOf(range_end - range_begin, h_wg_size),
thisOrNextMultipleOf(dst_size.height(), v_wg_size)
),
cl::NDRange(h_wg_size, v_wg_size), nullptr, &evt
cl::NDRange(h_wg_size, v_wg_size), &events, &evt
);

evt.wait();
indicateCompletion(&events, evt);
}

QImage dst(dst_size, adapted.image.format());
Expand All @@ -225,8 +227,11 @@ QImage dewarp(
region[1] = dst.height();
region[2] = 1;
command_queue.enqueueReadImage(
dst_image, CL_TRUE, origin, region, dst.bytesPerLine(), 0, dst.bits()
dst_image, CL_FALSE, origin, region, dst.bytesPerLine(), 0, dst.bits(), &events, &evt
);
indicateCompletion(&events, evt);

cl::WaitForEvents(events);

return dst;
}
Expand Down
Loading

0 comments on commit aa2b983

Please sign in to comment.