Skip to content

Commit cc626a3

Browse files
committed
Merge pull request opencv#9627 from dtmoodie:pyrlk_bugfix
2 parents bc348eb + 4566105 commit cc626a3

File tree

2 files changed

+81
-29
lines changed

2 files changed

+81
-29
lines changed

modules/cudaoptflow/src/cuda/pyrlk.cu

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,16 +1050,45 @@ namespace pyrlk
10501050
}
10511051
}
10521052

1053-
void loadConstants(int2 winSize, int iters, cudaStream_t stream)
1053+
void loadWinSize(int* winSize, int* halfWinSize, cudaStream_t stream)
10541054
{
1055-
cudaSafeCall( cudaMemcpyToSymbolAsync(c_winSize_x, &winSize.x, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
1056-
cudaSafeCall( cudaMemcpyToSymbolAsync(c_winSize_y, &winSize.y, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
1055+
cudaSafeCall( cudaMemcpyToSymbolAsync(c_winSize_x, winSize, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
1056+
cudaSafeCall( cudaMemcpyToSymbolAsync(c_winSize_y, winSize + 1, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
10571057

1058-
int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
1059-
cudaSafeCall( cudaMemcpyToSymbolAsync(c_halfWin_x, &halfWin.x, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
1060-
cudaSafeCall( cudaMemcpyToSymbolAsync(c_halfWin_y, &halfWin.y, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
1058+
cudaSafeCall( cudaMemcpyToSymbolAsync(c_halfWin_x, halfWinSize, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
1059+
cudaSafeCall( cudaMemcpyToSymbolAsync(c_halfWin_y, halfWinSize + 1, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
1060+
}
1061+
1062+
void loadIters(int* iters, cudaStream_t stream)
1063+
{
1064+
cudaSafeCall( cudaMemcpyToSymbolAsync(c_iters, iters, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
1065+
}
10611066

1062-
cudaSafeCall( cudaMemcpyToSymbolAsync(c_iters, &iters, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
1067+
void loadConstants(int2 winSize_, int iters_, cudaStream_t stream)
1068+
{
1069+
static int2 winSize = make_int2(0,0);
1070+
if(winSize.x != winSize_.x || winSize.y != winSize_.y)
1071+
{
1072+
winSize = winSize_;
1073+
cudaSafeCall( cudaMemcpyToSymbolAsync(c_winSize_x, &winSize.x, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
1074+
cudaSafeCall( cudaMemcpyToSymbolAsync(c_winSize_y, &winSize.y, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
1075+
}
1076+
1077+
static int2 halfWin = make_int2(0,0);
1078+
int2 half = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
1079+
if(halfWin.x != half.x || halfWin.y != half.y)
1080+
{
1081+
halfWin = half;
1082+
cudaSafeCall( cudaMemcpyToSymbolAsync(c_halfWin_x, &halfWin.x, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
1083+
cudaSafeCall( cudaMemcpyToSymbolAsync(c_halfWin_y, &halfWin.y, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
1084+
}
1085+
1086+
static int iters = 0;
1087+
if(iters != iters_)
1088+
{
1089+
iters = iters_;
1090+
cudaSafeCall( cudaMemcpyToSymbolAsync(c_iters, &iters, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
1091+
}
10631092
}
10641093

10651094
template<typename T, int cn> struct pyrLK_caller

modules/cudaoptflow/src/pyrlk.cpp

Lines changed: 45 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@ Ptr<cv::cuda::DensePyrLKOpticalFlow> cv::cuda::DensePyrLKOpticalFlow::create(Siz
5555

5656
namespace pyrlk
5757
{
58-
void loadConstants(int2 winSize, int iters, cudaStream_t stream);
58+
void loadConstants(int* winSize, int iters, cudaStream_t stream);
59+
void loadWinSize(int* winSize, int* halfWinSize, cudaStream_t stream);
60+
void loadIters(int* iters, cudaStream_t stream);
5961
template<typename T, int cn> struct pyrLK_caller
6062
{
6163
static void sparse(PtrStepSz<typename device::TypeVec<T, cn>::vec_type> I, PtrStepSz<typename device::TypeVec<T, cn>::vec_type> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
@@ -88,7 +90,8 @@ namespace
8890
void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, Stream& stream);
8991

9092
protected:
91-
Size winSize_;
93+
int winSize_[2];
94+
int halfWinSize_[2];
9295
int maxLevel_;
9396
int iters_;
9497
bool useInitialFlow_;
@@ -100,8 +103,14 @@ namespace
100103
};
101104

102105
PyrLKOpticalFlowBase::PyrLKOpticalFlowBase(Size winSize, int maxLevel, int iters, bool useInitialFlow) :
103-
winSize_(winSize), maxLevel_(maxLevel), iters_(iters), useInitialFlow_(useInitialFlow)
106+
maxLevel_(maxLevel), iters_(iters), useInitialFlow_(useInitialFlow)
104107
{
108+
winSize_[0] = winSize.width;
109+
winSize_[1] = winSize.height;
110+
halfWinSize_[0] = (winSize.width - 1) / 2;
111+
halfWinSize_[1] = (winSize.height - 1) / 2;
112+
pyrlk::loadWinSize(winSize_, halfWinSize_, 0);
113+
pyrlk::loadIters(&iters_, 0);
105114
}
106115

107116
void calcPatchSize(Size winSize, dim3& block, dim3& patch)
@@ -148,7 +157,7 @@ namespace
148157
CV_Assert(prevPyr[0].size() == nextPyr[0].size());
149158
CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2);
150159
CV_Assert(maxLevel_ >= 0);
151-
CV_Assert(winSize_.width > 2 && winSize_.height > 2);
160+
CV_Assert(winSize_[0] > 2 && winSize_[1] > 2);
152161
if (useInitialFlow_)
153162
CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == prevPts.type());
154163
else
@@ -171,9 +180,11 @@ namespace
171180
}
172181

173182
dim3 block, patch;
174-
calcPatchSize(winSize_, block, patch);
183+
calcPatchSize(Size(winSize_[0], winSize_[1]), block, patch);
175184
CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6);
176-
pyrlk::loadConstants(make_int2(winSize_.width, winSize_.height), iters_, StreamAccessor::getStream(stream));
185+
cudaStream_t stream_ = StreamAccessor::getStream(stream);
186+
pyrlk::loadWinSize(winSize_, halfWinSize_, stream_);
187+
pyrlk::loadIters(&iters_, stream_);
177188

178189
const int cn = prevPyr[0].channels();
179190
const int type = prevPyr[0].depth();
@@ -185,12 +196,12 @@ namespace
185196
// while ushort does work, it has significantly worse performance, and thus doesn't pass accuracy tests.
186197
static const func_t funcs[6][4] =
187198
{
188-
{ pyrlk::dispatcher<uchar, 1> , /*pyrlk::dispatcher<uchar, 2>*/ 0, pyrlk::dispatcher<uchar, 3> , pyrlk::dispatcher<uchar, 4> },
189-
{ /*pyrlk::dispatcher<char, 1>*/ 0, /*pyrlk::dispatcher<char, 2>*/ 0, /*pyrlk::dispatcher<char, 3>*/ 0, /*pyrlk::dispatcher<char, 4>*/ 0 },
190-
{ pyrlk::dispatcher<ushort, 1> , /*pyrlk::dispatcher<ushort, 2>*/0, pyrlk::dispatcher<ushort, 3> , pyrlk::dispatcher<ushort, 4> },
191-
{ /*pyrlk::dispatcher<short, 1>*/ 0, /*pyrlk::dispatcher<short, 2>*/ 0, /*pyrlk::dispatcher<short, 3>*/ 0, /*pyrlk::dispatcher<short, 4>*/0 },
192-
{ pyrlk::dispatcher<int, 1> , /*pyrlk::dispatcher<int, 2>*/ 0, pyrlk::dispatcher<int, 3> , pyrlk::dispatcher<int, 4> },
193-
{ pyrlk::dispatcher<float, 1> , /*pyrlk::dispatcher<float, 2>*/ 0, pyrlk::dispatcher<float, 3> , pyrlk::dispatcher<float, 4> }
199+
{ pyrlk::dispatcher<uchar, 1> , /*pyrlk::dispatcher<uchar, 2>*/ 0, pyrlk::dispatcher<uchar, 3> , pyrlk::dispatcher<uchar, 4> },
200+
{ /*pyrlk::dispatcher<char, 1>*/ 0, /*pyrlk::dispatcher<char, 2>*/ 0, /*pyrlk::dispatcher<char, 3>*/ 0 , /*pyrlk::dispatcher<char, 4>*/ 0 },
201+
{ pyrlk::dispatcher<ushort, 1> , /*pyrlk::dispatcher<ushort, 2>*/0, pyrlk::dispatcher<ushort, 3> , pyrlk::dispatcher<ushort, 4> },
202+
{ /*pyrlk::dispatcher<short, 1>*/ 0, /*pyrlk::dispatcher<short, 2>*/ 0, /*pyrlk::dispatcher<short, 3>*/ 0 , /*pyrlk::dispatcher<short, 4>*/0 },
203+
{ pyrlk::dispatcher<int, 1> , /*pyrlk::dispatcher<int, 2>*/ 0, pyrlk::dispatcher<int, 3> , pyrlk::dispatcher<int, 4> },
204+
{ pyrlk::dispatcher<float, 1> , /*pyrlk::dispatcher<float, 2>*/ 0, pyrlk::dispatcher<float, 3> , pyrlk::dispatcher<float, 4> }
194205
};
195206

196207
func_t func = funcs[type][cn-1];
@@ -201,7 +212,7 @@ namespace
201212
prevPts.ptr<float2>(), nextPts.ptr<float2>(),
202213
status.ptr(), level == 0 && err ? err->ptr<float>() : 0,
203214
prevPts.cols, level, block, patch,
204-
StreamAccessor::getStream(stream));
215+
stream_);
205216
}
206217
}
207218

@@ -229,7 +240,7 @@ namespace
229240
CV_Assert( prevImg.type() == CV_8UC1 );
230241
CV_Assert( prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type() );
231242
CV_Assert( maxLevel_ >= 0 );
232-
CV_Assert( winSize_.width > 2 && winSize_.height > 2 );
243+
CV_Assert( winSize_[0] > 2 && winSize_[1] > 2 );
233244

234245
// build the image pyramids.
235246

@@ -262,9 +273,11 @@ namespace
262273
vPyr[0].setTo(Scalar::all(0), stream);
263274
uPyr[1].setTo(Scalar::all(0), stream);
264275
vPyr[1].setTo(Scalar::all(0), stream);
265-
266-
int2 winSize2i = make_int2(winSize_.width, winSize_.height);
267-
pyrlk::loadConstants(winSize2i, iters_, StreamAccessor::getStream(stream));
276+
cudaStream_t stream_ = StreamAccessor::getStream(stream);
277+
pyrlk::loadWinSize(winSize_, halfWinSize_, stream_);
278+
pyrlk::loadIters(&iters_, stream_);
279+
int2 winSize2i = make_int2(winSize_[0], winSize_[1]);
280+
//pyrlk::loadConstants(winSize2i, iters_, StreamAccessor::getStream(stream));
268281

269282
int idx = 0;
270283

@@ -275,7 +288,7 @@ namespace
275288
pyrlk::pyrLK_caller<float,1>::dense(prevPyr_[level], nextPyr_[level],
276289
uPyr[idx], vPyr[idx], uPyr[idx2], vPyr[idx2],
277290
PtrStepSzf(), winSize2i,
278-
StreamAccessor::getStream(stream));
291+
stream_);
279292

280293
if (level > 0)
281294
idx = idx2;
@@ -293,8 +306,13 @@ namespace
293306
{
294307
}
295308

296-
virtual Size getWinSize() const { return winSize_; }
297-
virtual void setWinSize(Size winSize) { winSize_ = winSize; }
309+
virtual Size getWinSize() const { return cv::Size(winSize_[0], winSize_[1]); }
310+
virtual void setWinSize(Size winSize) {
311+
winSize_[0] = winSize.width;
312+
winSize_[1] = winSize.height;
313+
halfWinSize_[0] = (winSize.width - 1) / 2;
314+
halfWinSize_[1] = (winSize.height -1) / 2;
315+
}
298316

299317
virtual int getMaxLevel() const { return maxLevel_; }
300318
virtual void setMaxLevel(int maxLevel) { maxLevel_ = maxLevel; }
@@ -339,8 +357,13 @@ namespace
339357
{
340358
}
341359

342-
virtual Size getWinSize() const { return winSize_; }
343-
virtual void setWinSize(Size winSize) { winSize_ = winSize; }
360+
virtual Size getWinSize() const { return cv::Size(winSize_[0], winSize_[1]); }
361+
virtual void setWinSize(Size winSize) {
362+
winSize_[0] = winSize.width;
363+
winSize_[1] = winSize.height;
364+
halfWinSize_[0] = (winSize.width - 1) / 2;
365+
halfWinSize_[1] = (winSize.height -1) / 2;
366+
}
344367

345368
virtual int getMaxLevel() const { return maxLevel_; }
346369
virtual void setMaxLevel(int maxLevel) { maxLevel_ = maxLevel; }

0 commit comments

Comments
 (0)