Skip to content

Commit acd9a9e

Browse files
SDA USRsdausr
authored and
GitHub Enterprise
committed
Squashed 'vision' changes from 63ab88e..b1c2f1c (#972)
b1c2f1c Removed compare, compareS tests 7b3be51 Update releasenotesxfopencv.rst 386de98 Merge pull request #644 from mounikk/next 5ce5940 created compare and compareS test cases 8a020a6 Merge pull request #643 from yuanqian/add_stacksize_1 2e30063 try to fix Software emulation of compute unit(s) exited unexpectedly 805a939 Moved aie-ml tests to aie_dev2 branch aa01097 Merge pull request #573 from turrahma/rgba2grey 671266d Merge pull request #575 from turrahma/pixelwise 3597e08 Merge pull request #641 from mounikk/next 089e8fc udpated doc 7c68f11 udpated doc f3115cc clang format applied 632c5c6 updated ltm constructor 9b24c67 udpated doc file 6507025 Added pixelwise select with background pl case 70f441c Added pixelwise select no background pl case 7e9da9b Updated test names in GMIO cases 230b188 Clang formatting applied 3fec785 Updated description.json 31fbe6c Added pixelwise select gmio test with background 24b7f5c Added pixelwise select gmio test and kernel 75c2c1e clanfg formatted 66b1f96 Clang formatted. 70964e1 Removed print statements 6247c08 added rgba2grey for aie-ml Co-authored-by: sdausr <[email protected]>
1 parent fbdea14 commit acd9a9e

File tree

6 files changed

+488
-138
lines changed

6 files changed

+488
-138
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
/*
2+
* Copyright 2021 Xilinx, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#ifndef __XF_PIXELWISE_SELECT_
18+
#define __XF_PIXELWISE_SELECT_
19+
20+
#include <adf.h>
21+
#include <algorithm>
22+
#include <aie_api/utils.hpp>
23+
#include <aie_api/aie.hpp>
24+
25+
#include <common/xf_aie_hw_utils.hpp>
26+
27+
namespace xf {
28+
namespace cv {
29+
namespace aie {
30+
31+
class PixelwiseSelect {
32+
public:
33+
void runImpl(adf::input_buffer<uint8_t>& frame,
34+
adf::input_buffer<uint8_t>& mask,
35+
adf::output_buffer<uint8_t>& output);
36+
void runImpl(adf::input_buffer<uint8_t>& in_frame,
37+
adf::input_buffer<uint8_t>& mask,
38+
adf::input_buffer<uint8_t>& bg_frame,
39+
adf::output_buffer<uint8_t>& output);
40+
void xf_pixel_wise_select(uint8_t* frame, uint8_t* mask, int16 height, int16 width, uint8_t* output);
41+
void xf_pixel_wise_select(
42+
uint8_t* in_frame, uint8_t* mask, uint8_t* bg_frame, int16 height, int16 width, uint8_t* output);
43+
};
44+
45+
__attribute__((noinline)) void PixelwiseSelect::xf_pixel_wise_select(
46+
uint8_t* frame, uint8_t* mask, int16 height, int16 width, uint8_t* output) {
47+
const int16 image_width = width;
48+
const int16 image_height = height;
49+
50+
uint8_t* restrict _frame = (uint8_t*)(frame);
51+
uint8_t* restrict _mask = (uint8_t*)(mask);
52+
uint8_t* restrict _output = (uint8_t*)(output);
53+
int16_t num_vectors = image_width >> 5;
54+
55+
::aie::vector<uint8_t, 32> vec_x;
56+
::aie::vector<uint8_t, 32> vec_x1;
57+
::aie::vector<uint8_t, 32> ones = ::aie::broadcast<uint8, 32>(1);
58+
::aie::vector<uint8_t, 32> t1;
59+
60+
::aie::accum<acc32, 32> acc_x;
61+
62+
for (int i = 0; i < image_height * num_vectors; i++) chess_prepare_for_pipelining chess_loop_range(1, ) {
63+
vec_x = ::aie::load_v<32>(_frame);
64+
vec_x1 = ::aie::load_v<32>(_mask);
65+
acc_x = ::aie::mul(vec_x, vec_x1);
66+
::aie::store_v(_output, acc_x.template to_vector<uint8>(0));
67+
_frame += 32;
68+
_mask += 32;
69+
_output += 32;
70+
}
71+
}
72+
73+
__attribute__((noinline)) void PixelwiseSelect::xf_pixel_wise_select(
74+
uint8_t* in_frame, uint8_t* mask, uint8_t* bg_frame, int16 height, int16 width, uint8_t* output) {
75+
const int16 image_width = width;
76+
const int16 image_height = height;
77+
78+
uint8_t* restrict _in_frame = (uint8_t*)(in_frame);
79+
uint8_t* restrict _bg_frame = (uint8_t*)(bg_frame);
80+
uint8_t* restrict _mask = (uint8_t*)(mask);
81+
uint8_t* restrict _output = (uint8_t*)(output);
82+
int16_t num_vectors = image_width >> 5;
83+
84+
::aie::vector<uint8_t, 32> vec_in;
85+
::aie::vector<uint8_t, 32> vec_bg;
86+
::aie::vector<uint8_t, 32> vec_m;
87+
::aie::vector<uint8_t, 32> vec_out;
88+
89+
for (int i = 0; i < image_height * num_vectors; i++) chess_prepare_for_pipelining chess_loop_range(1, ) {
90+
vec_in = ::aie::load_v<32>(_in_frame);
91+
vec_bg = ::aie::load_v<32>(_bg_frame);
92+
vec_m = ::aie::load_v<32>(_mask);
93+
auto mask_val = ::aie::gt(vec_m, (uint8_t)0);
94+
vec_out = ::aie::select(vec_bg, vec_in, mask_val);
95+
::aie::store_v(_output, vec_out);
96+
_in_frame += 32;
97+
_bg_frame += 32;
98+
_mask += 32;
99+
_output += 32;
100+
}
101+
}
102+
103+
/**
 * Buffer-level entry point for pixel-wise select without background.
 *
 * Reads the tile height and width from the frame buffer, copies the frame's
 * metadata to the output buffer, and runs xf_pixel_wise_select on the image
 * data of frame, mask and output.
 *
 * @param frame   input tile buffer (its metadata supplies height and width)
 * @param mask    mask tile buffer
 * @param output  output tile buffer
 */
void PixelwiseSelect::runImpl(adf::input_buffer<uint8_t>& frame,
                              adf::input_buffer<uint8_t>& mask,
                              adf::output_buffer<uint8_t>& output) {
    uint8_t* f = (uint8_t*)::aie::begin(frame);
    uint8_t* m = (uint8_t*)::aie::begin(mask);
    uint8_t* o = (uint8_t*)::aie::begin(output);

    int height = xfGetTileHeight(f);
    int width = xfGetTileWidth(f);

    xfCopyMetaData(f, o);

    uint8_t* f_ptr = (uint8_t*)xfGetImgDataPtr(f);
    uint8_t* m_ptr = (uint8_t*)xfGetImgDataPtr(m);
    uint8_t* o_ptr = (uint8_t*)xfGetImgDataPtr(o);

    // No debug printing here: the width/height dumps were development leftovers
    // and the background overload of runImpl never had them.
    xf_pixel_wise_select(f_ptr, m_ptr, height, width, o_ptr);
}
126+
127+
/**
 * Buffer-level entry point for pixel-wise select with background.
 *
 * Reads the tile height and width from the in_frame buffer, copies its
 * metadata to the output buffer, and runs the background variant of
 * xf_pixel_wise_select on the image data of all four buffers.
 *
 * @param in_frame  foreground tile buffer (its metadata supplies height and width)
 * @param mask      mask tile buffer
 * @param bg_frame  background tile buffer
 * @param output    output tile buffer
 */
void PixelwiseSelect::runImpl(adf::input_buffer<uint8_t>& in_frame,
                              adf::input_buffer<uint8_t>& mask,
                              adf::input_buffer<uint8_t>& bg_frame,
                              adf::output_buffer<uint8_t>& output) {
    // Start of each tile buffer (metadata first).
    uint8_t* fg_tile = (uint8_t*)::aie::begin(in_frame);
    uint8_t* mask_tile = (uint8_t*)::aie::begin(mask);
    uint8_t* bg_tile = (uint8_t*)::aie::begin(bg_frame);
    uint8_t* out_tile = (uint8_t*)::aie::begin(output);

    // Tile dimensions come from the foreground tile.
    int tile_rows = xfGetTileHeight(fg_tile);
    int tile_cols = xfGetTileWidth(fg_tile);

    xfCopyMetaData(fg_tile, out_tile);

    // Image data of each tile.
    uint8_t* fg_data = (uint8_t*)xfGetImgDataPtr(fg_tile);
    uint8_t* mask_data = (uint8_t*)xfGetImgDataPtr(mask_tile);
    uint8_t* bg_data = (uint8_t*)xfGetImgDataPtr(bg_tile);
    uint8_t* out_data = (uint8_t*)xfGetImgDataPtr(out_tile);

    xf_pixel_wise_select(fg_data, mask_data, bg_data, tile_rows, tile_cols, out_data);
}
148+
149+
} // namespace aie
150+
} // namespace cv
151+
} // namespace xf
152+
153+
#endif
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
/*
2+
* Copyright 2022 Xilinx, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <adf.h>
18+
#include <aie_api/utils.hpp>
19+
#include <aie_api/aie.hpp>
20+
#include <common/xf_aie_hw_utils.hpp>
21+
//#include <common/xf_aie_hw_utils.hpp>
22+
// #include <stdio.h>
23+
// #include <iostream>
24+
25+
#ifndef __XF_RGBA2GRAY__HPP__
26+
#define __XF_RGBA2GRAY__HPP__
27+
28+
namespace xf {
29+
namespace cv {
30+
namespace aie {
31+
32+
class Rgba2Gray {
33+
private:
34+
static constexpr int VECTORIZATION_FACTOR = 32;
35+
36+
public:
37+
void runImpl(adf::input_buffer<uint8_t>& in, adf::output_buffer<uint8_t>& out);
38+
void xf_rgba2gray(uint8_t* ptr1, uint8_t* out_ptr, uint16_t tile_width, uint16_t tile_height);
39+
};
40+
41+
__attribute__((noinline)) void Rgba2Gray::xf_rgba2gray(uint8_t* restrict ptr1,
42+
uint8_t* restrict ptr_out,
43+
uint16_t tile_width,
44+
uint16_t tile_height) {
45+
::aie::vector<uint8_t, 16> wt(77, 150, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
46+
::aie::vector<uint8_t, 32> wt_blue = ::aie::broadcast<uint8_t, 32>(29);
47+
::aie::vector<uint8_t, 32> rgba_channel0, rgba_channel1, rgba_channel3, rgba_channel2;
48+
::aie::vector<uint8_t, 32> r, g, b, gray;
49+
::aie::accum<acc32, VECTORIZATION_FACTOR> acc;
50+
uint16_t more_pixels = 0, loop_count;
51+
loop_count = (tile_height * tile_width) >> 5; // Divide by VECTORIZATION-FACTOR - ASSUMING VEC-FACT = 32*
52+
53+
for (int j = 0; j < loop_count; j += 1) {
54+
// READ 32-bit RGBA channels of 32 pixels. Total 1024 bits.
55+
rgba_channel0 = ::aie::load_v<32>(ptr1);
56+
ptr1 += 32;
57+
rgba_channel1 = ::aie::load_v<32>(ptr1);
58+
ptr1 += 32;
59+
rgba_channel2 = ::aie::load_v<32>(ptr1);
60+
ptr1 += 32;
61+
rgba_channel3 = ::aie::load_v<32>(ptr1);
62+
ptr1 += 32;
63+
64+
// Unzip the interleaved channels
65+
auto[rg_temp, ba_temp] = ::aie::interleave_unzip(::aie::concat(rgba_channel0, rgba_channel1),
66+
::aie::concat(rgba_channel2, rgba_channel3), 2);
67+
r = ::aie::filter_even(rg_temp, 1);
68+
g = ::aie::filter_odd(rg_temp, 1);
69+
b = ::aie::filter_even(ba_temp, 1);
70+
71+
// MAC operations and store
72+
acc = ::aie::mul(b, wt_blue);
73+
acc = ::aie::accumulate<VECTORIZATION_FACTOR>(acc, wt, 0, r, g);
74+
gray = acc.template to_vector<uint8_t>(8);
75+
::aie::store_v((uint8_t*)ptr_out, gray);
76+
ptr_out = ptr_out + VECTORIZATION_FACTOR;
77+
}
78+
79+
// Check if more pixels to be processed? // No. of more pixels to be processed
80+
more_pixels = (tile_height * tile_width) - (loop_count * VECTORIZATION_FACTOR);
81+
82+
// If more pixels to be processed, then move the pointers back so that we have 32 pixels to process
83+
if (more_pixels != 0) {
84+
// Find the pixel-shift requried to process 32 pixels at once
85+
more_pixels = VECTORIZATION_FACTOR - more_pixels;
86+
87+
// Each input pixel is 32 bit (4 uint8_t). So pointer moved back with (pixel-shift*4)
88+
ptr1 = ptr1 - (more_pixels << 2);
89+
90+
// Each output pixel is 8 bit (1 uint8_t). So pointer moved back with (pixel-shift*1)
91+
ptr_out = ptr_out - more_pixels;
92+
93+
// Repeat as above loop
94+
rgba_channel0 = ::aie::load_unaligned_v<32>(ptr1);
95+
ptr1 += 32;
96+
rgba_channel1 = ::aie::load_unaligned_v<32>(ptr1);
97+
ptr1 += 32;
98+
rgba_channel2 = ::aie::load_unaligned_v<32>(ptr1);
99+
ptr1 += 32;
100+
rgba_channel3 = ::aie::load_unaligned_v<32>(ptr1);
101+
auto[rg_temp, ba_temp] = ::aie::interleave_unzip(::aie::concat(rgba_channel0, rgba_channel1),
102+
::aie::concat(rgba_channel2, rgba_channel3), 2);
103+
r = ::aie::filter_even(rg_temp, 1);
104+
g = ::aie::filter_odd(rg_temp, 1);
105+
b = ::aie::filter_even(ba_temp, 1);
106+
107+
acc = ::aie::mul(b, wt_blue);
108+
acc = ::aie::accumulate<VECTORIZATION_FACTOR>(acc, wt, 0, r, g);
109+
gray = acc.template to_vector<uint8_t>(8);
110+
::aie::store_unaligned_v((uint8_t*)ptr_out, gray);
111+
}
112+
}
113+
114+
/**
 * Buffer-level entry point for RGBA to gray conversion.
 *
 * Reads the tile dimensions from the input buffer and returns immediately if
 * either is zero. Otherwise it copies the input metadata to the output, sets
 * the output tile width, calls xfUnsignedSaturation on the output, and runs
 * xf_rgba2gray on the image data of both buffers.
 *
 * @param in   input tile buffer holding RGBA pixels
 * @param out  output tile buffer receiving gray pixels
 */
void Rgba2Gray::runImpl(adf::input_buffer<uint8_t>& in, adf::output_buffer<uint8_t>& out) {
    uint8_t* src_tile = (uint8_t*)::aie::begin(in);
    uint8_t* dst_tile = (uint8_t*)::aie::begin(out);

    int16_t width = xfGetTileWidth(src_tile);
    int16_t height = xfGetTileHeight(src_tile);

    // Empty tile: leave the output buffer untouched.
    if (width == 0 || height == 0) {
        return;
    }

    // Prepare the output tile's metadata before writing any pixels.
    xfCopyMetaData(src_tile, dst_tile);
    xfSetTileWidth(dst_tile, width);

    xfUnsignedSaturation(dst_tile);

    uint8_t* src_pixels = (uint8_t*)xfGetImgDataPtr(src_tile);
    uint8_t* dst_pixels = (uint8_t*)xfGetImgDataPtr(dst_tile);

    xf_rgba2gray(src_pixels, dst_pixels, width, height);
}
133+
} // aie
134+
} // cv
135+
} // xf
136+
#endif

vision/L1/include/imgproc/xf_ltm.hpp

+20
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,26 @@ class LTM {
218218

219219
LTM() { assert(!is_floating_point<OUT_TYPE>::value); }
220220

221+
/**
 * Convenience constructor: builds the object and immediately runs
 * process(in, omin_r, omax_r, omin_w, omax_w, out) with the given arguments.
 */
LTM(xf::cv::Mat<IN_TYPE, ROWS, COLS, NPC, XFCVDEPTH_IN_1>& in,
    XF_CTUNAME(IN_TYPE, NPC) omin_r[MinMaxVArrSize][MinMaxHArrSize],
    XF_CTUNAME(IN_TYPE, NPC) omax_r[MinMaxVArrSize][MinMaxHArrSize],
    XF_CTUNAME(IN_TYPE, NPC) omin_w[MinMaxVArrSize][MinMaxHArrSize],
    XF_CTUNAME(IN_TYPE, NPC) omax_w[MinMaxVArrSize][MinMaxHArrSize],
    xf::cv::Mat<OUT_TYPE, ROWS, COLS, NPC, XFCVDEPTH_OUT_1>& out) {
    this->process(in, omin_r, omax_r, omin_w, omax_w, out);
}
229+
230+
/**
 * Convenience constructor with explicit block dimensions: builds the object
 * and immediately runs
 * process(in, block_rows, block_cols, omin_r, omax_r, omin_w, omax_w, out).
 */
LTM(xf::cv::Mat<IN_TYPE, ROWS, COLS, NPC, XFCVDEPTH_IN_1>& in,
    int block_rows,
    int block_cols,
    XF_CTUNAME(IN_TYPE, NPC) omin_r[MinMaxVArrSize][MinMaxHArrSize],
    XF_CTUNAME(IN_TYPE, NPC) omax_r[MinMaxVArrSize][MinMaxHArrSize],
    XF_CTUNAME(IN_TYPE, NPC) omin_w[MinMaxVArrSize][MinMaxHArrSize],
    XF_CTUNAME(IN_TYPE, NPC) omax_w[MinMaxVArrSize][MinMaxHArrSize],
    xf::cv::Mat<OUT_TYPE, ROWS, COLS, NPC, XFCVDEPTH_OUT_1>& out) {
    this->process(in, block_rows, block_cols, omin_r, omax_r, omin_w, omax_w, out);
}
240+
221241
// Limit implementation SFINAE principal [[
222242
template <int T = IN_TYPE, typename std::enable_if<!is_floating_point<T>::value>::type* = nullptr>
223243
static constexpr XF_CTUNAME(IN_TYPE, NPC) LOW() {

vision/L3/examples/isp_24bit_decompand/description.json

+1
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@
182182
],
183183
"testinfo": {
184184
"disable": false,
185+
"stacksize": 16384,
185186
"jobs": [
186187
{
187188
"index": 0,

0 commit comments

Comments
 (0)