Skip to content

Commit 223ffb0

Browse files
SDA USRsdausr
authored and
GitHub Enterprise
committed
Squashed 'dsp' changes from 4d0fd03..a66bcfc87 (#1133)
a66bcfc87 Merge pull request #1577 from mlechtan/main d0cda82bc FIR TDM: Add a clause to margin frame calculation f0fa3ba10 FIR TDM: AIE1 optimization for evenFrames architecture 28957b3af create main branch from next branch e4b818acb Merge pull request #1561 from berry/dev2next241108 bbeae76e7 Update getPhaseAlias.py 834ad428f copy dev docs, csv, and other changes to next e237f65be Merge pull request #1559 from mlechtan/next 9d0e9dd0c Revert "Adding benchmark tables max rows" eb2c7b367 Merge pull request #1557 from mlechtan/next 331f07979 Adding benchmark tables max rows a822430b3 Fixing release notes formatting 3f550d5e2 Merge pull request #1555 from mlechtan/next ac45d8c9c dev2next - release notes + docs review items 6b6443014 Merge pull request #1549 from mlechtan/next 1363d0341 Tidy up VMC DFT testcase 5c4402615 Removing docs/src/rst files 954a1a035 Merge branch 'next' of https://gitenterprise.xilinx.com/mlechtan/xf_dsp into next d6739a68e dev2next - copy docs, doxygen + meta license plates da007d345 dev2next - copy docs, doxygen + meta license plates f26b31154 Merge pull request #1537 from berry/next fa199858a Merge branch 'FaaSApps:next' into next 76a2eb377 Merge pull request #1534 from berry/dev2next_241101 c609edb5e Merge branch 'dev2next_241101' of https://gitenterprise.xilinx.com/berry/xf_dsp into dev2next_241101 e077b2844 copy over dds helper mk fix 957b9abf8 Removing Example 2d FFT acb6ccdbf descjson - standardization fix - "625" to 625 b9383a773 copy dev (54fd806) to next 47270f08b Merge pull request #1517 from berry/dev2next_241025 75e2366aa copy dev (5ea67f7) to next 06ee35c96 Merge pull request #1512 from wesleyl/next 118612ba2 Fix errors in DDS LUT stream, Mixer Lut stream and FIR TDM Stream 39d4780cc vmc testcases updated with ssr added in buffer blocks and DDS and Mixer blocks updated 5141ab3c5 Merge pull request #1509 from berry/dev2next_241024 708898e1d copy dev (713b962) to next 358afba29 Merge pull request #1485 from wesleyl/next 8a75273dc 
Merge pull request #1498 from mlechtan/next e4d5da6b5 Merge branch 'next' into next d8f1e13c2 Adding type_traits to device_defs cfe8998b6 Merge pull request #1486 from berry/dev2next_241017 912de5fa1 add ssr parameters to buffer blocks 90ce09110 fix vmc validator 664b50abf copy L2 examples from dev 142845e24 copy metadata from dev 18b1446ac fix validator wrt VMC 4fc251822 Merge pull request #1478 from berry/dev2next_241015 fef6be597 copy further meta data changes from dev 2c74e3cb8 metadata 5bd75668e L2/include 49007a787 L1/include 1408bf66b L1/src f92783f05 L2/examples 79ea46400 user_guides 154a02ac1 L2/tests 93237744c Merge pull request #1471 from FaaSApps/cr1216301 be272546e Merge pull request #1473 from mlechtan/next a820d1a1a FIR TDM fix for CR-1216299 64225a981 FIR TDM fix for CR-1216299 7896064bf Fix for CR-1216301 10c25af9a Merge pull request #1449 from berry/dev2next_241007 6c23bf8ce conv corr debug strip fix 856bbaee3 copy dev (bc08255) to next 254ab36ba Merge pull request #1431 from wesleyl/next b8ce1d4e7 DDS & Mixer LUT test cases updated f304f18d3 update vmc changes- firlength, capitalize dds-mixer lut, remove dual_ip & out_ports 05660681d Merge branch 'FaaSApps:next' into next cf69e25f5 Merge pull request #1433 from mlechtan/next acb8c95eb Adding missing Meta check for FIR TDM 12968d1f7 Add DDS LUT and Mixer LUT blocks and update vmc_dsp.py file 4c16b1de1 Merge pull request #1424 from berry/dev2next_240925 794f88ede part 3b 95a64e38e dev to next - part 3 to fix VMC fails 2b16d4f14 dev to next - part 2 1fd7dcea8 copy dev 2 next - part 1 8a20c03cf Merge pull request #1409 from berry/dev2next_240923 f7637abd7 Merge pull request #1363 from wesleyl/next 009c9f1f5 CR1210012 fix for fft_1024_tmp1 allocation 88b18a3ef copy dev to next 4be538fe5 Merge branch 'FaaSApps:next' into next 3df41977e Delete decomposer_multi_params.json ba6f48794 Merge branch 'FaaSApps:next' into next 38a4e388d Merge pull request #1405 from uvimalku/dds_cr_next 19f8d9340 DDS MIxer 
metadata bug fix 51b75e776 DDS LUT update 5c77f7841 push VMC changes 1eefba75b Merge pull request #1391 from berry/dev2next_240912 8ce03229d fix buggy debug strip ea7c53e47 ew d48c7b03d debug strip missing files. and copyright fix dee880c03 copy dev (fc696f3) to next b9b4493f1 Merge pull request #1387 from liyuanz/revert-1322-add_config 0db7e88ee Revert "Add template of config file for system case " 1c9d8ebf7 Merge pull request #1375 from berry/dev2next_240904 071dbbb89 copy dev (commit e43b507) to next 379d39543 Merge pull request #1359 from berry/dev2next240826 3bcc9cde0 FIR TDM VMC Meta fix 60fb497af FIR TDM VMC Meta fix 85afa01a2 copy from dev commit 77d16e6 094fde662 Merge pull request #1322 from liyuanz/add_config 82804e8d5 Merge pull request #1349 from wesleyl/next d4fd62409 function definition updated for tdm da8a9ba4a Merge pull request #1345 from berry/dev2next_240815 99b7cc544 fix formatting of vss files dcffa3639 dev 987f25e to next aa0203cf3 Update Jenkinsfile a7be41d2d update 77a03ac2d add config file 2e7d3f4b0 Merge pull request #1316 from berry/dev2next_240730 952a0ffd5 dev commit a4d561c to next branch 5f886f4d3 Merge pull request #1306 from berry/dev2next_24_07_23 795c1ccc2 copy dev (commit c75f418) to next cc6e31f44 Merge pull request #1291 from berry/dev2next_240701 3065c5c3f FIR TDM fix for evenFrames optimization switch. #1293 ca46bd59f dev commit 5b25784 to next 3c7fa604f Merge pull request #1262 from liyuanz/fix_os_next 0714ba636 copy contents of dev to next (dev commit 2b4b9c9) (#1288) 99fb748b8 Revert next branch back to 2024.1_stable_latest (#1279) d2228a4d1 change 2024.1_stable_latest to 2024.2_stable_latest 13b76bb4f update to next branch Co-authored-by: sdausr <[email protected]>
1 parent 80bc480 commit 223ffb0

File tree

3 files changed

+35
-30
lines changed

3 files changed

+35
-30
lines changed

dsp/Jenkinsfile

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
@Library('pipeline-library')_
22

3-
VitisLibPipeline (branch: 'next', libname: 'xf_dsp', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build:vitis_aie_sim:vitis_aie_x86sim',
4-
upstream_dependencies: 'xf_utils_hw,next,../utils; xf_data_mover,next,../data_mover; dsplib_internal_scripts,main,../dsplib_internal_scripts',
5-
devtest: 'RunDeploy.sh', TOOLVERSION: '2024.2_stable_latest',
3+
VitisLibPipeline (branch: 'main', libname: 'xf_dsp', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build:vitis_aie_sim:vitis_aie_x86sim',
4+
upstream_dependencies: 'xf_utils_hw,main,../utils; xf_data_mover,main,../data_mover; dsplib_internal_scripts,main,../dsplib_internal_scripts',
5+
devtest: 'RunDeploy.sh', TOOLVERSION: '2024.2_released',
66
77
post_launch: '../dsplib_internal_scripts/scripts/jenkins/post_launch_wrapper.sh |& tee -a reporting_log.txt')

dsp/L1/include/aie/fir_tdm.hpp

+2-9
Original file line numberDiff line numberDiff line change
@@ -200,10 +200,7 @@ class kernelFilterClass {
200200
#if __HAS_ACCUM_PERMUTES__ == 1
201201
// cint16/int16 combo can be overloaded with 2 column MUL/MACs.
202202
static constexpr unsigned int columnMultiple =
203-
(std::is_same<TT_DATA, cint16>::value && std::is_same<TT_COEFF, int16>::value) &&
204-
(TP_TDM_CHANNELS > m_kVOutSize) && (TP_TDM_CHANNELS % (2 * m_kVOutSize) == 0)
205-
? 2
206-
: 1;
203+
(std::is_same<TT_DATA, cint16>::value && std::is_same<TT_COEFF, int16>::value) ? 2 : 1;
207204
static constexpr unsigned int coeffToDataMultiple = 1;
208205
#else
209206
static constexpr unsigned int columnMultiple = 1;
@@ -256,11 +253,7 @@ class kernelFilterClass {
256253
// Operate on multiple frames in parallel, when possible.
257254
// Optimized to reduce data loads, handy when 512-bits of data and 256-bits of coeffs are needed on each clock
258255
// cycle.
259-
static constexpr unsigned int useEvenFrames =
260-
(TP_NUM_FRAMES % 2 == 0 && columnMultiple == 2 && TP_TDM_CHANNELS > m_kVOutSize &&
261-
TP_TDM_CHANNELS % kSamplesInVectData == 0)
262-
? 1
263-
: 0;
256+
static constexpr unsigned int useEvenFrames = (TP_NUM_FRAMES % 2 == 0 && columnMultiple == 2) ? 1 : 0;
264257
// TDM FIR Margin = (TP_FIR_LEN-1)*TP_TDM_CHANNELS
265258
// or set to 0, if handled with internal buffer.
266259
static constexpr unsigned int enableInternalMargin = __HAS_ACCUM_PERMUTES__ ? 1 : 0;

dsp/L1/src/aie/fir_tdm.cpp

+30-18
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,6 @@ INLINE_DECL void kernelFilterClass<TT_DATA,
547547
// Rewind the read iterator by (TP_FIR_RANGE_LEN * TP_TDM_LOOP_SIZE - 1) positions
548548
inRdItr -= (TP_FIR_RANGE_LEN)*TP_TDM_LOOP_SIZE - 1;
549549
}
550-
// inRdItr += m_kFirCoeffOffset * columnMultiple * TP_TDM_CHANNELS / kSamplesInVectData;
551550
}
552551
};
553552

@@ -615,22 +614,38 @@ kernelFilterClass<TT_DATA,
615614
using accVect_t = ::aie::accum<typename tTDMAccBaseType<TT_DATA, TT_COEFF>::type, kSamplesInVectAcc>;
616615

617616
dataVect_t dataVect;
618-
dataVect_t* __restrict inPointer;
617+
dataRead_t* __restrict inPointer;
619618
outDataVect_t outVect, outVect2;
620619
coeffVect_t* __restrict coeffVectPtr;
621620

622621
coeffVect_t coeffVect;
623622
accVect_t acc, acc2;
624623
input_circular_buffer<TT_DATA, extents<internalBufferSize>, margin<0> > inWindowCirc(&m_inputBuffer[0],
625624
internalBufferSize, 0);
626-
auto inWrItr = ::aie::begin_vector_random_circular<kSamplesInVectData>(m_inputBuffer, internalBufferSize);
627-
auto inRdItr = ::aie::begin_vector_random_circular<kSamplesInVectAcc>(inWindowCirc);
625+
auto inWrItr = ::aie::begin_vector_random_circular<kSamplesInVectAcc>(m_inputBuffer, internalBufferSize);
626+
auto inRdItr = ::aie::begin_vector_random_circular<kSamplesInVectAcc>(m_inputBuffer, internalBufferSize);
628627
dataVect_t* frameStart = (dataVect_t*)inInterface.inWindow;
629-
// #undef _DSPLIB_FIR_TDM_HPP_DEBUG_
630628

631-
inWrItr += (marginFrame)*TP_TDM_CHANNELS / kSamplesInVectData;
629+
inWrItr += (marginFrame)*TP_TDM_CHANNELS / kSamplesInVectAcc;
632630
int readIncr = ((marginFrame + 2 + m_kFirCoeffOffset)) * columnMultiple * TP_TDM_CHANNELS / kSamplesInVectData;
633631
inRdItr += readIncr;
632+
// precalculate margin frame prior to jumping into inner loop.
633+
// Alternatively, calculate margin frame within inner loop, to avoid a costly div.
634+
// Calculating frame margin inside inner loop benefits cases that operate on a fairly small number of frames.
635+
constexpr unsigned int precalculatedMarginFrame = (TP_NUM_FRAMES > internalBufferFrames) ? 1 : 0;
636+
if
637+
constexpr(m_kFirMargin == 0) {
638+
if
639+
constexpr(precalculatedMarginFrame == 1) {
640+
marginFrame = (((marginFrame + TP_NUM_FRAMES) >= internalBufferFrames)
641+
? ((marginFrame + TP_NUM_FRAMES) % internalBufferFrames)
642+
: (marginFrame + TP_NUM_FRAMES));
643+
}
644+
}
645+
else {
646+
// Margin has been copied externally and is part of the window
647+
marginFrame = 0;
648+
}
634649

635650
// Loop through 2 frames at a time
636651
for (int frame = 0; frame < TP_NUM_FRAMES / 2; frame++)
@@ -640,25 +655,23 @@ kernelFilterClass<TT_DATA,
640655
// Embed margin handling here, as this would reduce the amount of buffer size.
641656

642657
for (int j = 0; j < 2; j++) {
643-
dataVect_t* frameStart =
644-
(dataVect_t*)inInterface.inWindow + j * TP_TDM_CHANNELS / kSamplesInVectData;
658+
dataRead_t* frameStart =
659+
(dataRead_t*)inInterface.inWindow + j * TP_TDM_CHANNELS / kSamplesInVectAcc;
645660
// Copy margin for 2 frames at a time
646-
for (int i = 0; i < TP_TDM_CHANNELS / kSamplesInVectData; i++) {
647-
inPointer =
648-
((dataVect_t*)frameStart) + i + 2 * frame * TP_TDM_CHANNELS / kSamplesInVectData;
661+
for (int i = 0; i < TP_TDM_CHANNELS / kSamplesInVectAcc; i++) {
662+
inPointer = ((dataRead_t*)frameStart) + i + 2 * frame * TP_TDM_CHANNELS / kSamplesInVectAcc;
649663
// dataVect = *inPointer;
650664
// *inWrItr++ = dataVect;
651665
*inWrItr++ = *inPointer;
652666
}
653667
// Copying 2 frames at a time.
654-
marginFrame = (marginFrame == (internalBufferFrames - 1) ? 0 : marginFrame + 1);
668+
if
669+
constexpr(precalculatedMarginFrame == 0) {
670+
marginFrame = (marginFrame == (internalBufferFrames - 1) ? 0 : marginFrame + 1);
671+
}
655672
}
656-
chess_memory_fence();
673+
chess_separator_scheduler();
657674
}
658-
else {
659-
// Margin has been copied externally and is part of the window
660-
marginFrame = 0;
661-
}
662675
// Read once, prior to the loop
663676
if
664677
constexpr(columnMultiple == 2) {
@@ -713,7 +726,6 @@ kernelFilterClass<TT_DATA,
713726
}
714727
if
715728
constexpr(TP_CASC_IN == CASC_IN_TRUE) {
716-
// acc = (accVect_t)readincr_v<kSamplesInVectAcc>(inInterface.inCascade);
717729
acc2 = readCascade<TT_DATA, TT_COEFF>(inInterface, acc2);
718730
acc2 = macTdm2(acc2, dataVect, coeffVect);
719731
}

0 commit comments

Comments
 (0)