@@ -356,32 +356,6 @@ void load_vector(const T1& data_lane,
356356 }
357357}
358358
359- template <typename T1, typename T2>
360- void store_vector (const T1& data_lane,
361- const T2& data_lanes,
362- const Xbyak_aarch64::XReg& ptr_reg,
363- const int64_t offset,
364- jit_generator* h,
365- const size_t lane_count = 0 ) {
366- if (lane_count == 0 ) {
367- if (offset == 0 ) {
368- h->st1 (data_lanes, ptr (ptr_reg));
369- } else {
370- h->add_imm (h->X_DEFAULT_ADDR , ptr_reg, offset, h->X_TMP_0 );
371- h->st1 (data_lanes, ptr (h->X_DEFAULT_ADDR ));
372- }
373- } else {
374- for (size_t lane = 0 ; lane < lane_count; ++lane) {
375- const auto lane_offset = offset + static_cast <int64_t >(lane);
376- if (lane_offset == 0 ) {
377- h->st1 (data_lane[static_cast <int >(lane)], ptr (ptr_reg));
378- } else {
379- h->add_imm (h->X_DEFAULT_ADDR , ptr_reg, lane_offset, h->X_TMP_0 );
380- h->st1 (data_lane[static_cast <int >(lane)], ptr (h->X_DEFAULT_ADDR ));
381- }
382- }
383- }
384- }
385359} // namespace utils
386360
387361template <dnnl::impl::cpu::aarch64::cpu_isa_t isa>
@@ -575,7 +549,16 @@ void jit_uni_eltwise_generic<isa>::store_vector(const XReg& ptr,
575549 case ov::element::i8 :
576550 case ov::element::u8 : {
577551 const size_t lane_count = cpu_isa_traits<isa>::vlen / src_prc.size ();
578- utils::store_vector (data.b , data.b , ptr, ptr_offset, this , lane_count);
552+ auto data_bytes = data;
553+ for (size_t lane = 0 ; lane < lane_count; ++lane) {
554+ const auto lane_offset = ptr_offset + static_cast <int32_t >(lane);
555+ if (lane_offset == 0 ) {
556+ st1 (data_bytes.b [static_cast <int >(lane)], Xbyak_aarch64::ptr (ptr));
557+ } else {
558+ add_imm (X_DEFAULT_ADDR, ptr, lane_offset, X_TMP_0);
559+ st1 (data_bytes.b [static_cast <int >(lane)], Xbyak_aarch64::ptr (X_DEFAULT_ADDR));
560+ }
561+ }
579562 break ;
580563 }
581564 default : {
0 commit comments