@@ -924,17 +924,29 @@ int IR_Builder::translateVISAArithmeticDoubleInst(
924
924
tmpCR0ForRoundRestore, tmpCR0ForRoundDenormRestore);
925
925
}; // for loop
926
926
927
- if (!noDstMove) {
928
- // make final copy to dst
929
- // dst = r8:df mov (instExecSize) dstOpnd, t8_src_opnd_final {Q1/N1}
930
- // final result is at r8.noacc
931
- G4_SrcRegRegion tsrc8_final (* this , Mod_src_undef, Direct, t8->getRegVar (),
932
- 0 , 0 , getRegionStride1 (), Type_DF);
927
+ // make final copy to dst
928
+ if (!noDstMove || !hasDefaultRoundDenorm) {
929
+ G4_SrcRegRegion tsrc8_final (
930
+ * this , Mod_src_undef, Direct,
931
+ noDstMove ? dstOpnd-> getBase () : t8->getRegVar (),
932
+ noDstMove ? dstOpnd-> getRegOff () : 0 , 0 , getRegionStride1 (), Type_DF);
933
933
G4_SrcRegRegion *t8_src_opnd_final = createSrcRegRegion (tsrc8_final);
934
934
t8_src_opnd_final->setAccRegSel (ACC_UNDEFINED);
935
- inst = createInst (predOpnd, G4_mov, nullptr , saturate, instExecSize,
936
- dstOpnd, t8_src_opnd_final, NULL ,
937
- Get_Gen4_Emask (emask, instExecSize), true );
935
+ if (hasDefaultRoundDenorm) {
936
+ // mov(instExecSize) dstOpnd, t8_src_opnd_final
937
+ inst = createInst (predOpnd, G4_mov, nullptr , saturate, instExecSize,
938
+ dstOpnd, t8_src_opnd_final, nullptr ,
939
+ Get_Gen4_Emask (emask, instExecSize), true );
940
+ } else {
941
+ // If hasDefaultRoundDenorm is false, denorm mode may be flush to zero.
942
+ // When denorm flush-to-zero is set, mov instructions with the same source
943
+ // and destination data type may retain denorm as output. So, we need to
944
+ // use mul instruction instead.
945
+ // mul (instExecSize) dstOpnd, t8_src_opnd_final, 1.0:df
946
+ inst = createInst (predOpnd, G4_mul, nullptr , saturate, instExecSize,
947
+ dstOpnd, t8_src_opnd_final, createImm (1 , Type_DF),
948
+ Get_Gen4_Emask (emask, instExecSize), true );
949
+ }
938
950
}
939
951
940
952
return VISA_SUCCESS;
@@ -1280,11 +1292,22 @@ int IR_Builder::translateVISAArithmeticSingleDivideIEEEInst(
1280
1292
};
1281
1293
1282
1294
// make final copy to dst
1283
- // dst = r8:f mov (instExecSize) r20.0<1>:f r110.0<8;8,1>:f {Q1/H1}
1284
1295
t8_src_opnd_final->setAccRegSel (ACC_UNDEFINED);
1285
- inst = createInst (predOpnd, G4_mov, condMod, saturate, instExecSize, dstOpnd,
1286
- t8_src_opnd_final, NULL ,
1287
- Get_Gen4_Emask (emask, instExecSize), true );
1296
+ if (hasDefaultRoundDenorm) {
1297
+ // mov (instExecSize) r86.0<1>:f r8.0<8;8,1>:f
1298
+ inst = createInst (predOpnd, G4_mov, condMod, saturate, instExecSize,
1299
+ dstOpnd, t8_src_opnd_final, nullptr ,
1300
+ Get_Gen4_Emask (emask, instExecSize), true );
1301
+ } else {
1302
+ // If hasDefaultRoundDenorm is false, denorm mode may be flush to zero.
1303
+ // When denorm flush-to-zero is set, mov instructions with the same source
1304
+ // and destination data type may retain denorm as output. So, we need to
1305
+ // use mul instruction instead.
1306
+ // mul (instExecSize) r86.0<1>:f r8.0<8;8,1>:f 1.0:f
1307
+ inst = createInst (predOpnd, G4_mul, condMod, saturate, instExecSize,
1308
+ dstOpnd, t8_src_opnd_final, createImm (1 , Type_F),
1309
+ Get_Gen4_Emask (emask, instExecSize), true );
1310
+ }
1288
1311
1289
1312
return VISA_SUCCESS;
1290
1313
}
@@ -1595,11 +1618,22 @@ int IR_Builder::translateVISAArithmeticSingleSQRTIEEEInst(
1595
1618
};
1596
1619
1597
1620
// make final copy to dst
1598
- // dst = r8:df mov (instExecSize) r86.0<1>:f r8.0<8;8,1>:f {Q1/H1}
1599
1621
t7_src_opnd_final->setAccRegSel (ACC_UNDEFINED);
1600
- inst = createInst (predOpnd, G4_mov, condMod, saturate, instExecSize, dstOpnd,
1601
- t7_src_opnd_final, NULL ,
1602
- Get_Gen4_Emask (emask, instExecSize), true );
1622
+ if (hasDefaultRoundDenorm) {
1623
+ // mov (instExecSize) r86.0<1>:f r7.0<8;8,1>:f
1624
+ inst = createInst (predOpnd, G4_mov, condMod, saturate, instExecSize,
1625
+ dstOpnd, t7_src_opnd_final, nullptr ,
1626
+ Get_Gen4_Emask (emask, instExecSize), true );
1627
+ } else {
1628
+ // If hasDefaultRoundDenorm is false, denorm mode may be flush to zero.
1629
+ // When denorm flush-to-zero is set, mov instructions with the same source
1630
+ // and destination data type may retain denorm as output. So, we need to
1631
+ // use mul instruction instead.
1632
+ // mul (instExecSize) r86.0<1>:f r7.0<8;8,1>:f 1.0:f
1633
+ inst = createInst (predOpnd, G4_mul, condMod, saturate, instExecSize,
1634
+ dstOpnd, t7_src_opnd_final, createImm (1 , Type_F),
1635
+ Get_Gen4_Emask (emask, instExecSize), true );
1636
+ }
1603
1637
1604
1638
return VISA_SUCCESS;
1605
1639
}
@@ -2156,18 +2190,30 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
2156
2190
tmpCR0ForRoundRestore, tmpCR0ForRoundDenormRestore);
2157
2191
};
2158
2192
2159
- if (!noDstMove) {
2160
- // make final copy to dst
2161
- // src = r7:df
2162
- // final result is at r7.noacc
2163
- G4_SrcRegRegion tsrc7_final (*this , Mod_src_undef, Direct, t7->getRegVar (),
2164
- 0 , 0 , getRegionStride1 (), t7->getElemType ());
2193
+ // make final copy to dst
2194
+ if (!noDstMove || !hasDefaultRoundDenorm) {
2195
+ G4_SrcRegRegion tsrc7_final (*this , Mod_src_undef, Direct,
2196
+ noDstMove ? dstOpnd->getBase ()
2197
+ : t7->getRegVar (),
2198
+ noDstMove ? dstOpnd->getRegOff () : 0 , 0 ,
2199
+ getRegionStride1 (), t7->getElemType ());
2165
2200
G4_SrcRegRegion *t7_src_opnd_final = createSrcRegRegion (tsrc7_final);
2166
2201
t7_src_opnd_final->setAccRegSel (ACC_UNDEFINED);
2167
- // mov (instExecSize) r20.0<1>:df r7.0<8;8,1>:df {Q1/H1}
2168
- inst = createInst (predOpnd, G4_mov, condMod, saturate, instExecSize,
2169
- dstOpnd, t7_src_opnd_final, nullptr ,
2170
- Get_Gen4_Emask (emask, instExecSize), true );
2202
+ if (hasDefaultRoundDenorm) {
2203
+ // mov (instExecSize) r20.0<1>:df r7.0<8;8,1>:df
2204
+ inst = createInst (predOpnd, G4_mov, condMod, saturate, instExecSize,
2205
+ dstOpnd, t7_src_opnd_final, nullptr ,
2206
+ Get_Gen4_Emask (emask, instExecSize), true );
2207
+ } else {
2208
+ // If hasDefaultRoundDenorm is false, denorm mode may be flush to zero.
2209
+ // When denorm flush-to-zero is set, mov instructions with the same source
2210
+ // and destination data type may retain denorm as output. So, we need to
2211
+ // use mul instruction instead.
2212
+ // mul (instExecSize) r20.0<1>:df r7.0<8;8,1>:df 1.0:df
2213
+ inst = createInst (predOpnd, G4_mul, condMod, saturate, instExecSize,
2214
+ dstOpnd, t7_src_opnd_final, createImm (1 , Type_DF),
2215
+ Get_Gen4_Emask (emask, instExecSize), true );
2216
+ }
2171
2217
}
2172
2218
2173
2219
return VISA_SUCCESS;
0 commit comments