Skip to content

Commit c0617d7

Browse files
fangliu2020igcbot
authored and committed
Fix denorm flush-to-zero issue in math macro. 2nd try.
The single-precision or double-precision denorm mode in the control register must be set in order to retain denormals while executing a math macro instruction sequence. In addition, if vISA_hasRNEandDenorm is not set, the denorm mode may be flush-to-zero. When denorm flush-to-zero is set, a mov instruction whose source and destination data types are the same may still retain a denormal in its output. Therefore, we need to insert a mul instruction by 1.0 so that denorm flush-to-zero takes effect.
1 parent ab42f6e commit c0617d7

File tree

1 file changed

+73
-27
lines changed

1 file changed

+73
-27
lines changed

visa/VisaToG4/TranslateMath.cpp

Lines changed: 73 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -924,17 +924,29 @@ int IR_Builder::translateVISAArithmeticDoubleInst(
924924
tmpCR0ForRoundRestore, tmpCR0ForRoundDenormRestore);
925925
}; // for loop
926926

927-
if (!noDstMove) {
928-
// make final copy to dst
929-
// dst = r8:df mov (instExecSize) dstOpnd, t8_src_opnd_final {Q1/N1}
930-
// final result is at r8.noacc
931-
G4_SrcRegRegion tsrc8_final(*this, Mod_src_undef, Direct, t8->getRegVar(),
932-
0, 0, getRegionStride1(), Type_DF);
927+
// make final copy to dst
928+
if (!noDstMove || !hasDefaultRoundDenorm) {
929+
G4_SrcRegRegion tsrc8_final(
930+
*this, Mod_src_undef, Direct,
931+
noDstMove ? dstOpnd->getBase() : t8->getRegVar(),
932+
noDstMove ? dstOpnd->getRegOff() : 0, 0, getRegionStride1(), Type_DF);
933933
G4_SrcRegRegion *t8_src_opnd_final = createSrcRegRegion(tsrc8_final);
934934
t8_src_opnd_final->setAccRegSel(ACC_UNDEFINED);
935-
inst = createInst(predOpnd, G4_mov, nullptr, saturate, instExecSize,
936-
dstOpnd, t8_src_opnd_final, NULL,
937-
Get_Gen4_Emask(emask, instExecSize), true);
935+
if (hasDefaultRoundDenorm) {
936+
// mov(instExecSize) dstOpnd, t8_src_opnd_final
937+
inst = createInst(predOpnd, G4_mov, nullptr, saturate, instExecSize,
938+
dstOpnd, t8_src_opnd_final, nullptr,
939+
Get_Gen4_Emask(emask, instExecSize), true);
940+
} else {
941+
// If hasDefaultRoundDenorm is false, denorm mode may be flush to zero.
942+
// When denorm flush-to-zero is set, mov instructions with the same source
943+
// and destination data type may retain denorm as output. So, we need to
944+
// use mul instruction instead.
945+
// mul (instExecSize) dstOpnd, t8_src_opnd_final 1.0:df
946+
inst = createInst(predOpnd, G4_mul, nullptr, saturate, instExecSize,
947+
dstOpnd, t8_src_opnd_final, createImm(1, Type_DF),
948+
Get_Gen4_Emask(emask, instExecSize), true);
949+
}
938950
}
939951

940952
return VISA_SUCCESS;
@@ -1280,11 +1292,22 @@ int IR_Builder::translateVISAArithmeticSingleDivideIEEEInst(
12801292
};
12811293

12821294
// make final copy to dst
1283-
// dst = r8:f mov (instExecSize) r20.0<1>:f r110.0<8;8,1>:f {Q1/H1}
12841295
t8_src_opnd_final->setAccRegSel(ACC_UNDEFINED);
1285-
inst = createInst(predOpnd, G4_mov, condMod, saturate, instExecSize, dstOpnd,
1286-
t8_src_opnd_final, NULL,
1287-
Get_Gen4_Emask(emask, instExecSize), true);
1296+
if (hasDefaultRoundDenorm) {
1297+
// mov (instExecSize) r86.0<1>:f r8.0<8;8,1>:f
1298+
inst = createInst(predOpnd, G4_mov, condMod, saturate, instExecSize,
1299+
dstOpnd, t8_src_opnd_final, nullptr,
1300+
Get_Gen4_Emask(emask, instExecSize), true);
1301+
} else {
1302+
// If hasDefaultRoundDenorm is false, denorm mode may be flush to zero.
1303+
// When denorm flush-to-zero is set, mov instructions with the same source
1304+
// and destination data type may retain denorm as output. So, we need to
1305+
// use mul instruction instead.
1306+
// mul (instExecSize) r86.0<1>:f r8.0<8;8,1>:f 1.0:f
1307+
inst = createInst(predOpnd, G4_mul, condMod, saturate, instExecSize,
1308+
dstOpnd, t8_src_opnd_final, createImm(1, Type_F),
1309+
Get_Gen4_Emask(emask, instExecSize), true);
1310+
}
12881311

12891312
return VISA_SUCCESS;
12901313
}
@@ -1595,11 +1618,22 @@ int IR_Builder::translateVISAArithmeticSingleSQRTIEEEInst(
15951618
};
15961619

15971620
// make final copy to dst
1598-
// dst = r8:df mov (instExecSize) r86.0<1>:f r8.0<8;8,1>:f {Q1/H1}
15991621
t7_src_opnd_final->setAccRegSel(ACC_UNDEFINED);
1600-
inst = createInst(predOpnd, G4_mov, condMod, saturate, instExecSize, dstOpnd,
1601-
t7_src_opnd_final, NULL,
1602-
Get_Gen4_Emask(emask, instExecSize), true);
1622+
if (hasDefaultRoundDenorm) {
1623+
// mov (instExecSize) r86.0<1>:f r7.0<8;8,1>:f
1624+
inst = createInst(predOpnd, G4_mov, condMod, saturate, instExecSize,
1625+
dstOpnd, t7_src_opnd_final, nullptr,
1626+
Get_Gen4_Emask(emask, instExecSize), true);
1627+
} else {
1628+
// If hasDefaultRoundDenorm is false, denorm mode may be flush to zero.
1629+
// When denorm flush-to-zero is set, mov instructions with the same source
1630+
// and destination data type may retain denorm as output. So, we need to
1631+
// use mul instruction instead.
1632+
// mul (instExecSize) r86.0<1>:f r7.0<8;8,1>:f 1.0:f
1633+
inst = createInst(predOpnd, G4_mul, condMod, saturate, instExecSize,
1634+
dstOpnd, t7_src_opnd_final, createImm(1, Type_F),
1635+
Get_Gen4_Emask(emask, instExecSize), true);
1636+
}
16031637

16041638
return VISA_SUCCESS;
16051639
}
@@ -2156,18 +2190,30 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
21562190
tmpCR0ForRoundRestore, tmpCR0ForRoundDenormRestore);
21572191
};
21582192

2159-
if (!noDstMove) {
2160-
// make final copy to dst
2161-
// src = r7:df
2162-
// final result is at r7.noacc
2163-
G4_SrcRegRegion tsrc7_final(*this, Mod_src_undef, Direct, t7->getRegVar(),
2164-
0, 0, getRegionStride1(), t7->getElemType());
2193+
// make final copy to dst
2194+
if (!noDstMove || !hasDefaultRoundDenorm) {
2195+
G4_SrcRegRegion tsrc7_final(*this, Mod_src_undef, Direct,
2196+
noDstMove ? dstOpnd->getBase()
2197+
: t7->getRegVar(),
2198+
noDstMove ? dstOpnd->getRegOff() : 0, 0,
2199+
getRegionStride1(), t7->getElemType());
21652200
G4_SrcRegRegion *t7_src_opnd_final = createSrcRegRegion(tsrc7_final);
21662201
t7_src_opnd_final->setAccRegSel(ACC_UNDEFINED);
2167-
// mov (instExecSize) r20.0<1>:df r7.0<8;8,1>:df {Q1/H1}
2168-
inst = createInst(predOpnd, G4_mov, condMod, saturate, instExecSize,
2169-
dstOpnd, t7_src_opnd_final, nullptr,
2170-
Get_Gen4_Emask(emask, instExecSize), true);
2202+
if (hasDefaultRoundDenorm) {
2203+
// mov (instExecSize) r20.0<1>:df r7.0<8;8,1>:df
2204+
inst = createInst(predOpnd, G4_mov, condMod, saturate, instExecSize,
2205+
dstOpnd, t7_src_opnd_final, nullptr,
2206+
Get_Gen4_Emask(emask, instExecSize), true);
2207+
} else {
2208+
// If hasDefaultRoundDenorm is false, denorm mode may be flush to zero.
2209+
// When denorm flush-to-zero is set, mov instructions with the same source
2210+
// and destination data type may retain denorm as output. So, we need to
2211+
// use mul instruction instead.
2212+
// mul (instExecSize) r20.0<1>:df r7.0<8;8,1>:df 1.0:df
2213+
inst = createInst(predOpnd, G4_mul, condMod, saturate, instExecSize,
2214+
dstOpnd, t7_src_opnd_final, createImm(1, Type_DF),
2215+
Get_Gen4_Emask(emask, instExecSize), true);
2216+
}
21712217
}
21722218

21732219
return VISA_SUCCESS;

0 commit comments

Comments
 (0)