Skip to content

Commit 6ee1ad1

Browse files
fangliu2020 authored and igcbot committed
[Autobackout][FuncReg]Revert of change: c0617d7
Fix denorm flush-to-zero issue in math macro (2nd try). The single-precision or double-precision denorm mode in the control register must be set to retain denorms while executing the math macro instruction sequence. In addition, if vISA_hasRNEandDenorm is not set, the denorm mode could be flush-to-zero. When denorm flush-to-zero is set, mov instructions with the same source and destination data type may still retain denorms in their output, so we need to insert a mul instruction by "1" to make denorm flush-to-zero take effect.
1 parent a824b25 commit 6ee1ad1

File tree

1 file changed

+27
-73
lines changed

1 file changed

+27
-73
lines changed

visa/VisaToG4/TranslateMath.cpp

Lines changed: 27 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -924,29 +924,17 @@ int IR_Builder::translateVISAArithmeticDoubleInst(
924924
tmpCR0ForRoundRestore, tmpCR0ForRoundDenormRestore);
925925
}; // for loop
926926

927-
// make final copy to dst
928-
if (!noDstMove || !hasDefaultRoundDenorm) {
929-
G4_SrcRegRegion tsrc8_final(
930-
*this, Mod_src_undef, Direct,
931-
noDstMove ? dstOpnd->getBase() : t8->getRegVar(),
932-
noDstMove ? dstOpnd->getRegOff() : 0, 0, getRegionStride1(), Type_DF);
927+
if (!noDstMove) {
928+
// make final copy to dst
929+
// dst = r8:df mov (instExecSize) dstOpnd, t8_src_opnd_final {Q1/N1}
930+
// final result is at r8.noacc
931+
G4_SrcRegRegion tsrc8_final(*this, Mod_src_undef, Direct, t8->getRegVar(),
932+
0, 0, getRegionStride1(), Type_DF);
933933
G4_SrcRegRegion *t8_src_opnd_final = createSrcRegRegion(tsrc8_final);
934934
t8_src_opnd_final->setAccRegSel(ACC_UNDEFINED);
935-
if (hasDefaultRoundDenorm) {
936-
// mov(instExecSize) dstOpnd, t8_src_opnd_final
937-
inst = createInst(predOpnd, G4_mov, nullptr, saturate, instExecSize,
938-
dstOpnd, t8_src_opnd_final, nullptr,
939-
Get_Gen4_Emask(emask, instExecSize), true);
940-
} else {
941-
// If hasDefaultRoundDenorm is false, denorm mode may be flush to zero.
942-
// When denorm flush-to-zero is set, mov instructions with the same source
943-
// and destination data type may retain denorm as output. So, we need to
944-
// use mul instruction instead.
945-
// mul (instExecSize) dstOpnd, t8_src_opnd_final 1.0:df
946-
inst = createInst(predOpnd, G4_mul, nullptr, saturate, instExecSize,
947-
dstOpnd, t8_src_opnd_final, createImm(1, Type_DF),
948-
Get_Gen4_Emask(emask, instExecSize), true);
949-
}
935+
inst = createInst(predOpnd, G4_mov, nullptr, saturate, instExecSize,
936+
dstOpnd, t8_src_opnd_final, NULL,
937+
Get_Gen4_Emask(emask, instExecSize), true);
950938
}
951939

952940
return VISA_SUCCESS;
@@ -1292,22 +1280,11 @@ int IR_Builder::translateVISAArithmeticSingleDivideIEEEInst(
12921280
};
12931281

12941282
// make final copy to dst
1283+
// dst = r8:f mov (instExecSize) r20.0<1>:f r110.0<8;8,1>:f {Q1/H1}
12951284
t8_src_opnd_final->setAccRegSel(ACC_UNDEFINED);
1296-
if (hasDefaultRoundDenorm) {
1297-
// mov (instExecSize) r86.0<1>:f r8.0<8;8,1>:f
1298-
inst = createInst(predOpnd, G4_mov, condMod, saturate, instExecSize,
1299-
dstOpnd, t8_src_opnd_final, nullptr,
1300-
Get_Gen4_Emask(emask, instExecSize), true);
1301-
} else {
1302-
// If hasDefaultRoundDenorm is false, denorm mode may be flush to zero.
1303-
// When denorm flush-to-zero is set, mov instructions with the same source
1304-
// and destination data type may retain denorm as output. So, we need to
1305-
// use mul instruction instead.
1306-
// mul (instExecSize) r86.0<1>:f r8.0<8;8,1>:f 1.0:f
1307-
inst = createInst(predOpnd, G4_mul, condMod, saturate, instExecSize,
1308-
dstOpnd, t8_src_opnd_final, createImm(1, Type_F),
1309-
Get_Gen4_Emask(emask, instExecSize), true);
1310-
}
1285+
inst = createInst(predOpnd, G4_mov, condMod, saturate, instExecSize, dstOpnd,
1286+
t8_src_opnd_final, NULL,
1287+
Get_Gen4_Emask(emask, instExecSize), true);
13111288

13121289
return VISA_SUCCESS;
13131290
}
@@ -1618,22 +1595,11 @@ int IR_Builder::translateVISAArithmeticSingleSQRTIEEEInst(
16181595
};
16191596

16201597
// make final copy to dst
1598+
// dst = r8:df mov (instExecSize) r86.0<1>:f r8.0<8;8,1>:f {Q1/H1}
16211599
t7_src_opnd_final->setAccRegSel(ACC_UNDEFINED);
1622-
if (hasDefaultRoundDenorm) {
1623-
// mov (instExecSize) r86.0<1>:f r7.0<8;8,1>:f
1624-
inst = createInst(predOpnd, G4_mov, condMod, saturate, instExecSize,
1625-
dstOpnd, t7_src_opnd_final, nullptr,
1626-
Get_Gen4_Emask(emask, instExecSize), true);
1627-
} else {
1628-
// If hasDefaultRoundDenorm is false, denorm mode may be flush to zero.
1629-
// When denorm flush-to-zero is set, mov instructions with the same source
1630-
// and destination data type may retain denorm as output. So, we need to
1631-
// use mul instruction instead.
1632-
// mul (instExecSize) r86.0<1>:f r7.0<8;8,1>:f 1.0:f
1633-
inst = createInst(predOpnd, G4_mul, condMod, saturate, instExecSize,
1634-
dstOpnd, t7_src_opnd_final, createImm(1, Type_F),
1635-
Get_Gen4_Emask(emask, instExecSize), true);
1636-
}
1600+
inst = createInst(predOpnd, G4_mov, condMod, saturate, instExecSize, dstOpnd,
1601+
t7_src_opnd_final, NULL,
1602+
Get_Gen4_Emask(emask, instExecSize), true);
16371603

16381604
return VISA_SUCCESS;
16391605
}
@@ -2190,30 +2156,18 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
21902156
tmpCR0ForRoundRestore, tmpCR0ForRoundDenormRestore);
21912157
};
21922158

2193-
// make final copy to dst
2194-
if (!noDstMove || !hasDefaultRoundDenorm) {
2195-
G4_SrcRegRegion tsrc7_final(*this, Mod_src_undef, Direct,
2196-
noDstMove ? dstOpnd->getBase()
2197-
: t7->getRegVar(),
2198-
noDstMove ? dstOpnd->getRegOff() : 0, 0,
2199-
getRegionStride1(), t7->getElemType());
2159+
if (!noDstMove) {
2160+
// make final copy to dst
2161+
// src = r7:df
2162+
// final result is at r7.noacc
2163+
G4_SrcRegRegion tsrc7_final(*this, Mod_src_undef, Direct, t7->getRegVar(),
2164+
0, 0, getRegionStride1(), t7->getElemType());
22002165
G4_SrcRegRegion *t7_src_opnd_final = createSrcRegRegion(tsrc7_final);
22012166
t7_src_opnd_final->setAccRegSel(ACC_UNDEFINED);
2202-
if (hasDefaultRoundDenorm) {
2203-
// mov (instExecSize) r20.0<1>:df r7.0<8;8,1>:df
2204-
inst = createInst(predOpnd, G4_mov, condMod, saturate, instExecSize,
2205-
dstOpnd, t7_src_opnd_final, nullptr,
2206-
Get_Gen4_Emask(emask, instExecSize), true);
2207-
} else {
2208-
// If hasDefaultRoundDenorm is false, denorm mode may be flush to zero.
2209-
// When denorm flush-to-zero is set, mov instructions with the same source
2210-
// and destination data type may retain denorm as output. So, we need to
2211-
// use mul instruction instead.
2212-
// mul (instExecSize) r20.0<1>:df r7.0<8;8,1>:df 1.0:df
2213-
inst = createInst(predOpnd, G4_mul, condMod, saturate, instExecSize,
2214-
dstOpnd, t7_src_opnd_final, createImm(1, Type_DF),
2215-
Get_Gen4_Emask(emask, instExecSize), true);
2216-
}
2167+
// mov (instExecSize) r20.0<1>:df r7.0<8;8,1>:df {Q1/H1}
2168+
inst = createInst(predOpnd, G4_mov, condMod, saturate, instExecSize,
2169+
dstOpnd, t7_src_opnd_final, nullptr,
2170+
Get_Gen4_Emask(emask, instExecSize), true);
22172171
}
22182172

22192173
return VISA_SUCCESS;

0 commit comments

Comments
 (0)