Skip to content

Commit c37f997

Browse files
committed
Add support for AVX512
1 parent 49f5978 commit c37f997

File tree

4 files changed

+241
-0
lines changed

4 files changed

+241
-0
lines changed

SDK/C/TitanEngine.h

+18
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,12 @@ typedef struct
603603
XmmRegister_t High; //AVX part
604604
} YmmRegister_t;
605605

606+
// One ZMM register, stored as two YmmRegister_t halves.
typedef struct
607+
{
608+
YmmRegister_t Low; // Lower half: the part of the register shared with AVX (YMM)
609+
YmmRegister_t High; // Upper half: only present with AVX-512
610+
} ZmmRegister_t;
611+
606612
typedef struct
607613
{
608614
BYTE data[10];
@@ -668,6 +674,16 @@ typedef struct
668674
#endif
669675
} TITAN_ENGINE_CONTEXT_t;
670676

677+
// AVX-512 register state exchanged through GetAVX512Context / SetAVX512Context.
typedef struct
678+
{
679+
#ifdef _WIN64
680+
ZmmRegister_t ZmmRegisters[32]; // 64-bit builds carry 32 ZMM slots
681+
#else // x86
682+
ZmmRegister_t ZmmRegisters[8]; // 32-bit builds carry 8 ZMM slots
683+
#endif
684+
// Opmask registers. k0 has no slot here, so Opmask[i] corresponds to k(i+1).
ULONGLONG Opmask[7]; // k0 is omitted; AVX-512BW extends these registers from 16 bits to 64 bits
685+
} TITAN_ENGINE_CONTEXT_AVX512_t;
686+
671687
#ifdef __cplusplus
672688
extern "C"
673689
{
@@ -859,6 +875,8 @@ __declspec(dllexport) bool TITCALL SetContextDataEx(HANDLE hActiveThread, DWORD
859875
__declspec(dllexport) bool TITCALL SetContextData(DWORD IndexOfRegister, ULONG_PTR NewRegisterValue);
860876
__declspec(dllexport) bool TITCALL GetAVXContext(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_t* titcontext);
861877
__declspec(dllexport) bool TITCALL SetAVXContext(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_t* titcontext);
878+
// Reads the ZMM and opmask registers of hActiveThread into *titcontext.
// When no AVX-512 xstate feature is enabled, only the AVX (lower) halves are read.
// Returns false on failure.
__declspec(dllexport) bool TITCALL GetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext);
879+
// Writes the ZMM and opmask registers from *titcontext to hActiveThread.
// When no AVX-512 xstate feature is enabled, only the AVX (lower) halves are written.
// Returns false on failure.
__declspec(dllexport) bool TITCALL SetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext);
862880
__declspec(dllexport) void TITCALL ClearExceptionNumber();
863881
__declspec(dllexport) long TITCALL CurrentExceptionNumber();
864882
__declspec(dllexport) bool TITCALL MatchPatternEx(HANDLE hProcess, void* MemoryToCheck, int SizeOfMemoryToCheck, void* PatternToMatch, int SizeOfPatternToMatch, PBYTE WildCard);

TitanEngine/TitanEngine.Debugger.Context.cpp

+205
Original file line numberDiff line numberDiff line change
@@ -1043,3 +1043,208 @@ __declspec(dllexport) bool TITCALL GetAVXContext(HANDLE hActiveThread, TITAN_ENG
10431043

10441044
return true;
10451045
}
1046+
1047+
// AVX-512 constants
1048+
// Fallback definitions, used only when the included headers do not already
// provide XSTATE_MASK_AVX512. The three ids below are the feature ids passed
// to _LocateXStateFeature in this file: KMASK is read as an array of
// ULONGLONG, ZMM_H as an array of YmmRegister_t and ZMM as an array of
// ZmmRegister_t. XSTATE_MASK_AVX512 is the bit mask combining all three.
#ifndef XSTATE_MASK_AVX512
1049+
#define XSTATE_AVX512_KMASK (5)
1050+
#define XSTATE_AVX512_ZMM_H (6)
1051+
#define XSTATE_AVX512_ZMM (7)
1052+
#define XSTATE_MASK_AVX512 ((1ui64 << (XSTATE_AVX512_KMASK)) | \
1053+
(1ui64 << (XSTATE_AVX512_ZMM_H)) | \
1054+
(1ui64 << (XSTATE_AVX512_ZMM)))
1055+
#endif
1056+
1057+
// AVX-only fallback for SetAVX512Context: forwards the lower (YMM) half of
// each ZMM register to SetAVXContext. The upper halves and the opmask
// registers in *titcontext are ignored.
// Returns whatever SetAVXContext returns.
static bool SetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
{
    TITAN_ENGINE_CONTEXT_t avxOnly;
    memset(&avxOnly, 0, sizeof(avxOnly));

    // Copy as many registers as the AVX context structure can hold.
    int reg = 0;
    while (reg < _countof(avxOnly.YmmRegisters))
    {
        avxOnly.YmmRegisters[reg] = titcontext->ZmmRegisters[reg].Low;
        ++reg;
    }

    return SetAVXContext(hActiveThread, &avxOnly);
}
1066+
1067+
// Writes the AVX-512 register state in *titcontext (ZMM registers and the
// opmask registers k1-k7) to the thread hActiveThread.
//
// hActiveThread: thread whose context is modified.
// titcontext:    register values to write; NULL is rejected.
//
// Returns true when SetThreadContext succeeded, or - when no AVX-512 xstate
// feature is enabled / selectable - when the AVX-only fallback succeeded.
// Returns false on any other failure.
__declspec(dllexport) bool TITCALL SetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
{
    if (titcontext == NULL)
        return false;

    if (InitXState() == false)
        return false;

    // No AVX-512 component enabled: only the AVX halves can be written.
    DWORD64 FeatureMask = _GetEnabledXStateFeatures();
    if ((FeatureMask & XSTATE_MASK_AVX512) == 0)
        return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext);

    // Size query: called without a buffer, the call is expected to fail with
    // ERROR_INSUFFICIENT_BUFFER and store the required size in ContextSize.
    DWORD ContextSize = 0;
    BOOL Success = _InitializeContext(NULL,
                                      CONTEXT_ALL | CONTEXT_XSTATE,
                                      NULL,
                                      &ContextSize);

    if ((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER))
        return false;

    DynBuf dataBuffer(ContextSize);
    PVOID Buffer = dataBuffer.GetPtr();
    if (Buffer == NULL)
        return false;

    PCONTEXT Context = NULL;
    Success = _InitializeContext(Buffer,
                                 CONTEXT_ALL | CONTEXT_XSTATE,
                                 &Context,
                                 &ContextSize);
    if (Success == FALSE)
        return false;

    if (_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE)
        return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext);

    // Start from the thread's current context so everything that is not
    // overwritten below is written back unchanged.
    if (GetThreadContext(hActiveThread, Context) == FALSE)
        return false;

    // NOTE(review): GetThreadContext may clear mask bits of components that are
    // in their initial state; if so, values written below for such components
    // might not be applied by SetThreadContext - confirm against the xstate docs.
    if (_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE)
        return false;

    const size_t ZmmCount = _countof(titcontext->ZmmRegisters);
    // The Hi16_ZMM component holds ZMM16..ZMM31, so its first entry is index 16.
    const size_t Hi16Base = 16;

    // Lengths start at 0 so a component that is not located never yields a
    // loop bound based on an uninitialized value.
    DWORD FeatureLengthSse = 0;
    DWORD FeatureLengthAvx = 0;
    DWORD FeatureLengthAvx512_KMASK = 0;
    DWORD FeatureLengthAvx512_ZMM_H = 0;
    DWORD FeatureLengthAvx512_ZMM = 0;
    XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse);
    XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx);
    ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK);
    ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM);
    YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H);

    if (Sse != NULL) // NULL when the feature cannot be located
    {
        // Bits 0..127 of the low registers.
        const size_t Count = MIN(FeatureLengthSse / sizeof(XmmRegister_t), ZmmCount);
        for (size_t i = 0; i < Count; i++)
            Sse[i] = titcontext->ZmmRegisters[i].Low.Low;
    }

    if (Avx != NULL) // NULL when the feature cannot be located
    {
        // Bits 128..255 of the low registers.
        const size_t Count = MIN(FeatureLengthAvx / sizeof(XmmRegister_t), ZmmCount);
        for (size_t i = 0; i < Count; i++)
            Avx[i] = titcontext->ZmmRegisters[i].Low.High;
    }

    if (Avx512_ZMM_H != NULL) // NULL when the feature cannot be located
    {
        // Bits 256..511 of the low registers.
        const size_t Count = MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), ZmmCount);
        for (size_t i = 0; i < Count; i++)
            Avx512_ZMM_H[i] = titcontext->ZmmRegisters[i].High;
    }

    // Full 512-bit high registers. Skipped entirely when the structure has no
    // slots for them (x86 build with 8 registers); the subtraction below would
    // otherwise wrap around and index past the end of ZmmRegisters.
    if (Avx512_ZMM != NULL && ZmmCount > Hi16Base)
    {
        const size_t Count = MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), ZmmCount - Hi16Base);
        for (size_t i = 0; i < Count; i++)
            Avx512_ZMM[i] = titcontext->ZmmRegisters[Hi16Base + i];
    }

    if (Avx512_KMASK != NULL) // NULL when the feature cannot be located
    {
        // Slot 0 (k0) has no counterpart in Opmask and keeps the value read
        // from the thread. Slot k maps to Opmask[k - 1], k7 included.
        const size_t KmaskSlots = FeatureLengthAvx512_KMASK / sizeof(ULONGLONG);
        for (size_t k = 1; k < KmaskSlots && k <= _countof(titcontext->Opmask); k++)
            Avx512_KMASK[k] = titcontext->Opmask[k - 1];
    }

    return (SetThreadContext(hActiveThread, Context) == TRUE);
}
1151+
1152+
// AVX-only fallback for GetAVX512Context: reads the AVX state through
// GetAVXContext and stores each YMM register in the lower half of the
// matching ZMM slot. Everything else in *titcontext (upper halves, ZMM slots
// beyond the AVX register count, opmask registers) is set to 0.
//
// Returns false, leaving *titcontext untouched, when GetAVXContext fails.
static bool GetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
{
    TITAN_ENGINE_CONTEXT_t Avx;
    memset(&Avx, 0, sizeof(Avx));
    if (!GetAVXContext(hActiveThread, &Avx))
        return false;

    // Zero first so that the parts AVX cannot provide do not keep whatever
    // the caller's buffer happened to contain.
    memset(titcontext, 0, sizeof(*titcontext));
    for (size_t i = 0; i < _countof(Avx.YmmRegisters); i++)
        titcontext->ZmmRegisters[i].Low = Avx.YmmRegisters[i];

    return true;
}
1166+
1167+
// Reads the AVX-512 register state of the thread hActiveThread (ZMM registers
// and the opmask registers k1-k7) into *titcontext.
//
// hActiveThread: thread whose context is read.
// titcontext:    receives the register values; NULL is rejected.
//
// Returns true on success. When no AVX-512 xstate feature is enabled /
// selectable, the AVX-only fallback is used and its result is returned.
// Fields whose xstate component cannot be located are left unmodified.
__declspec(dllexport) bool TITCALL GetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
{
    if (titcontext == NULL)
        return false;

    if (InitXState() == false)
        return false;

    // No AVX-512 component enabled: only the AVX halves can be read.
    DWORD64 FeatureMask = _GetEnabledXStateFeatures();
    if ((FeatureMask & XSTATE_MASK_AVX512) == 0)
        return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext);

    // Size query: called without a buffer, the call is expected to fail with
    // ERROR_INSUFFICIENT_BUFFER and store the required size in ContextSize.
    DWORD ContextSize = 0;
    BOOL Success = _InitializeContext(NULL,
                                      CONTEXT_ALL | CONTEXT_XSTATE,
                                      NULL,
                                      &ContextSize);

    if ((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER))
        return false;

    DynBuf dataBuffer(ContextSize);
    PVOID Buffer = dataBuffer.GetPtr();
    if (Buffer == NULL)
        return false;

    PCONTEXT Context = NULL;
    Success = _InitializeContext(Buffer,
                                 CONTEXT_ALL | CONTEXT_XSTATE,
                                 &Context,
                                 &ContextSize);
    if (Success == FALSE)
        return false;

    if (_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE)
        return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext);

    if (GetThreadContext(hActiveThread, Context) == FALSE)
        return false;

    // NOTE(review): the returned mask is not inspected; a component in its
    // initial state might leave stale bytes in the buffer - confirm whether
    // such components should be reported as zero instead.
    if (_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE)
        return false;

    const size_t ZmmCount = _countof(titcontext->ZmmRegisters);
    // The Hi16_ZMM component holds ZMM16..ZMM31, so its first entry is index 16.
    const size_t Hi16Base = 16;

    // Lengths start at 0 so a component that is not located never yields a
    // loop bound based on an uninitialized value.
    DWORD FeatureLengthSse = 0;
    DWORD FeatureLengthAvx = 0;
    DWORD FeatureLengthAvx512_KMASK = 0;
    DWORD FeatureLengthAvx512_ZMM_H = 0;
    DWORD FeatureLengthAvx512_ZMM = 0;
    XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse);
    XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx);
    ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK);
    ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM);
    YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H);

    if (Sse != NULL) // NULL when the feature cannot be located
    {
        // Bits 0..127 of the low registers.
        const size_t Count = MIN(FeatureLengthSse / sizeof(XmmRegister_t), ZmmCount);
        for (size_t i = 0; i < Count; i++)
            titcontext->ZmmRegisters[i].Low.Low = Sse[i];
    }

    if (Avx != NULL) // NULL when the feature cannot be located
    {
        // Bits 128..255 of the low registers.
        const size_t Count = MIN(FeatureLengthAvx / sizeof(XmmRegister_t), ZmmCount);
        for (size_t i = 0; i < Count; i++)
            titcontext->ZmmRegisters[i].Low.High = Avx[i];
    }

    if (Avx512_ZMM_H != NULL) // NULL when the feature cannot be located
    {
        // Bits 256..511 of the low registers.
        const size_t Count = MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), ZmmCount);
        for (size_t i = 0; i < Count; i++)
            titcontext->ZmmRegisters[i].High = Avx512_ZMM_H[i];
    }

    // Full 512-bit high registers. Skipped entirely when the structure has no
    // slots for them (x86 build with 8 registers); the subtraction below would
    // otherwise wrap around and write past the end of ZmmRegisters.
    if (Avx512_ZMM != NULL && ZmmCount > Hi16Base)
    {
        const size_t Count = MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), ZmmCount - Hi16Base);
        for (size_t i = 0; i < Count; i++)
            titcontext->ZmmRegisters[Hi16Base + i] = Avx512_ZMM[i];
    }

    if (Avx512_KMASK != NULL) // NULL when the feature cannot be located
    {
        // Slot 0 (k0) has no counterpart in Opmask and is skipped.
        // Slot k maps to Opmask[k - 1], k7 included.
        const size_t KmaskSlots = FeatureLengthAvx512_KMASK / sizeof(ULONGLONG);
        for (size_t k = 1; k < KmaskSlots && k <= _countof(titcontext->Opmask); k++)
            titcontext->Opmask[k - 1] = Avx512_KMASK[k];
    }

    return true;
}

TitanEngine/definitions.h

+2
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,8 @@ __declspec(dllexport) ULONG_PTR TITCALL GetContextDataEx(HANDLE hActiveThread, D
189189
__declspec(dllexport) ULONG_PTR TITCALL GetContextData(DWORD IndexOfRegister);
190190
__declspec(dllexport) bool TITCALL SetAVXContext(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_t* titcontext);
191191
__declspec(dllexport) bool TITCALL GetAVXContext(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_t* titcontext);
192+
// Reads the ZMM and opmask registers of hActiveThread into *titcontext; returns false on failure.
__declspec(dllexport) bool TITCALL GetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext);
193+
// Writes the ZMM and opmask registers from *titcontext to hActiveThread; returns false on failure.
__declspec(dllexport) bool TITCALL SetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext);
192194
__declspec(dllexport) bool TITCALL SetContextFPUDataEx(HANDLE hActiveThread, void* FPUSaveArea);
193195
__declspec(dllexport) bool TITCALL SetContextDataEx(HANDLE hActiveThread, DWORD IndexOfRegister, ULONG_PTR NewRegisterValue);
194196
__declspec(dllexport) bool TITCALL SetContextData(DWORD IndexOfRegister, ULONG_PTR NewRegisterValue);

TitanEngine/stdafx.h

+16
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,12 @@ typedef struct
125125
XmmRegister_t High; //AVX part
126126
} YmmRegister_t;
127127

128+
// One ZMM register, stored as two YmmRegister_t halves.
typedef struct
129+
{
130+
YmmRegister_t Low; // Lower half: the part of the register shared with AVX (YMM)
131+
YmmRegister_t High; // Upper half: only present with AVX-512
132+
} ZmmRegister_t;
133+
128134
typedef struct
129135
{
130136
BYTE data[10];
@@ -190,6 +196,16 @@ typedef struct
190196
#endif
191197
} TITAN_ENGINE_CONTEXT_t;
192198

199+
// AVX-512 register state exchanged through GetAVX512Context / SetAVX512Context.
typedef struct
200+
{
201+
#ifdef _WIN64
202+
ZmmRegister_t ZmmRegisters[32]; // 64-bit builds carry 32 ZMM slots
203+
#else // x86
204+
ZmmRegister_t ZmmRegisters[8]; // 32-bit builds carry 8 ZMM slots
205+
#endif
206+
// Opmask registers. k0 has no slot here, so Opmask[i] corresponds to k(i+1).
ULONGLONG Opmask[7]; // k0 is omitted; AVX-512BW extends these registers from 16 bits to 64 bits
207+
} TITAN_ENGINE_CONTEXT_AVX512_t;
208+
193209
typedef struct
194210
{
195211
ULONG_PTR BreakPointAddress;

0 commit comments

Comments
 (0)