@@ -1043,3 +1043,208 @@ __declspec(dllexport) bool TITCALL GetAVXContext(HANDLE hActiveThread, TITAN_ENG
     return true;
 }
+
+// AVX-512 constants
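+// (XSAVE components 5-7 hold the opmask (k) registers, the upper 256 bits of ZMM0-ZMM15,
+// and ZMM16-ZMM31 respectively.)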
+#ifndef XSTATE_MASK_AVX512
+#define XSTATE_AVX512_KMASK (5)
+#define XSTATE_AVX512_ZMM_H (6)
+#define XSTATE_AVX512_ZMM   (7)
+#define XSTATE_MASK_AVX512  ((1ui64 << (XSTATE_AVX512_KMASK)) | \
+                             (1ui64 << (XSTATE_AVX512_ZMM_H)) | \
+                             (1ui64 << (XSTATE_AVX512_ZMM)))
+#endif
+
+static bool SetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext) {
+    // Fall back to using AVX and ignore the rest
+    TITAN_ENGINE_CONTEXT_t Avx;
+    memset(&Avx, 0, sizeof(Avx));
+    for(int i = 0; i < _countof(Avx.YmmRegisters); i++) {
+        Avx.YmmRegisters[i] = titcontext->ZmmRegisters[i].Low;
+    }
+    return SetAVXContext(hActiveThread, &Avx);
+}
+
+__declspec(dllexport) bool TITCALL SetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
+{
+    if(InitXState() == false)
+        return false;
+
+    DWORD64 FeatureMask = _GetEnabledXStateFeatures();
+    if((FeatureMask & XSTATE_MASK_AVX512) == 0)
+        return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
+
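+    // The first _InitializeContext call passes no buffer: it is expected to fail with
+    // ERROR_INSUFFICIENT_BUFFER and only report the required size in ContextSize.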
+    DWORD ContextSize = 0;
+    BOOL Success = _InitializeContext(NULL,
+                                      CONTEXT_ALL | CONTEXT_XSTATE,
+                                      NULL,
+                                      &ContextSize);
+
+    if((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER))
+        return false;
+
+    DynBuf dataBuffer(ContextSize);
+    PVOID Buffer = dataBuffer.GetPtr();
+    if(Buffer == NULL)
+        return false;
+
+    PCONTEXT Context;
+    Success = _InitializeContext(Buffer,
+                                 CONTEXT_ALL | CONTEXT_XSTATE,
+                                 &Context,
+                                 &ContextSize);
+    if(Success == FALSE)
+        return false;
+
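+    // Request both AVX and AVX-512 state for the Get/SetThreadContext calls below.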
+    if(_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE)
+        return SetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
+
+    if(GetThreadContext(hActiveThread, Context) == FALSE)
+        return false;
+
+    if(_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE)
+        return false;
+
+    DWORD FeatureLengthSse;
+    DWORD FeatureLengthAvx;
+    DWORD FeatureLengthAvx512_KMASK;
+    DWORD FeatureLengthAvx512_ZMM_H;
+    DWORD FeatureLengthAvx512_ZMM;
+    XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse);
+    XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx);
+    ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK);
+    ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM);
+    YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H);
+
+    if(Sse != NULL) // If the feature is unsupported by the processor it will return NULL
+    {
+        for(int i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
+            Sse[i] = titcontext->ZmmRegisters[i].Low.Low;
+    }
+
+    if(Avx != NULL) // If the feature is unsupported by the processor it will return NULL
+    {
+        for(int i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
+            Avx[i] = titcontext->ZmmRegisters[i].Low.High;
+    }
+
+    if(Avx512_ZMM_H != NULL) // If the feature is unsupported by the processor it will return NULL
+    {
+        for(int i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
+            Avx512_ZMM_H[i] = titcontext->ZmmRegisters[i].High;
+    }
+
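+    // XSTATE_AVX512_ZMM holds the full ZMM16-ZMM31 registers, so indexing is offset by
+    // FeatureLengthAvx / sizeof(XmmRegister_t), i.e. the number of low ZMM registers.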
+    if(Avx512_ZMM != NULL) // If the feature is unsupported by the processor it will return NULL
+    {
+        for(int i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++)
+            Avx512_ZMM[i] = titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)];
+    }
+
+    if(Avx512_KMASK != NULL) // If the feature is unsupported by the processor it will return NULL
+    {
+        // k0 is always 0, don't store it.
+        for(int i = 1; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG) - 1, _countof(titcontext->Opmask)); i++)
+            Avx512_KMASK[i] = titcontext->Opmask[i - 1];
+    }
+
+    return (SetThreadContext(hActiveThread, Context) == TRUE);
+}
+
+static bool GetAVX512ContextFallbackToAVX(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
+{
+    // Fall back to using AVX and fill the rest with 0
+    TITAN_ENGINE_CONTEXT_t Avx;
+    memset(&Avx, 0, sizeof(Avx));
+    if(GetAVXContext(hActiveThread, &Avx)) {
+        for(int i = 0; i < _countof(Avx.YmmRegisters); i++)
+            titcontext->ZmmRegisters[i].Low = Avx.YmmRegisters[i];
+        return true;
+    }
+    else {
+        return false;
+    }
+}
+
+__declspec(dllexport) bool TITCALL GetAVX512Context(HANDLE hActiveThread, TITAN_ENGINE_CONTEXT_AVX512_t* titcontext)
+{
+    if(InitXState() == false)
+        return false;
+
+    DWORD64 FeatureMask = _GetEnabledXStateFeatures();
+    if((FeatureMask & XSTATE_MASK_AVX512) == 0) // XSTATE_MASK_AVX512
+        return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
+
+    DWORD ContextSize = 0;
+    BOOL Success = _InitializeContext(NULL,
+                                      CONTEXT_ALL | CONTEXT_XSTATE,
+                                      NULL,
+                                      &ContextSize);
+
+    if((Success == TRUE) || (GetLastError() != ERROR_INSUFFICIENT_BUFFER))
+        return false;
+
+    DynBuf dataBuffer(ContextSize);
+    PVOID Buffer = dataBuffer.GetPtr();
+    if(Buffer == NULL)
+        return false;
+
+    PCONTEXT Context;
+    Success = _InitializeContext(Buffer,
+                                 CONTEXT_ALL | CONTEXT_XSTATE,
+                                 &Context,
+                                 &ContextSize);
+    if(Success == FALSE)
+        return false;
+
+    if(_SetXStateFeaturesMask(Context, XSTATE_MASK_AVX | XSTATE_MASK_AVX512) == FALSE)
+        return GetAVX512ContextFallbackToAVX(hActiveThread, titcontext);
+
+    if(GetThreadContext(hActiveThread, Context) == FALSE)
+        return false;
+
+    if(_GetXStateFeaturesMask(Context, &FeatureMask) == FALSE)
+        return false;
+
+    DWORD FeatureLengthSse;
+    DWORD FeatureLengthAvx;
+    DWORD FeatureLengthAvx512_KMASK;
+    DWORD FeatureLengthAvx512_ZMM_H;
+    DWORD FeatureLengthAvx512_ZMM;
+    XmmRegister_t* Sse = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLengthSse);
+    XmmRegister_t* Avx = (XmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX, &FeatureLengthAvx);
+    ULONGLONG* Avx512_KMASK = (ULONGLONG*)_LocateXStateFeature(Context, XSTATE_AVX512_KMASK, &FeatureLengthAvx512_KMASK);
+    ZmmRegister_t* Avx512_ZMM = (ZmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM, &FeatureLengthAvx512_ZMM);
+    YmmRegister_t* Avx512_ZMM_H = (YmmRegister_t*)_LocateXStateFeature(Context, XSTATE_AVX512_ZMM_H, &FeatureLengthAvx512_ZMM_H);
+
+    if(Sse != NULL) // If the feature is unsupported by the processor it will return NULL
+    {
+        for(int i = 0; i < MIN(FeatureLengthSse / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
+            titcontext->ZmmRegisters[i].Low.Low = Sse[i];
+    }
+
+    if(Avx != NULL) // If the feature is unsupported by the processor it will return NULL
+    {
+        for(int i = 0; i < MIN(FeatureLengthAvx / sizeof(XmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
+            titcontext->ZmmRegisters[i].Low.High = Avx[i];
+    }
+
+    if(Avx512_ZMM_H != NULL) // If the feature is unsupported by the processor it will return NULL
+    {
+        for(int i = 0; i < MIN(FeatureLengthAvx512_ZMM_H / sizeof(YmmRegister_t), _countof(titcontext->ZmmRegisters)); i++)
+            titcontext->ZmmRegisters[i].High = Avx512_ZMM_H[i];
+    }
+
+    if(Avx512_ZMM != NULL) // If the feature is unsupported by the processor it will return NULL
+    {
+        for(int i = 0; i < MIN(FeatureLengthAvx512_ZMM / sizeof(ZmmRegister_t), _countof(titcontext->ZmmRegisters) - FeatureLengthAvx / sizeof(XmmRegister_t)); i++)
+            titcontext->ZmmRegisters[i + FeatureLengthAvx / sizeof(XmmRegister_t)] = Avx512_ZMM[i];
+    }
+
+    if(Avx512_KMASK != NULL) // If the feature is unsupported by the processor it will return NULL
+    {
+        // k0 is always 0, don't store it.
+        for(int i = 1; i < MIN(FeatureLengthAvx512_KMASK / sizeof(ULONGLONG) - 1, _countof(titcontext->Opmask)); i++)
+            titcontext->Opmask[i - 1] = Avx512_KMASK[i];
+    }
+
+    return true;
+}
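
For reference, a minimal caller sketch (not part of this commit). It assumes the TitanEngine header that declares TITAN_ENGINE_CONTEXT_AVX512_t and these two exports, and a target thread that is suspended or stopped at a debug event; the header name and helper function below are illustrative only.

#include <windows.h>
#include "TitanEngine.h"

// Hypothetical helper: round-trip the extended context of a stopped thread so the
// caller can inspect or patch ctx.ZmmRegisters / ctx.Opmask in between.
static bool EditAvx512State(HANDLE hThread)
{
    TITAN_ENGINE_CONTEXT_AVX512_t ctx = {};
    if(!GetAVX512Context(hThread, &ctx)) // falls back to AVX-only state on older CPUs/OSes
        return false;
    // ... modify ctx.ZmmRegisters[] / ctx.Opmask[] here ...
    return SetAVX512Context(hThread, &ctx);
}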