@@ -187,13 +187,32 @@ function blendedExactnessScore(provider: string, model: string) {
187
187
return 0 ;
188
188
}
189
189
190
- const { avgExactDistance, avgNumericDistance, avgFScore } =
190
+ const { totalMatches , exactMatches , avgExactDistance, avgNumericDistance, avgFScore } =
191
191
validationSummaries . modelStats [
192
192
modelKey as keyof typeof validationSummaries . modelStats
193
193
] ;
194
194
195
- // strong preference for exact, numeric as backup, fscore as minor fallback (it's correlated with jaccard)
196
- return blendScore ( avgExactDistance , avgNumericDistance , avgFScore ) ;
195
+ // Calculate match rates
196
+ const totalMatchRate = totalMatches / validationSummaries . totalQuestions ;
197
+ const exactMatchRate = exactMatches / validationSummaries . totalQuestions ;
198
+ const failedMatchRate = 1 - totalMatchRate ;
199
+
200
+ // Calculate quality score for successful matches
201
+ const qualityScore = blendScore ( avgExactDistance , avgNumericDistance , avgFScore ) ;
202
+
203
+ // Calculate comprehensive exactness score with penalties for failures
204
+ // Base score from successful matches
205
+ const baseScore = totalMatchRate * qualityScore ;
206
+
207
+ // Penalty for failed matches (each failed match reduces score)
208
+ const failurePenalty = failedMatchRate * 1 ; // score here if needed
209
+
210
+ // Bonus for exact matches
211
+ const exactMatchBonus = exactMatchRate * 1 ; // score here if needed
212
+
213
+ const comprehensiveScore = Math . max ( 0 , baseScore - failurePenalty + exactMatchBonus ) ;
214
+
215
+ return comprehensiveScore ;
197
216
}
198
217
199
218
function blendScore ( exact : number , numeric : number , fscore : number ) {
0 commit comments