Skip to content

Commit 9d0aea7

Browse files
committed
refactor: improve exactness score calculation in evaluation
1 parent 55f5915 commit 9d0aea7

File tree

1 file changed

+22
-3
lines changed

1 file changed

+22
-3
lines changed

src/src/lib/eval.ts

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -187,13 +187,32 @@ function blendedExactnessScore(provider: string, model: string) {
187187
return 0;
188188
}
189189

190-
const { avgExactDistance, avgNumericDistance, avgFScore } =
190+
const { totalMatches, exactMatches, avgExactDistance, avgNumericDistance, avgFScore } =
191191
validationSummaries.modelStats[
192192
modelKey as keyof typeof validationSummaries.modelStats
193193
];
194194

195-
// strong preference for exact, numeric as backup, fscore as minor fallback (it's correlated with jaccard)
196-
return blendScore(avgExactDistance, avgNumericDistance, avgFScore);
195+
// Calculate match rates
196+
const totalMatchRate = totalMatches / validationSummaries.totalQuestions;
197+
const exactMatchRate = exactMatches / validationSummaries.totalQuestions;
198+
const failedMatchRate = 1 - totalMatchRate;
199+
200+
// Calculate quality score for successful matches
201+
const qualityScore = blendScore(avgExactDistance, avgNumericDistance, avgFScore);
202+
203+
// Calculate comprehensive exactness score with penalties for failures
204+
// Base score from successful matches
205+
const baseScore = totalMatchRate * qualityScore;
206+
207+
// Penalty for failed matches (each failed match reduces score)
208+
const failurePenalty = failedMatchRate * 1; // penalty weight (currently 1); adjust here if needed
209+
210+
// Bonus for exact matches
211+
const exactMatchBonus = exactMatchRate * 1; // bonus weight (currently 1); adjust here if needed
212+
213+
const comprehensiveScore = Math.max(0, baseScore - failurePenalty + exactMatchBonus);
214+
215+
return comprehensiveScore;
197216
}
198217

199218
function blendScore(exact: number, numeric: number, fscore: number) {

0 commit comments

Comments
 (0)