@@ -175,6 +175,17 @@ export function calculateRanks(metrics: ModelMetrics[]): ModelMetrics[] {
175175 } ) ;
176176}
177177
178+ /**
179+ * Calculates a comprehensive exactness score for a model based on validation results.
180+ *
181+ * The score combines:
182+ * 1. Match rate (what percentage of queries produced valid results)
183+ * 2. Quality of matches (how accurate the results are)
184+ * 3. Penalty for failures (reduces score for queries that failed)
185+ *
186+ * All calculations are done on a 0-1 scale internally for consistency, then scaled by 100
187+ * and rounded to an integer. The result is floored at 0 but not capped: the exact-match bonus can lift it above 100.
188+ */
178189function blendedExactnessScore ( provider : string , model : string ) {
179190 const modelKey = `${ provider } /${ model } ` ;
180191
@@ -192,29 +203,40 @@ function blendedExactnessScore(provider: string, model: string) {
192203 modelKey as keyof typeof validationSummaries . modelStats
193204 ] ;
194205
195- // Calculate match rates
206+ // Calculate match rates (0-1 scale)
196207 const totalMatchRate = totalMatches / validationSummaries . totalQuestions ;
197208 const exactMatchRate = exactMatches / validationSummaries . totalQuestions ;
198209 const failedMatchRate = 1 - totalMatchRate ;
199210
200- // Calculate quality score for successful matches
201- const qualityScore = blendScore ( avgExactDistance , avgNumericDistance , avgFScore ) ;
211+ // Calculate quality score for successful matches (0-1 scale)
212+ // This already accounts for exact matches through the distance metrics
213+ const qualityScore = blendScore ( avgExactDistance , avgNumericDistance , avgFScore ) / 100 ;
202214
203- // Calculate comprehensive exactness score with penalties for failures
204- // Base score from successful matches
215+ // Calculate comprehensive exactness score
216+ // Base score: successful matches weighted by their quality
205217 const baseScore = totalMatchRate * qualityScore ;
206218
207- // Penalty for failed matches (each failed match reduces score)
208- const failurePenalty = failedMatchRate * 1 ; // score here if needed
219+ // Apply penalty for failed matches (reduces score proportionally)
220+ // Using a moderate penalty to avoid overly harsh scoring
221+ const failurePenalty = failedMatchRate * 0.3 ;
209222
210- // Bonus for exact matches
211- const exactMatchBonus = exactMatchRate * 1 ; // score here if needed
223+ // Apply bonus for exact matches (additional reward for perfect accuracy)
224+ const exactMatchBonus = exactMatchRate * 0. 1; // 10% bonus for exact matches
212225
213226 const comprehensiveScore = Math . max ( 0 , baseScore - failurePenalty + exactMatchBonus ) ;
214227
215- return comprehensiveScore ;
228+ // Convert back to 0-100 scale for consistency with other scores
229+ return Math . round ( comprehensiveScore * 100 ) ;
216230}
217231
/**
 * Blends three per-model metrics into a single quality score.
 *
 * The result is a weighted sum: 65% from the exact distance, 25% from the
 * numeric distance and 10% from the F-score. Both distances are inverted
 * (`1 - distance`) so that a smaller distance contributes a larger score.
 *
 * Inputs are neither validated nor clamped; the result lies within 0-100
 * only when every argument lies within 0-1.
 *
 * @param exact - Exact distance metric (0 = perfect match, 1 = complete mismatch)
 * @param numeric - Numeric distance metric (0 = perfect match, 1 = complete mismatch)
 * @param fscore - F-score metric (0 = worst, 1 = best)
 * @returns Quality score on a 0-100 scale (100 = perfect)
 */
function blendScore(exact: number, numeric: number, fscore: number): number {
  return 100 * (0.65 * (1 - exact) + 0.25 * (1 - numeric) + 0.1 * fscore);
}
0 commit comments