Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions report-app/src/app/pages/report-list/report-list.html
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@
</ul>
}
</div>
@if (group.promptNames.length) {
<ul class="status-badge-group prompt-names">
@for (name of group.promptNames; track name) {
<li class="status-badge neutral">{{ name }}</li>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't this mean that 100+ prompts are shown as badges on the report? I'm not sure this is worth doing.

}
</ul>
}
</div>
<div class="run-meta-container">
<div class="run-meta">
Expand Down
9 changes: 9 additions & 0 deletions report-app/src/app/pages/report-list/report-list.scss
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,15 @@ h1, h2 {
padding: 0 20px;
}

// Spacing above the list of prompt-name badges.
.prompt-names {
  margin-top: 0.4rem;
}

// Badges inside the prompt-name list use a smaller, regular-weight font.
.prompt-names .status-badge {
  font-weight: 400;
  font-size: 0.75rem;
}

.select-for-comparison input[type='checkbox'] {
width: 20px;
height: 20px;
Expand Down
3 changes: 3 additions & 0 deletions runner/orchestration/grouping.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ export function groupSimilarReports(inputRuns: RunInfo[]): RunGroup[] {
const groupResults: AssessmentResult[] = [];
const firstRun = groupRuns[0];
const labels = new Set<string>();
const promptNames = new Set<string>();
let totalForGroup = 0;
let maxForGroup = 0;
let appsCount = 0;
Expand All @@ -70,6 +71,7 @@ export function groupSimilarReports(inputRuns: RunInfo[]): RunGroup[] {
totalForRun += result.score.totalPoints;
maxForRun += result.score.maxOverallPoints;
groupResults.push(result);
promptNames.add(result.promptDef.name);
}

// `|| 0` in case there are no results, otherwise we'll get NaN.
Expand All @@ -90,6 +92,7 @@ export function groupSimilarReports(inputRuns: RunInfo[]): RunGroup[] {
maxOverallPoints: maxForGroup / groupRuns.length || 0,
appsCount,
labels: Array.from(labels),
promptNames: Array.from(promptNames),
environmentId: firstRun.details.summary.environmentId,
framework: firstRun.details.summary.framework,
model: firstRun.details.summary.model,
Expand Down
5 changes: 5 additions & 0 deletions runner/reporting/report-local-disk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ export async function fetchReportsFromDisk(directory: string): Promise<FetchedLo
// were part of the same invocation. Add a unique suffix to the ID to
// prevent further grouping.
run.group = group.id = `${group.id}-l${index}`;

// Derive prompt names from the run data for backward compatibility
// with older groups.json files that don't have the field.
group.promptNames ??= run.results.map(r => r.promptDef.name);

data.set(group.id, {group, run});
}),
);
Expand Down
2 changes: 2 additions & 0 deletions runner/shared-interfaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,8 @@ export interface RunGroup {
};
/** Runner used to generate code for the runs in the group. */
runner?: CodegenRunnerInfo;
/** Names of prompts that were evaluated in this group. */
promptNames: string[];
}

/** Request information for a file generation. */
Expand Down