Skip to content

Commit 56df0c4

Browse files
Add trending views for both leaf-level scenario runs and aggregated scenario groups. (#6207)
* Update package dependencies * Minor tweaks; trigger a build. * Update outdated packages * Handle multiple executions in the report summary. * Add ability to view multiple versions of the trend data * Support adding tags to scenarios Also includes - Support for measuring and logging diagnostic data related to caching, latency and token usage for all LLM turns involved in each evaluation. - Report generation changes to view and filter scenarios by the tags above, and to display the above usage statistics under each scenario in the tree. - Changes to make some previously public types internal to avoid public API bloat. - Other miscellaneous bug fixes and improvements in the report generation and display. Fixes #6034 and #5970 * WIP * Updates to fix merge conflicts * Add score item-level trend data. * Rework tree to use node keys and add reporting context. * Move rendermarkdown to ReportContext * Move more central data into the ReportContext * Build history report; Refactor major components into files. * Report styling updates. * Fix npm linter errors * Update package-lock.json * Fixup package-lock.json * Rollback change to make chat response and messages optional. * Messages cannot be null * Don't show the selectors if there is no history. * Fix for duplicate data embedding. * Move tag selection into the ReportContext * Rename firstExecutionName * Change default trend capture from 1 to 10 * Make conversations 100% wide up to 72rem. * Update maxWidth on container * Set a maxwidth on section. --------- Co-authored-by: Shyam Namboodiripad <[email protected]>
1 parent 34cd461 commit 56df0c4

File tree

21 files changed

+1882
-1200
lines changed

21 files changed

+1882
-1200
lines changed

src/Libraries/Microsoft.Extensions.AI.Evaluation.Console/Commands/ReportCommand.cs

+11
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,25 @@ internal async Task<int> InvokeAsync(
5151

5252
List<ScenarioRunResult> results = [];
5353

54+
string? latestExecutionName = null;
55+
5456
await foreach (string executionName in
5557
resultStore.GetLatestExecutionNamesAsync(lastN, cancellationToken).ConfigureAwait(false))
5658
{
59+
latestExecutionName ??= executionName;
60+
5761
await foreach (ScenarioRunResult result in
5862
resultStore.ReadResultsAsync(
5963
executionName,
6064
cancellationToken: cancellationToken).ConfigureAwait(false))
6165
{
66+
if (result.ExecutionName != latestExecutionName)
67+
{
68+
// Clear the chat data for following executions
69+
result.Messages = [];
70+
result.ModelResponse = new ChatResponse();
71+
}
72+
6273
results.Add(result);
6374

6475
logger.LogInformation("Execution: {executionName} Scenario: {scenarioName} Iteration: {iterationName}", result.ExecutionName, result.ScenarioName, result.IterationName);

src/Libraries/Microsoft.Extensions.AI.Evaluation.Console/Program.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ private static async Task<int> Main(string[] args)
8585
};
8686
reportCmd.AddOption(outputOpt);
8787

88-
var lastNOpt = new Option<int>(["-n"], () => 1, "Number of most recent executions to include in the report.");
88+
var lastNOpt = new Option<int>(["-n"], () => 10, "Number of most recent executions to include in the report.");
8989
reportCmd.AddOption(lastNOpt);
9090

9191
var formatOpt =

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/azure-devops-report/src/main.tsx

+6-3
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ import * as SDK from "azure-devops-extension-sdk";
1010
import { getClient } from "azure-devops-extension-api";
1111
import { Build, Attachment, BuildRestClient } from "azure-devops-extension-api/Build";
1212
import { FluentProvider, webLightTheme } from '@fluentui/react-components';
13-
import { createScoreTree } from '../../components/Summary.ts';
13+
import { createScoreSummary as createScoreSummary } from '../../components/Summary.ts';
14+
import { ReportContextProvider } from '../../components/ReportContext.tsx';
1415

1516
const ErrorHtml = ({ message }: { message: string }) =>
1617
<html>
@@ -67,12 +68,14 @@ const run = async () => {
6768
throw new Error('No data was available to load.');
6869
}
6970

70-
const scoreTree = createScoreTree(dataset);
71+
const scoreSummary = createScoreSummary(dataset);
7172

7273
createRoot(document.getElementById('root')!).render(
7374
<FluentProvider theme={webLightTheme}>
7475
<StrictMode>
75-
<App tree={scoreTree} dataset={dataset} />
76+
<ReportContextProvider dataset={dataset} scoreSummary={scoreSummary}>
77+
<App />
78+
</ReportContextProvider>
7679
</StrictMode>
7780
</FluentProvider>
7881
);

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/App.tsx

+13-27
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,11 @@ import { Settings28Regular, FilterDismissRegular, Dismiss20Regular } from '@flue
66
import { Drawer, DrawerBody, DrawerHeader, DrawerHeaderTitle, Switch, Tooltip } from '@fluentui/react-components';
77
import { makeStyles } from '@fluentui/react-components';
88
import './App.css';
9-
import { ScoreNode } from './Summary';
109
import { ScenarioGroup } from './ScenarioTree';
1110
import { GlobalTagsDisplay, FilterableTagsDisplay, categorizeAndSortTags } from './TagsDisplay';
1211
import { tokens } from '@fluentui/react-components';
13-
14-
type AppProperties = {
15-
dataset: Dataset,
16-
tree: ScoreNode,
17-
};
12+
import { ScoreNodeHistory } from './ScoreNodeHistory';
13+
import { useReportContext } from './ReportContext';
1814

1915
const useStyles = makeStyles({
2016
header: {
@@ -26,6 +22,8 @@ const useStyles = makeStyles({
2622
zIndex: 1,
2723
paddingBottom: '12px',
2824
backgroundColor: tokens.colorNeutralBackground1,
25+
borderBottom: `1px solid ${tokens.colorNeutralStroke2}`,
26+
marginBottom: '1rem',
2927
},
3028
headerTop: {
3129
display: 'flex',
@@ -82,28 +80,16 @@ const useStyles = makeStyles({
8280
drawerBody: { paddingTop: '1rem' },
8381
});
8482

85-
function App({ dataset, tree }: AppProperties) {
83+
function App() {
8684
const classes = useStyles();
85+
const { dataset, scoreSummary, selectedTags, clearFilters } = useReportContext();
8786
const [isSettingsOpen, setIsSettingsOpen] = useState(false);
88-
const [renderMarkdown, setRenderMarkdown] = useState(true);
89-
const [selectedTags, setSelectedTags] = useState<string[]>([]);
90-
87+
const { renderMarkdown, setRenderMarkdown } = useReportContext();
9188
const { globalTags, filterableTags } = categorizeAndSortTags(dataset);
9289

9390
const toggleSettings = () => setIsSettingsOpen(!isSettingsOpen);
94-
const toggleRenderMarkdown = () => setRenderMarkdown(!renderMarkdown);
9591
const closeSettings = () => setIsSettingsOpen(false);
9692

97-
const handleTagClick = (tag: string) => {
98-
setSelectedTags((prevTags) =>
99-
prevTags.includes(tag) ? prevTags.filter((t) => t !== tag) : [...prevTags, tag]
100-
);
101-
};
102-
103-
const clearFilters = () => {
104-
setSelectedTags([]);
105-
};
106-
10793
return (
10894
<>
10995
<div className={classes.header}>
@@ -125,17 +111,17 @@ function App({ dataset, tree }: AppProperties) {
125111
</div>
126112
</div>
127113
<GlobalTagsDisplay globalTags={globalTags} />
114+
128115
<FilterableTagsDisplay
129116
filterableTags={filterableTags}
130-
onTagClick={handleTagClick}
131-
selectedTags={selectedTags}
132117
/>
118+
119+
<ScoreNodeHistory />
133120
</div>
134121

135122
<ScenarioGroup
136-
node={tree}
137-
renderMarkdown={renderMarkdown}
138-
selectedTags={selectedTags}
123+
node={scoreSummary.primaryResult}
124+
scoreSummary={scoreSummary}
139125
/>
140126

141127
<p className={classes.footerText}>
@@ -150,7 +136,7 @@ function App({ dataset, tree }: AppProperties) {
150136
<DrawerBody className={classes.drawerBody}>
151137
<Switch
152138
checked={renderMarkdown}
153-
onChange={toggleRenderMarkdown}
139+
onChange={(_ev, data) => setRenderMarkdown(data.checked)}
154140
label={<span className={classes.switchLabel}>Render markdown for conversations</span>}
155141
/>
156142
</DrawerBody>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
import { Table, TableHeader, TableRow, TableHeaderCell, TableBody, TableCell } from "@fluentui/react-components";
2+
import { ChevronDown12Regular, ChevronRight12Regular, Warning16Regular, CheckmarkCircle16Regular, Copy16Regular } from "@fluentui/react-icons";
3+
import { useState } from "react";
4+
import { useStyles } from "./Styles";
5+
6+
7+
/**
 * Collapsible "LLM Chat Diagnostic Details" section for a single evaluation.
 *
 * The section starts collapsed; clicking the header toggles it. When at least one
 * turn reports a cache status, the header also shows how many of the turns were
 * served from cache. The expanded body is a table with one row per turn.
 *
 * @param chatDetails - Diagnostic details whose `turnDetails` entries supply the
 *   per-turn cache key, cache status, latency, model and token usage shown here.
 */
export const ChatDetailsSection = ({ chatDetails }: { chatDetails: ChatDetails; }) => {
    const classes = useStyles();
    const [isExpanded, setIsExpanded] = useState(false);

    const { turnDetails } = chatDetails;
    const totalTurns = turnDetails.length;
    const cachedTurns = turnDetails.filter(turn => turn.cacheHit === true).length;

    // Optional columns are only rendered when at least one turn supplies a value for them.
    const hasCacheKey = turnDetails.some(turn => turn.cacheKey !== undefined);
    const hasCacheStatus = turnDetails.some(turn => turn.cacheHit !== undefined);
    const hasModelInfo = turnDetails.some(turn => turn.model !== undefined);
    const hasInputTokens = turnDetails.some(turn => turn.usage?.inputTokenCount !== undefined);
    const hasOutputTokens = turnDetails.some(turn => turn.usage?.outputTokenCount !== undefined);
    const hasTotalTokens = turnDetails.some(turn => turn.usage?.totalTokenCount !== undefined);

    const copyToClipboard = (text: string) => {
        // writeText() returns a promise that rejects when clipboard access is denied;
        // handle the rejection so it does not surface as an unhandled promise rejection.
        void navigator.clipboard.writeText(text).catch((error: unknown) => {
            console.error("Failed to copy text to the clipboard.", error);
        });
    };

    return (
        <div className={classes.section}>
            <div className={classes.sectionHeader} onClick={() => setIsExpanded(!isExpanded)}>
                {isExpanded ? <ChevronDown12Regular /> : <ChevronRight12Regular />}
                <h3 className={classes.sectionHeaderText}>LLM Chat Diagnostic Details</h3>
                {hasCacheStatus && (
                    <div className={classes.hint}>
                        {cachedTurns !== totalTurns ?
                            <Warning16Regular className={classes.cacheMissIcon} /> :
                            <CheckmarkCircle16Regular className={classes.cacheHitIcon} />}
                        {cachedTurns}/{totalTurns} chat responses for this evaluation were fulfilled from cache
                    </div>
                )}
            </div>

            {isExpanded && (
                <div className={classes.sectionContainer}>
                    <div className={classes.tableContainer}>
                        <Table>
                            <TableHeader>
                                <TableRow>
                                    {hasCacheKey && <TableHeaderCell>Cache Key</TableHeaderCell>}
                                    {hasCacheStatus && <TableHeaderCell>Cache Status</TableHeaderCell>}
                                    <TableHeaderCell>Latency (s)</TableHeaderCell>
                                    {hasModelInfo && <TableHeaderCell>Model Used</TableHeaderCell>}
                                    {hasInputTokens && <TableHeaderCell>Input Tokens</TableHeaderCell>}
                                    {hasOutputTokens && <TableHeaderCell>Output Tokens</TableHeaderCell>}
                                    {hasTotalTokens && <TableHeaderCell>Total Tokens</TableHeaderCell>}
                                </TableRow>
                            </TableHeader>
                            <TableBody>
                                {turnDetails.map((turn, index) => (
                                    <TableRow key={index}>
                                        {hasCacheKey && (
                                            <TableCell className={classes.cacheKeyCell}>
                                                {turn.cacheKey ? (
                                                    // Only the first 8 characters are shown; the full key is in the tooltip.
                                                    <div className={classes.cacheKeyContainer} title={turn.cacheKey}>
                                                        <span className={classes.cacheKey}>
                                                            {turn.cacheKey.substring(0, 8)}...
                                                        </span>
                                                        <button
                                                            className={classes.copyButton}
                                                            onClick={(e) => {
                                                                // Keep the click from bubbling to ancestor click handlers.
                                                                e.stopPropagation();
                                                                copyToClipboard(turn.cacheKey || "");
                                                            }}
                                                            title="Copy Cache Key"
                                                        >
                                                            <Copy16Regular />
                                                        </button>
                                                    </div>
                                                ) : (
                                                    <span className={classes.noCacheKey}>N/A</span>
                                                )}
                                            </TableCell>
                                        )}
                                        {hasCacheStatus && (
                                            <TableCell>
                                                {turn.cacheHit === true ?
                                                    <span className={classes.cacheHit}>
                                                        <CheckmarkCircle16Regular className={classes.cacheHitIcon} /> Hit
                                                    </span> :
                                                    <span className={classes.cacheMiss}>
                                                        <Warning16Regular className={classes.cacheMissIcon} /> Miss
                                                    </span>}
                                            </TableCell>
                                        )}
                                        <TableCell>{turn.latency.toFixed(2)}</TableCell>
                                        {hasModelInfo && <TableCell>{turn.model || '-'}</TableCell>}
                                        {/* `??` rather than `||` so that a genuine count of 0 is shown instead of '-'. */}
                                        {hasInputTokens && <TableCell>{turn.usage?.inputTokenCount ?? '-'}</TableCell>}
                                        {hasOutputTokens && <TableCell>{turn.usage?.outputTokenCount ?? '-'}</TableCell>}
                                        {hasTotalTokens && <TableCell>{turn.usage?.totalTokenCount ?? '-'}</TableCell>}
                                    </TableRow>
                                ))}
                            </TableBody>
                        </Table>
                    </div>
                </div>
            )}
        </div>
    );
};
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import { mergeClasses } from "@fluentui/react-components";
2+
import { ChevronDown12Regular, ChevronRight12Regular } from "@fluentui/react-icons";
3+
import { useState } from "react";
4+
import ReactMarkdown from "react-markdown";
5+
import { useReportContext } from "./ReportContext";
6+
import { useStyles } from "./Styles";
7+
import { ChatMessageDisplay } from "./Summary";
8+
9+
10+
/**
 * Collapsible "Conversation" section that lists the chat messages of a scenario run.
 *
 * The section starts expanded; clicking the header toggles it. Messages are rendered
 * through ReactMarkdown or as preformatted text depending on the `renderMarkdown`
 * flag read from the report context.
 *
 * @param messages - Messages to display, in order.
 * @param model - Optional model name shown in the header hint.
 * @param usage - Optional token usage shown in the header hint.
 */
export const ConversationDetails = ({ messages, model, usage }: {
    messages: ChatMessageDisplay[];
    model?: string;
    usage?: UsageDetails;
}) => {
    const classes = useStyles();
    const [isExpanded, setIsExpanded] = useState(true);
    const { renderMarkdown } = useReportContext();

    // 'user' and 'system' messages share one row style; every other role gets the assistant style.
    const isUserSide = (role: string) => {
        const normalizedRole = role.toLowerCase();
        return normalizedRole === 'user' || normalizedRole === 'system';
    };

    // Build the header hint from whichever details are present. Token counts are
    // compared against null/undefined rather than tested for truthiness so that a
    // reported count of 0 is still displayed.
    const infoParts: string[] = [];
    if (model) {
        infoParts.push(`Model: ${model}`);
    }
    if (usage?.inputTokenCount != null) {
        infoParts.push(`Input Tokens: ${usage.inputTokenCount}`);
    }
    if (usage?.outputTokenCount != null) {
        infoParts.push(`Output Tokens: ${usage.outputTokenCount}`);
    }
    if (usage?.totalTokenCount != null) {
        infoParts.push(`Total Tokens: ${usage.totalTokenCount}`);
    }
    const infoText = infoParts.join(' • ');

    return (
        <div className={classes.section}>
            <div className={classes.sectionHeader} onClick={() => setIsExpanded(!isExpanded)}>
                {isExpanded ? <ChevronDown12Regular /> : <ChevronRight12Regular />}
                <h3 className={classes.sectionHeaderText}>Conversation</h3>
                {infoText && <div className={classes.hint}>{infoText}</div>}
            </div>

            {isExpanded && (
                <div className={classes.sectionContainer}>
                    {messages.map((message, index) => {
                        const isFromUserSide = isUserSide(message.role);
                        const messageRowClass = mergeClasses(
                            classes.messageRow,
                            isFromUserSide ? classes.userMessageRow : classes.assistantMessageRow
                        );

                        return (
                            <div key={index} className={messageRowClass}>
                                <div className={classes.messageParticipantName}>{message.participantName}</div>
                                <div className={classes.messageBubble}>
                                    {renderMarkdown ?
                                        <ReactMarkdown>{message.content}</ReactMarkdown> :
                                        <pre className={classes.preWrap}>{message.content}</pre>}
                                </div>
                            </div>
                        );
                    })}
                </div>
            )}
        </div>
    );
};
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import { DismissCircle16Regular, Warning16Regular, Info16Regular } from "@fluentui/react-icons";
2+
import { useStyles } from "./Styles";
3+
4+
5+
/**
 * Renders evaluation diagnostics as a flat list of messages grouped by severity:
 * all errors first, then all warnings, then all informational entries. Each entry
 * is prefixed with a severity icon and styled with the matching message class.
 *
 * @param diagnostics - Diagnostics to display; entries with any other severity are not rendered.
 */
export const DiagnosticsContent = ({ diagnostics }: { diagnostics: EvaluationDiagnostic[]; }) => {
    const classes = useStyles();

    // One entry per severity, listed in display order.
    const severityGroups = [
        { severity: "error", keyPrefix: "error", className: classes.failMessage, Icon: DismissCircle16Regular },
        { severity: "warning", keyPrefix: "warning", className: classes.warningMessage, Icon: Warning16Regular },
        { severity: "informational", keyPrefix: "info", className: classes.infoMessage, Icon: Info16Regular },
    ];

    return (
        <>
            {severityGroups.flatMap(({ severity, keyPrefix, className, Icon }) =>
                diagnostics
                    .filter(entry => entry.severity === severity)
                    .map((entry, position) => (
                        // Keys carry the group prefix so they stay unique across severities.
                        <div key={`${keyPrefix}-${position}`} className={className}>
                            <Icon /> {entry.message}
                        </div>
                    ))
            )}
        </>
    );
};

0 commit comments

Comments
 (0)