Skip to content

Commit c4482c7

Browse files
Yzing and yanzhi.yan authored
chore: add logger write through evaluate advise (#895)
Co-authored-by: yanzhi.yan <[email protected]>
1 parent 1488306 commit c4482c7

File tree

4 files changed

+105
-17
lines changed

4 files changed

+105
-17
lines changed

__tests__/evaluation/advise/shared.ts

Lines changed: 91 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import winston from 'winston';
2+
import fs from 'fs';
13
import { loadDataset } from '../utils/load-dataset';
24
import { AVA, Spec } from '../../../src';
35

@@ -20,32 +22,107 @@ export type TestData = {
2022
*/
2123
export const runAdviseEvaluation = (
2224
selectQuestion: (data: TestData) => string,
23-
isPass: (spec: Spec, answer: TestData['answer']) => boolean
25+
isPass: (spec: Spec, answer: TestData['answer']) => boolean,
26+
loggerPath?: {
27+
info?: string;
28+
error?: string;
29+
}
2430
) => {
2531
jest.setTimeout(3600000);
2632

33+
const errorLogPath = loggerPath?.error || '__tests__/evaluation/advise/error.jsonl';
34+
const infoLogPath = loggerPath?.info || '__tests__/evaluation/advise/info.jsonl';
35+
36+
// remove old log file
37+
if (fs.existsSync(errorLogPath)) {
38+
fs.rmSync(errorLogPath);
39+
}
40+
if (fs.existsSync(infoLogPath)) {
41+
fs.rmSync(infoLogPath);
42+
}
43+
44+
const logger = winston.createLogger({
45+
format: winston.format.json(),
46+
defaultMeta: { service: 'evaluate-service' },
47+
transports: [
48+
new winston.transports.File({ filename: errorLogPath, level: 'error' }),
49+
new winston.transports.File({ filename: infoLogPath, level: 'info' }),
50+
],
51+
});
52+
2753
const ava = new AVA({
2854
llm: {
29-
appId: process.env.TBOX_APP_ID!,
30-
authorization: process.env.TBOX_AUTHORIZATION!,
55+
appId: process.env.TBOX_LLM_APP_ID!,
56+
authorization: process.env.TBOX_LLM_AUTH!,
3157
},
3258
});
3359

/**
 * Registers the Jest cases for one chart's dataset, followed by a summary
 * test that asserts the pass rate computed from the info log.
 *
 * Every `it` is registered synchronously. Jest collects tests before running
 * them and then runs them serially in definition order, so the summary test
 * executes after all per-case tests of this chart without any promise
 * plumbing. (Registering a test after awaiting the case tests would happen
 * once the run has already started, which Jest does not support.)
 *
 * The function stays `async` so existing callers that treat the result as a
 * promise keep working.
 *
 * @param chartId - Identifier passed to `loadDataset` to select the dataset.
 */
const evaluateChartAdvise = async (chartId: string): Promise<void> => {
  const dataset = loadDataset(chartId);

  // Render a caught value as text so failures are diagnosable from the log.
  const describeError = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  dataset.forEach((data: TestData, i: number) => {
    it(`evaluate ${chartId} case ${i}`, async () => {
      console.log(`evaluate ${chartId} case ${i}`);
      const { answer } = data;
      const question = selectQuestion(data);

      let dataShards = [];
      try {
        dataShards = await ava.extract(question);
      } catch (e) {
        logger.error({
          msg: 'extract error',
          input: question,
          error: describeError(e),
        });
        // Stop here so one failure is not also reported as 'extract empty'.
        return;
      }

      if (!dataShards || dataShards.length === 0) {
        logger.error({
          msg: 'extract empty',
          input: question,
        });
        return;
      }

      try {
        const advises = await ava.advise(dataShards);
        const { spec } = advises?.[0]?.charts?.[0] || {};
        logger.info({
          msg: 'advise success',
          input: question,
          dataShards,
          output: spec,
          source: answer,
        });
      } catch (e) {
        logger.error({
          msg: 'advise error',
          input: question,
          error: describeError(e),
        });
      }
    });
  });

  it('evaluate pass rate should >= 0.95', () => {
    // Read the resolved path: `loggerPath` is optional, `infoLogPath` always
    // holds either the caller's path or the default.
    // NOTE(review): winston's File transport writes through a stream; confirm
    // that all entries are flushed before this test reads the file.
    const lines = fs
      .readFileSync(infoLogPath)
      .toString()
      .split('\n')
      .filter((line) => line.length > 0);

    const passCount = lines.filter((line) => {
      try {
        const log = JSON.parse(line);
        // The spec is logged under `output` and the expected answer under
        // `source` (see the 'advise success' entry above).
        return isPass(log.output, log.source);
      } catch (e) {
        // Unparseable lines and entries that make `isPass` throw count as failures.
        return false;
      }
    }).length;

    // An empty log means nothing passed; avoid a NaN rate from 0 / 0.
    const passRate = lines.length === 0 ? 0 : passCount / lines.length;
    console.log('pass rate: ', passRate);
    expect(passRate).toBeGreaterThanOrEqual(0.95);
  });
};
50127

51128
const chartIds = [

__tests__/evaluation/advise/specify-chart.test.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,9 @@ runAdviseEvaluation(
1010
(spec: Spec, answer: TestData['answer']) => {
1111
const { type, ...finalSpec } = spec || {};
1212
return type === answer.type && CHARTS[type] && validateObject(CHARTS[type].zodSchema, finalSpec) === true;
13+
},
14+
{
15+
info: '__tests__/evaluation/advise/specify-chart-info.jsonl',
16+
error: '__tests__/evaluation/advise/specify-chart-error.jsonl',
1317
}
1418
);

__tests__/evaluation/advise/unspecified-chart.test.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,9 @@ runAdviseEvaluation(
2626
(spec: Spec, answer: TestData['answer']) => {
2727
const { type, ...finalSpec } = spec || {};
2828
return typeMatches(type, answer.type) && validateObject(CHARTS[type].zodSchema, finalSpec) === true;
29+
},
30+
{
31+
info: '__tests__/evaluation/advise/unspecified-chart-info.jsonl',
32+
error: '__tests__/evaluation/advise/unspecified-chart-error.jsonl',
2933
}
3034
);

__tests__/evaluation/extract/index.test.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { generate } from './generate';
44
import { evaluate } from './evaluate';
55
import { EVALUATE_RESULT_LOG_PATH } from './constants';
66

7-
describe('expect evaluate pass rate >= 90%', () => {
7+
describe('expect evaluate pass rate >= 95%', () => {
88
it('generate success rate >= 95%', async () => {
99
const { success, total } = await generate();
1010
expect(success / total).toBeGreaterThanOrEqual(0.95);
@@ -15,9 +15,12 @@ describe('expect evaluate pass rate >= 90%', () => {
1515
expect(success / total).toBeGreaterThanOrEqual(0.95);
1616
}, 6000000);
1717

18-
it('evaluate extract pass rate >= 90%', () => {
18+
it('evaluate extract pass rate >= 95%', () => {
1919
const content = fs.readFileSync(EVALUATE_RESULT_LOG_PATH);
20-
const lines = content.toString().split('\n').filter(line => line.length > 0);
20+
const lines = content
21+
.toString()
22+
.split('\n')
23+
.filter((line) => line.length > 0);
2124
let total = lines.length;
2225
let pass = 0;
2326

0 commit comments

Comments
 (0)