1+ import winston from 'winston' ;
2+ import fs from 'fs' ;
13import { loadDataset } from '../utils/load-dataset' ;
24import { AVA , Spec } from '../../../src' ;
35
@@ -20,32 +22,107 @@ export type TestData = {
2022 */
2123export const runAdviseEvaluation = (
2224 selectQuestion : ( data : TestData ) => string ,
23- isPass : ( spec : Spec , answer : TestData [ 'answer' ] ) => boolean
25+ isPass : ( spec : Spec , answer : TestData [ 'answer' ] ) => boolean ,
26+ loggerPath ?: {
27+ info ?: string ;
28+ error ?: string ;
29+ }
2430) => {
2531 jest . setTimeout ( 3600000 ) ;
2632
33+ const errorLogPath = loggerPath ?. error || '__tests__/evaluation/advise/error.jsonl' ;
34+ const infoLogPath = loggerPath ?. info || '__tests__/evaluation/advise/info.jsonl' ;
35+
36+ // remove old log file
37+ if ( fs . existsSync ( errorLogPath ) ) {
38+ fs . rmSync ( errorLogPath ) ;
39+ }
40+ if ( fs . existsSync ( infoLogPath ) ) {
41+ fs . rmSync ( infoLogPath ) ;
42+ }
43+
44+ const logger = winston . createLogger ( {
45+ format : winston . format . json ( ) ,
46+ defaultMeta : { service : 'evaluate-service' } ,
47+ transports : [
48+ new winston . transports . File ( { filename : errorLogPath , level : 'error' } ) ,
49+ new winston . transports . File ( { filename : infoLogPath , level : 'info' } ) ,
50+ ] ,
51+ } ) ;
52+
2753 const ava = new AVA ( {
2854 llm : {
29- appId : process . env . TBOX_APP_ID ! ,
30- authorization : process . env . TBOX_AUTHORIZATION ! ,
55+ appId : process . env . TBOX_LLM_APP_ID ! ,
56+ authorization : process . env . TBOX_LLM_AUTH ! ,
3157 } ,
3258 } ) ;
3359
/**
 * Registers the jest cases that evaluate chart advising for one chart type.
 *
 * One `it` is registered per entry returned by `loadDataset(chartId)`; each
 * case runs `ava.extract` and `ava.advise` on the selected question, writes
 * the outcome to the winston logger, and records whether `isPass` accepted
 * the advised spec. A final `it` asserts that the share of accepted cases
 * for this chart is at least 0.95.
 *
 * Individual cases never fail on their own: only the aggregate pass-rate
 * case carries an assertion.
 *
 * All `it` calls happen synchronously. The previous version awaited promises
 * that were only resolved inside the `it` callbacks before registering the
 * pass-rate case, so that case was defined asynchronously, after the point
 * where jest expects test definitions to be complete.
 *
 * @param chartId - identifier passed to `loadDataset` and used in case names.
 * @returns a promise that resolves once the cases are registered (the
 *   function stays `async` so existing callers that await it keep working).
 */
const evaluateChartAdvise = async (chartId: string) => {
  const dataset = loadDataset(chartId);

  // One entry per executed case: true when `isPass` accepted the advised spec.
  // Kept in memory rather than re-read from the info log, because the file
  // transport writes asynchronously and the info file also receives
  // error-level entries, which would distort the tally.
  const outcomes: boolean[] = [];

  // Turns whatever was thrown into a loggable string.
  const describeError = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  dataset.forEach((data: TestData, i: number) => {
    it(`evaluate ${chartId} case ${i}`, async () => {
      console.log(`evaluate ${chartId} case ${i}`);
      const { answer } = data;
      const question = selectQuestion(data);

      let dataShards = [];
      try {
        dataShards = await ava.extract(question);
      } catch (e) {
        logger.error({
          msg: 'extract error',
          input: question,
          error: describeError(e),
        });
        // A failed extraction is a failed case; stop here so it is logged once.
        outcomes.push(false);
        return;
      }
      if (!dataShards || dataShards.length === 0) {
        logger.error({
          msg: 'extract empty',
          input: question,
        });
        outcomes.push(false);
        return;
      }

      let passed = false;
      try {
        const advises = await ava.advise(dataShards);
        const { spec } = advises?.[0]?.charts?.[0] || {};
        logger.info({
          msg: 'advise success',
          input: question,
          dataShards,
          output: spec,
          source: answer,
        });
        try {
          passed = isPass(spec, answer);
        } catch (e) {
          // A checker that throws counts as a failed case rather than aborting the run.
          passed = false;
        }
      } catch (e) {
        logger.error({
          msg: 'advise error',
          input: question,
          error: describeError(e),
        });
      }
      outcomes.push(passed);
    });
  });

  // Jest runs the cases of a file in definition order, so by the time this
  // case executes every case registered above has pushed its outcome.
  it('evaluate pass rate should >= 0.95', () => {
    const passCount = outcomes.filter(Boolean).length;
    // An empty dataset yields a rate of 0, which fails the threshold
    // (the previous 0 / 0 = NaN failed it as well).
    const passRate = outcomes.length === 0 ? 0 : passCount / outcomes.length;
    console.log('pass rate: ', passRate);
    expect(passRate).toBeGreaterThanOrEqual(0.95);
  });
};
50127
51128 const chartIds = [
0 commit comments