@@ -2,12 +2,11 @@ import { create as mutate } from "mutative";
22import type { StoreApi } from "zustand" ;
33
44import type { StreamResponse , Word } from "@hypr/plugin-listener" ;
5- import * as main from "../../tinybase/main " ;
5+ import type { WordLike } from "../../../utils/segment " ;
66
// Per-channel word buffers, keyed by numeric channel index.
// This hunk removes the local PartialWord alias (a Pick of text / start_ms /
// end_ms / channel from main.Word) and stores WordLike entries, imported from
// utils/segment, instead.
7- type PartialWord = Pick < main . Word , "text" | "start_ms" | "end_ms" | "channel" > ;
8- type WordsByChannel = Record < number , PartialWord [ ] > ;
7+ type WordsByChannel = Record < number , WordLike [ ] > ;
98
// Callback the slice invokes with a batch of words: finalized words from a
// response, and any leftover buffered words when the transcript is reset.
// Its element type changes from PartialWord to WordLike in this hunk.
10- export type HandlePersistCallback = ( words : PartialWord [ ] ) => void ;
9+ export type HandlePersistCallback = ( words : WordLike [ ] ) => void ;
1110
1211export type TranscriptState = {
1312 partialWordsByChannel : WordsByChannel ;
@@ -25,127 +24,118 @@ const initialState: TranscriptState = {
2524 handlePersist : undefined ,
2625} ;
2726
// Removed by this change (every line below is on the deletion side of the diff).
// The old helper did three things:
//   1. trimmed each raw word and skipped it when the trimmed text was empty;
//   2. converted start/end to rounded *_ms values by multiplying by 1000
//      (presumably the inputs are seconds — TODO confirm against the plugin type);
//   3. folded any word whose text starts with "'" into the preceding word,
//      extending that word's end_ms.
// It returned the result wrapped as { words }.
28- const sanitizeWords = (
29- rawWords : Word [ ] ,
30- channelIndex : number ,
31- ) : { words : PartialWord [ ] } => {
32- const trimmed = rawWords . reduce < PartialWord [ ] > ( ( acc , word ) => {
33- const text = word . word . trim ( ) ;
34- if ( ! text ) {
35- return acc ;
36- }
37-
38- const start_ms = Math . round ( word . start * 1000 ) ;
39- const end_ms = Math . round ( word . end * 1000 ) ;
40-
41- acc . push ( {
42- text,
43- start_ms,
44- end_ms,
45- channel : channelIndex ,
46- } ) ;
47-
48- return acc ;
49- } , [ ] ) ;
50-
51- if ( ! trimmed . length ) {
52- return { words : trimmed } ;
53- }
54-
55- const merged : PartialWord [ ] = [ ] ;
56-
57- for ( let i = 0 ; i < trimmed . length ; i ++ ) {
58- const word = trimmed [ i ] ;
59- if ( merged . length > 0 && word . text . startsWith ( "'" ) ) {
60- const previous = merged [ merged . length - 1 ] ;
61- merged [ merged . length - 1 ] = {
62- ...previous ,
63- text : `${ previous . text } ${ word . text } ` ,
64- end_ms : word . end_ms ,
65- } ;
66- continue ;
67- }
68-
69- merged . push ( word ) ;
70- }
71-
72- return { words : merged } ;
73- } ;
74-
/**
 * Creates the transcript slice of the store: per-channel buffers of partial
 * (not yet final) words, plus the actions that maintain those buffers.
 *
 * @param set - Store setter used to commit state changes.
 * @param get - Store getter used to read the current state.
 * @returns The initial transcript state merged with its actions.
 */
export const createTranscriptSlice = <T extends TranscriptState & TranscriptActions>(
  set: StoreApi<T>["setState"],
  get: StoreApi<T>["getState"],
): TranscriptState & TranscriptActions => {
  /**
   * Applies a final batch for one channel: buffered partial words that do not
   * start strictly after the batch's last word are discarded, and the batch is
   * handed to the persist callback (if one is registered).
   */
  const handleFinalWords = (
    channelIndex: number,
    words: WordLike[],
  ): void => {
    const { partialWordsByChannel, handlePersist } = get();

    // The boundary does not depend on the buffered word, so compute it once
    // rather than once per filter callback.
    const lastEndMs = getLastEndMs(words);
    const remaining = (partialWordsByChannel[channelIndex] ?? []).filter(
      (word) => word.start_ms > lastEndMs,
    );

    set((state) =>
      mutate(state, (draft) => {
        draft.partialWordsByChannel[channelIndex] = remaining;
      })
    );

    handlePersist?.(words);
  };

  /**
   * Applies a partial batch for one channel: buffered words that end at or
   * before the batch starts, or start at or after the batch ends, are kept
   * around it; everything overlapping the batch's time range is replaced.
   */
  const handlePartialWords = (
    channelIndex: number,
    words: WordLike[],
  ): void => {
    const { partialWordsByChannel } = get();
    const existing = partialWordsByChannel[channelIndex] ?? [];

    // Both boundaries are loop-invariant; hoisted out of the filter callbacks.
    const firstStartMs = getFirstStartMs(words);
    const lastEndMs = getLastEndMs(words);

    const before = existing.filter((word) => word.end_ms <= firstStartMs);
    const after = existing.filter((word) => word.start_ms >= lastEndMs);

    set((state) =>
      mutate(state, (draft) => {
        draft.partialWordsByChannel[channelIndex] = [...before, ...words, ...after];
      })
    );
  };

  return {
    ...initialState,
    // Registers (or replaces) the callback that receives words to persist.
    setTranscriptPersist: (callback) => {
      set((state) =>
        mutate(state, (draft) => {
          draft.handlePersist = callback;
        })
      );
    },
    // Routes a stream response into the partial or final path. Only "Results"
    // responses are considered, using the first channel index and the first
    // alternative; responses that yield no words are ignored.
    handleTranscriptResponse: (response) => {
      if (response.type !== "Results") {
        return;
      }

      const channelIndex = response.channel_index[0];
      const alternative = response.channel.alternatives[0];
      if (channelIndex === undefined || !alternative) {
        return;
      }

      const words = transformWords(alternative.words ?? [], channelIndex);
      if (!words.length) {
        return;
      }

      if (response.is_final) {
        handleFinalWords(channelIndex, words);
      } else {
        handlePartialWords(channelIndex, words);
      }
    },
    // Flushes whatever is still buffered (all channels, flattened) to the
    // persist callback, then clears both the buffers and the callback.
    resetTranscript: () => {
      const { partialWordsByChannel, handlePersist } = get();

      const remainingWords = Object.values(partialWordsByChannel).flat();
      if (remainingWords.length > 0) {
        handlePersist?.(remainingWords);
      }

      set((state) =>
        mutate(state, (draft) => {
          draft.partialWordsByChannel = {};
          draft.handlePersist = undefined;
        })
      );
    },
  };
};
118119
119- handlePersist ?.( words ) ;
120- return ;
121- }
122-
123- const existing = partialWordsByChannel [ channelIndex ] ?? [ ] ;
124- const firstStartMs = words [ 0 ] ?. start_ms ?? 0 ;
125- const lastEndMs = words [ words . length - 1 ] ?. end_ms ?? 0 ;
/** Returns `end_ms` of the last word, or 0 when `words` is empty. */
const getLastEndMs = (words: readonly WordLike[]): number => words[words.length - 1]?.end_ms ?? 0;

/** Returns `start_ms` of the first word, or 0 when `words` is empty. */
const getFirstStartMs = (words: readonly WordLike[]): number => words[0]?.start_ms ?? 0;
126122
127- const before = existing . filter ( ( word ) => word . end_ms <= firstStartMs ) ;
128- const after = existing . filter ( ( word ) => word . start_ms >= lastEndMs ) ;
/**
 * Converts raw listener words into `WordLike` entries for one channel.
 *
 * `start` and `end` are multiplied by 1000 and rounded to whole numbers to
 * produce `start_ms` / `end_ms` (this presumes the raw values are seconds —
 * confirm against the listener plugin's `Word` type). The text is copied
 * verbatim: it is not trimmed, and words with empty text are not dropped.
 *
 * @param rawWords - Words as delivered by the listener plugin.
 * @param channelIndex - Channel number stamped onto every produced word.
 * @returns One `WordLike` per input word, in input order.
 */
function transformWords(
  rawWords: readonly Word[],
  channelIndex: number,
): WordLike[] {
  return rawWords.map((word) => ({
    text: word.word,
    start_ms: Math.round(word.start * 1000),
    end_ms: Math.round(word.end * 1000),
    channel: channelIndex,
  }));
}
0 commit comments