@@ -19,175 +19,63 @@ type SegmentKey = { channel: number } | { speaker_index: number; channel: number
1919export function buildSegments <
2020 TFinal extends WordLike ,
2121 TPartial extends WordLike ,
22- TWord extends SegmentWord = SegmentWord ,
2322> (
2423 finalWords : readonly TFinal [ ] ,
2524 partialWords : readonly TPartial [ ] ,
26- transform ?: ( word : SegmentWord ) => TWord ,
27- ) : Segment < TWord > [ ] {
28- const mapWord = transform ?? ( ( word ) => word as TWord ) ;
29- const wordsByChannel = groupWordsByChannel ( finalWords , partialWords , mapWord ) ;
30- return createSpeakerTurns ( wordsByChannel ) ;
31- }
32-
33- function toSegmentWord ( word : WordLike , isFinal : boolean ) : SegmentWord {
34- return {
35- text : word . text ,
36- start_ms : word . start_ms ,
37- end_ms : word . end_ms ,
38- channel : word . channel ,
39- isFinal,
40- } ;
41- }
42-
43- function addWordToChannel < TWord extends SegmentWord > (
44- channels : Map < number , TWord [ ] > ,
45- word : TWord ,
46- ) : void {
47- const channelWords = channels . get ( word . channel ) ?? [ ] ;
48- channelWords . push ( word ) ;
49- channels . set ( word . channel , channelWords ) ;
50- }
51-
52- function groupWordsByChannel <
53- TFinal extends WordLike ,
54- TPartial extends WordLike ,
55- TWord extends SegmentWord ,
56- > (
57- finalWords : readonly TFinal [ ] ,
58- partialWords : readonly TPartial [ ] ,
59- mapWord : ( word : SegmentWord ) => TWord ,
60- ) : Map < number , TWord [ ] > {
61- const channels = new Map < number , TWord [ ] > ( ) ;
62-
63- for ( const word of finalWords ) {
64- addWordToChannel ( channels , mapWord ( toSegmentWord ( word , true ) ) ) ;
65- }
66-
67- for ( const word of partialWords ) {
68- addWordToChannel ( channels , mapWord ( toSegmentWord ( word , false ) ) ) ;
69- }
70-
71- for ( const words of channels . values ( ) ) {
72- words . sort ( ( a , b ) => a . start_ms - b . start_ms ) ;
73- }
74-
75- return channels ;
25+ ) : Segment [ ] {
26+ const allWords : SegmentWord [ ] = [
27+ ...finalWords . map ( ( word ) => ( {
28+ text : word . text ,
29+ start_ms : word . start_ms ,
30+ end_ms : word . end_ms ,
31+ channel : word . channel ,
32+ isFinal : true ,
33+ } ) ) ,
34+ ...partialWords . map ( ( word ) => ( {
35+ text : word . text ,
36+ start_ms : word . start_ms ,
37+ end_ms : word . end_ms ,
38+ channel : word . channel ,
39+ isFinal : false ,
40+ } ) ) ,
41+ ] ;
42+
43+ return createSpeakerTurns ( allWords ) ;
7644}
7745
78- function flattenAndSortWords < TWord extends SegmentWord > (
79- wordsByChannel : Map < number , TWord [ ] > ,
80- ) : TWord [ ] {
81- const allWords : TWord [ ] = [ ] ;
82- wordsByChannel . forEach ( ( words ) => allWords . push ( ...words ) ) ;
83- allWords . sort ( ( a , b ) => a . start_ms - b . start_ms ) ;
84- return allWords ;
85- }
86-
87- function splitIntoInitialTurns < TWord extends SegmentWord > (
88- sortedWords : TWord [ ] ,
46+ function createSpeakerTurns < TWord extends SegmentWord > (
47+ words : TWord [ ] ,
48+ maxGapMs = 2000 ,
8949) : Segment < TWord > [ ] {
90- if ( sortedWords . length === 0 ) {
50+ if ( words . length === 0 ) {
9151 return [ ] ;
9252 }
9353
94- const turns : Segment < TWord > [ ] = [ ] ;
95- let currentTurn : Segment < TWord > = {
96- key : { channel : sortedWords [ 0 ] . channel } ,
97- words : [ sortedWords [ 0 ] ] ,
98- } ;
54+ const sortedWords = [ ...words ] . sort ( ( a , b ) => a . start_ms - b . start_ms ) ;
55+ const segments : Segment < TWord > [ ] = [ ] ;
56+ const currentByChannel = new Map < number , Segment < TWord > > ( ) ;
9957
100- for ( let i = 1 ; i < sortedWords . length ; i ++ ) {
101- const word = sortedWords [ i ] ;
58+ for ( const word of sortedWords ) {
59+ const current = currentByChannel . get ( word . channel ) ;
10260
103- if ( word . channel === currentTurn . key . channel ) {
104- currentTurn . words . push ( word ) ;
105- } else {
106- turns . push ( currentTurn ) ;
107- currentTurn = { key : { channel : word . channel } , words : [ word ] } ;
61+ if ( ! current ) {
62+ const newSegment = { key : { channel : word . channel } , words : [ word ] } ;
63+ currentByChannel . set ( word . channel , newSegment ) ;
64+ segments . push ( newSegment ) ;
65+ continue ;
10866 }
109- }
110-
111- turns . push ( currentTurn ) ;
112- return turns ;
113- }
114-
115- function groupSegmentsByChannel < TWord extends SegmentWord > (
116- segments : Segment < TWord > [ ] ,
117- ) : Map < number , Segment < TWord > [ ] > {
118- const byChannel = new Map < number , Segment < TWord > [ ] > ( ) ;
119-
120- for ( const segment of segments ) {
121- const channelSegments = byChannel . get ( segment . key . channel ) ?? [ ] ;
122- channelSegments . push ( segment ) ;
123- byChannel . set ( segment . key . channel , channelSegments ) ;
124- }
125-
126- return byChannel ;
127- }
128-
129- function getSegmentStartTime < TWord extends SegmentWord > (
130- segment : Segment < TWord > ,
131- ) : number {
132- return segment . words [ 0 ] ?. start_ms ?? 0 ;
133- }
134-
135- function calculateTimingGap < TWord extends SegmentWord > (
136- firstSegment : Segment < TWord > ,
137- secondSegment : Segment < TWord > ,
138- ) : number {
139- if ( firstSegment . words . length === 0 || secondSegment . words . length === 0 ) {
140- return Infinity ;
141- }
142-
143- const lastWordOfFirst = firstSegment . words [ firstSegment . words . length - 1 ] ;
144- const firstWordOfSecond = secondSegment . words [ 0 ] ;
145- return firstWordOfSecond . start_ms - lastWordOfFirst . end_ms ;
146- }
147-
148- function mergeSegmentsByGap < TWord extends SegmentWord > (
149- segments : Segment < TWord > [ ] ,
150- channel : number ,
151- maxGapMs : number ,
152- ) : Segment < TWord > [ ] {
153- segments . sort ( ( a , b ) => getSegmentStartTime ( a ) - getSegmentStartTime ( b ) ) ;
154-
155- const merged : Segment < TWord > [ ] = [ ] ;
156- let currentMerged = { key : { channel } , words : [ ...segments [ 0 ] . words ] } ;
15767
158- for ( let i = 1 ; i < segments . length ; i ++ ) {
159- const nextSegment = segments [ i ] ;
160- const gap = calculateTimingGap ( currentMerged , nextSegment ) ;
68+ const lastWord = current . words [ current . words . length - 1 ] ;
69+ const gap = word . start_ms - lastWord . end_ms ;
16170
162- if ( gap < maxGapMs ) {
163- currentMerged . words . push ( ... nextSegment . words ) ;
71+ if ( gap <= maxGapMs ) {
72+ current . words . push ( word ) ;
16473 } else {
165- merged . push ( currentMerged ) ;
166- currentMerged = { key : { channel } , words : [ ...nextSegment . words ] } ;
74+ const newSegment = { key : { channel : word . channel } , words : [ word ] } ;
75+ currentByChannel . set ( word . channel , newSegment ) ;
76+ segments . push ( newSegment ) ;
16777 }
16878 }
16979
170- merged . push ( currentMerged ) ;
171- return merged ;
172- }
173-
174- function createSpeakerTurns < TWord extends SegmentWord > (
175- wordsByChannel : Map < number , TWord [ ] > ,
176- maxGapMs = 2000 ,
177- ) : Segment < TWord > [ ] {
178- const sortedWords = flattenAndSortWords ( wordsByChannel ) ;
179- if ( sortedWords . length === 0 ) {
180- return [ ] ;
181- }
182-
183- const initialTurns = splitIntoInitialTurns ( sortedWords ) ;
184- const turnsByChannel = groupSegmentsByChannel ( initialTurns ) ;
185-
186- const finalSegments : Segment < TWord > [ ] = [ ] ;
187- for ( const [ channel , channelTurns ] of turnsByChannel ) {
188- finalSegments . push ( ...mergeSegmentsByGap ( channelTurns , channel , maxGapMs ) ) ;
189- }
190-
191- finalSegments . sort ( ( a , b ) => getSegmentStartTime ( a ) - getSegmentStartTime ( b ) ) ;
192- return finalSegments ;
80+ return segments ;
19381}
0 commit comments