Skip to content

Commit 8d3a392

Browse files
committed
more segmenting simplification
1 parent 50c74a7 commit 8d3a392

File tree

2 files changed

+46
-177
lines changed

2 files changed

+46
-177
lines changed

apps/desktop/src/utils/segment.test.ts

Lines changed: 5 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import { describe, expect, test } from "vitest";
2-
import { buildSegments, type SegmentWord } from "./segment";
2+
import { buildSegments } from "./segment";
33

44
describe("buildSegments", () => {
55
const testCases = [
@@ -82,10 +82,10 @@ describe("buildSegments", () => {
8282
],
8383
},
8484
{
85-
name: "does not merge speaker turns once the max gap is reached",
85+
name: "does not merge speaker turns once it exceeds the max gap",
8686
finalWords: [
8787
{ text: "first", start_ms: 0, end_ms: 100, channel: 0 },
88-
{ text: "return", start_ms: 2100, end_ms: 2200, channel: 0 },
88+
{ text: "return", start_ms: 2101, end_ms: 2201, channel: 0 },
8989
{ text: "other", start_ms: 150, end_ms: 200, channel: 1 },
9090
],
9191
partialWords: [],
@@ -104,25 +104,6 @@ describe("buildSegments", () => {
104104
}),
105105
],
106106
},
107-
{
108-
name: "applies the provided transform to every word",
109-
finalWords: [
110-
{ text: "final", start_ms: 0, end_ms: 100, channel: 0 },
111-
],
112-
partialWords: [
113-
{ text: "partial", start_ms: 120, end_ms: 160, channel: 0 },
114-
],
115-
transform: (word: SegmentWord) => ({ ...word, label: word.isFinal ? "final" : "partial" }),
116-
expected: [
117-
expect.objectContaining({
118-
key: { channel: 0 },
119-
words: [
120-
expect.objectContaining({ text: "final", label: "final" }),
121-
expect.objectContaining({ text: "partial", label: "partial" }),
122-
],
123-
}),
124-
],
125-
},
126107
{
127108
name: "merges when gap is exactly at maxGapMs threshold (2000ms)",
128109
finalWords: [
@@ -197,8 +178,8 @@ describe("buildSegments", () => {
197178
},
198179
];
199180

200-
test.each(testCases)("$name", ({ finalWords, partialWords, transform, expected }) => {
201-
const segments = buildSegments(finalWords, partialWords, transform);
181+
test.each(testCases)("$name", ({ finalWords, partialWords, expected }) => {
182+
const segments = buildSegments(finalWords, partialWords);
202183
expect(segments).toEqual(expected);
203184
});
204185
});

apps/desktop/src/utils/segment.ts

Lines changed: 41 additions & 153 deletions
Original file line number | Diff line number | Diff line change
@@ -19,175 +19,63 @@ type SegmentKey = { channel: number } | { speaker_index: number; channel: number
1919
export function buildSegments<
2020
TFinal extends WordLike,
2121
TPartial extends WordLike,
22-
TWord extends SegmentWord = SegmentWord,
2322
>(
2423
finalWords: readonly TFinal[],
2524
partialWords: readonly TPartial[],
26-
transform?: (word: SegmentWord) => TWord,
27-
): Segment<TWord>[] {
28-
const mapWord = transform ?? ((word) => word as TWord);
29-
const wordsByChannel = groupWordsByChannel(finalWords, partialWords, mapWord);
30-
return createSpeakerTurns(wordsByChannel);
31-
}
32-
33-
function toSegmentWord(word: WordLike, isFinal: boolean): SegmentWord {
34-
return {
35-
text: word.text,
36-
start_ms: word.start_ms,
37-
end_ms: word.end_ms,
38-
channel: word.channel,
39-
isFinal,
40-
};
41-
}
42-
43-
function addWordToChannel<TWord extends SegmentWord>(
44-
channels: Map<number, TWord[]>,
45-
word: TWord,
46-
): void {
47-
const channelWords = channels.get(word.channel) ?? [];
48-
channelWords.push(word);
49-
channels.set(word.channel, channelWords);
50-
}
51-
52-
function groupWordsByChannel<
53-
TFinal extends WordLike,
54-
TPartial extends WordLike,
55-
TWord extends SegmentWord,
56-
>(
57-
finalWords: readonly TFinal[],
58-
partialWords: readonly TPartial[],
59-
mapWord: (word: SegmentWord) => TWord,
60-
): Map<number, TWord[]> {
61-
const channels = new Map<number, TWord[]>();
62-
63-
for (const word of finalWords) {
64-
addWordToChannel(channels, mapWord(toSegmentWord(word, true)));
65-
}
66-
67-
for (const word of partialWords) {
68-
addWordToChannel(channels, mapWord(toSegmentWord(word, false)));
69-
}
70-
71-
for (const words of channels.values()) {
72-
words.sort((a, b) => a.start_ms - b.start_ms);
73-
}
74-
75-
return channels;
25+
): Segment[] {
26+
const allWords: SegmentWord[] = [
27+
...finalWords.map((word) => ({
28+
text: word.text,
29+
start_ms: word.start_ms,
30+
end_ms: word.end_ms,
31+
channel: word.channel,
32+
isFinal: true,
33+
})),
34+
...partialWords.map((word) => ({
35+
text: word.text,
36+
start_ms: word.start_ms,
37+
end_ms: word.end_ms,
38+
channel: word.channel,
39+
isFinal: false,
40+
})),
41+
];
42+
43+
return createSpeakerTurns(allWords);
7644
}
7745

78-
function flattenAndSortWords<TWord extends SegmentWord>(
79-
wordsByChannel: Map<number, TWord[]>,
80-
): TWord[] {
81-
const allWords: TWord[] = [];
82-
wordsByChannel.forEach((words) => allWords.push(...words));
83-
allWords.sort((a, b) => a.start_ms - b.start_ms);
84-
return allWords;
85-
}
86-
87-
function splitIntoInitialTurns<TWord extends SegmentWord>(
88-
sortedWords: TWord[],
46+
function createSpeakerTurns<TWord extends SegmentWord>(
47+
words: TWord[],
48+
maxGapMs = 2000,
8949
): Segment<TWord>[] {
90-
if (sortedWords.length === 0) {
50+
if (words.length === 0) {
9151
return [];
9252
}
9353

94-
const turns: Segment<TWord>[] = [];
95-
let currentTurn: Segment<TWord> = {
96-
key: { channel: sortedWords[0].channel },
97-
words: [sortedWords[0]],
98-
};
54+
const sortedWords = [...words].sort((a, b) => a.start_ms - b.start_ms);
55+
const segments: Segment<TWord>[] = [];
56+
const currentByChannel = new Map<number, Segment<TWord>>();
9957

100-
for (let i = 1; i < sortedWords.length; i++) {
101-
const word = sortedWords[i];
58+
for (const word of sortedWords) {
59+
const current = currentByChannel.get(word.channel);
10260

103-
if (word.channel === currentTurn.key.channel) {
104-
currentTurn.words.push(word);
105-
} else {
106-
turns.push(currentTurn);
107-
currentTurn = { key: { channel: word.channel }, words: [word] };
61+
if (!current) {
62+
const newSegment = { key: { channel: word.channel }, words: [word] };
63+
currentByChannel.set(word.channel, newSegment);
64+
segments.push(newSegment);
65+
continue;
10866
}
109-
}
110-
111-
turns.push(currentTurn);
112-
return turns;
113-
}
114-
115-
function groupSegmentsByChannel<TWord extends SegmentWord>(
116-
segments: Segment<TWord>[],
117-
): Map<number, Segment<TWord>[]> {
118-
const byChannel = new Map<number, Segment<TWord>[]>();
119-
120-
for (const segment of segments) {
121-
const channelSegments = byChannel.get(segment.key.channel) ?? [];
122-
channelSegments.push(segment);
123-
byChannel.set(segment.key.channel, channelSegments);
124-
}
125-
126-
return byChannel;
127-
}
128-
129-
function getSegmentStartTime<TWord extends SegmentWord>(
130-
segment: Segment<TWord>,
131-
): number {
132-
return segment.words[0]?.start_ms ?? 0;
133-
}
134-
135-
function calculateTimingGap<TWord extends SegmentWord>(
136-
firstSegment: Segment<TWord>,
137-
secondSegment: Segment<TWord>,
138-
): number {
139-
if (firstSegment.words.length === 0 || secondSegment.words.length === 0) {
140-
return Infinity;
141-
}
142-
143-
const lastWordOfFirst = firstSegment.words[firstSegment.words.length - 1];
144-
const firstWordOfSecond = secondSegment.words[0];
145-
return firstWordOfSecond.start_ms - lastWordOfFirst.end_ms;
146-
}
147-
148-
function mergeSegmentsByGap<TWord extends SegmentWord>(
149-
segments: Segment<TWord>[],
150-
channel: number,
151-
maxGapMs: number,
152-
): Segment<TWord>[] {
153-
segments.sort((a, b) => getSegmentStartTime(a) - getSegmentStartTime(b));
154-
155-
const merged: Segment<TWord>[] = [];
156-
let currentMerged = { key: { channel }, words: [...segments[0].words] };
15767

158-
for (let i = 1; i < segments.length; i++) {
159-
const nextSegment = segments[i];
160-
const gap = calculateTimingGap(currentMerged, nextSegment);
68+
const lastWord = current.words[current.words.length - 1];
69+
const gap = word.start_ms - lastWord.end_ms;
16170

162-
if (gap < maxGapMs) {
163-
currentMerged.words.push(...nextSegment.words);
71+
if (gap <= maxGapMs) {
72+
current.words.push(word);
16473
} else {
165-
merged.push(currentMerged);
166-
currentMerged = { key: { channel }, words: [...nextSegment.words] };
74+
const newSegment = { key: { channel: word.channel }, words: [word] };
75+
currentByChannel.set(word.channel, newSegment);
76+
segments.push(newSegment);
16777
}
16878
}
16979

170-
merged.push(currentMerged);
171-
return merged;
172-
}
173-
174-
function createSpeakerTurns<TWord extends SegmentWord>(
175-
wordsByChannel: Map<number, TWord[]>,
176-
maxGapMs = 2000,
177-
): Segment<TWord>[] {
178-
const sortedWords = flattenAndSortWords(wordsByChannel);
179-
if (sortedWords.length === 0) {
180-
return [];
181-
}
182-
183-
const initialTurns = splitIntoInitialTurns(sortedWords);
184-
const turnsByChannel = groupSegmentsByChannel(initialTurns);
185-
186-
const finalSegments: Segment<TWord>[] = [];
187-
for (const [channel, channelTurns] of turnsByChannel) {
188-
finalSegments.push(...mergeSegmentsByGap(channelTurns, channel, maxGapMs));
189-
}
190-
191-
finalSegments.sort((a, b) => getSegmentStartTime(a) - getSegmentStartTime(b));
192-
return finalSegments;
80+
return segments;
19381
}

0 commit comments

Comments
 (0)