-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_export_functionality.py
More file actions
154 lines (132 loc) · 5.19 KB
/
test_export_functionality.py
File metadata and controls
154 lines (132 loc) · 5.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/usr/bin/env python3
"""
Test script to verify export functionality with diarized transcription.
"""
import sys
import os
import logging
import time
from pathlib import Path
# Put the "app" directory that sits next to this script at the front of the
# module search path.
# NOTE(review): the imports used later are of the form "app.…", which resolve
# from this script's own directory rather than from inside "app" — confirm that
# this entry is still required.
_app_dir = Path(__file__).parent.joinpath("app")
sys.path.insert(0, str(_app_dir))

# Root logging configuration: INFO and above, each record prefixed with the
# timestamp, logger name and level.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger used by everything below.
logger = logging.getLogger(__name__)
def _log_export_preview(export_format, export_data):
    """Log a short, human-readable preview of one export payload.

    Args:
        export_format: Format name that was passed to the export service.
            Only "txt", "csv" and "json" produce output; any other value is
            ignored.
        export_data: Payload returned by the export service. It is decoded
            as UTF-8 before being logged or parsed.

    Raises:
        Whatever decoding, JSON parsing or key lookup raises when the payload
        does not have the expected shape; the caller treats that as a failed
        export.
    """
    # Local import: json is not part of this file's top-level import block.
    import json

    if export_format in ("txt", "csv"):
        content = export_data.decode("utf-8")
        logger.info(
            f"{export_format.upper()} export (first 500 chars):\n{content[:500]}..."
        )
        return

    if export_format != "json":
        return

    data = json.loads(export_data.decode("utf-8"))
    info = data["transcription"]
    logger.info("JSON export structure:")
    logger.info(f" Transcription ID: {info['id']}")
    logger.info(f" Number of speakers: {info['num_speakers']}")
    if "segments" in info:
        segments = info["segments"]
        logger.info(f" Number of segments: {len(segments)}")
        if segments:
            logger.info(" Sample segments:")
            for seg in segments[:3]:
                logger.info(f" {seg['speaker']}: '{seg['text']}'")


def test_export_functionality():
    """Test the export service with diarized transcription.

    Builds a Transcription with five speaker-labelled mock segments, stores
    it through a database session, then exports it as txt, csv and json and
    logs a preview of each result.

    Every format is attempted even if an earlier one fails; failures are
    logged with their traceback and reported together at the end.

    Raises:
        AssertionError: if the export (or the preview of its output) failed
            for at least one format.
    """
    from app.services.export_service import ExportService
    from app.services.diarization_service import DiarizationService
    from app.core.database import get_database
    from app.models.transcription import Transcription

    # Initialize services
    export_service = ExportService()
    # NOTE(review): this instance is never used below. It is kept because
    # the constructor's side effects are not visible from this file —
    # confirm and drop it if it is not needed.
    diarization_service = DiarizationService()  # noqa: F841

    # Get database session
    db = get_database()
    session = next(db)

    try:
        logger.info("=== Testing export functionality with diarized transcription ===")

        # Create a mock transcription with segments and speaker assignments
        transcription = Transcription(
            session_id="test_export_" + str(int(time.time())),
            original_filename="stealth.mp3",
            file_path="/Users/cmorgan/Devel/Personal/SecureTranscribe/stealth.mp3",
            file_size=82184,
            file_duration=10.27,
            file_format="mp3",
            status="completed",
            whisper_model="base",
            pyannote_model="pyannote/speaker-diarization-3.1",
            speakers_assigned=True,
            num_speakers=2,
        )

        # Add mock transcription segments with speaker assignments
        mock_segments = [
            {
                "start_time": 0.0,
                "end_time": 0.8,
                "text": "Who's there?",
                "confidence": 0.95,
                "speaker": "John Doe",
            },
            {
                "start_time": 0.9,
                "end_time": 1.7,
                "text": "It's me.",
                "confidence": 0.93,
                "speaker": "Jane Smith",
            },
            {
                "start_time": 1.8,
                "end_time": 2.5,
                "text": "What do you want?",
                "confidence": 0.91,
                "speaker": "John Doe",
            },
            {
                "start_time": 2.6,
                "end_time": 3.4,
                "text": "I need help.",
                "confidence": 0.94,
                "speaker": "Jane Smith",
            },
            {
                "start_time": 3.5,
                "end_time": 4.2,
                "text": "Help with what?",
                "confidence": 0.92,
                "speaker": "John Doe",
            },
        ]
        transcription.segments = mock_segments

        session.add(transcription)
        session.commit()
        session.refresh(transcription)

        logger.info(
            f"Created transcription with {len(transcription.segments)} segments"
        )
        logger.info("Speaker assignments:")
        for i, segment in enumerate(transcription.segments):
            logger.info(f" Segment {i}: {segment['speaker']} - '{segment['text']}'")

        # Test export to different formats
        formats = ["txt", "csv", "json"]
        failed_formats = []
        for export_format in formats:
            logger.info(f"\n--- Testing {export_format.upper()} export ---")
            try:
                export_data = export_service.export_transcription(
                    transcription, export_format, session=session
                )
                _log_export_preview(export_format, export_data)
            except Exception as e:
                # Keep going so the remaining formats are still exercised;
                # the failure is reported once the loop has finished.
                logger.exception(f"Export to {export_format} failed: {e}")
                failed_formats.append(export_format)

        # Without this the test would pass even when every export failed.
        if failed_formats:
            raise AssertionError(
                f"Export failed for format(s): {', '.join(failed_formats)}"
            )
    finally:
        session.close()
# Run the export check when this file is executed directly as a script.
if __name__ == "__main__":
    test_export_functionality()