% improve_speech_signal.m
% (96 lines, 75 loc, 3.23 KB)
% Speech enhancement script: high-pass filtering followed by STFT-domain
% spectral subtraction, with plots and playback of the result.
% Speech enhancement pipeline:
%   1. load the recording and mix it down to mono,
%   2. high-pass filter it,
%   3. take its STFT, estimate a noise spectrum and subtract it,
%   4. resynthesise the time signal, then report SNR, plot and play back.

% Recommended to use .wav files but .mp3 will work too
[noisy_audio, fs] = audioread('Hallelujah.wav');

% Convert to mono if necessary
if size(noisy_audio, 2) > 1
    noisy_audio = mean(noisy_audio, 2);
end

% Pre-processing: High-pass filtering
cutoff_frequency = 200; % Hz; adjust based on speech characteristics
[b, a] = butter(4, cutoff_frequency / (fs / 2), 'high');
filtered_audio = filter(b, a, noisy_audio);

% Spectral analysis: STFT
window_size = 256;                   % Adjust based on desired time-frequency resolution
overlap = round(0.5 * window_size);  % Overlap in samples (integer)
nfft = window_size;                  % FFT length per frame
% The 4th positional argument of spectrogram is NFFT and the 5th is the
% sample rate. Passing fs as the 4th argument would set the FFT length to
% the sample rate and leave f/t in normalized units.
[filtered_spectrum, f, t] = spectrogram(filtered_audio, window_size, overlap, nfft, fs);

% Noise reduction: Spectral subtraction with noise estimation
estimated_noise_spectrum = estimate_noise_spectrum(filtered_spectrum);
enhanced_spectrum = subtract_noise(filtered_spectrum, estimated_noise_spectrum);

% Post-processing: Inverse STFT
enhanced_audio = inverse_stft(enhanced_spectrum, window_size, overlap);

% Ensure both signals have the same length
min_length = min(length(noisy_audio), length(enhanced_audio));
noisy_audio = noisy_audio(1:min_length);
enhanced_audio = enhanced_audio(1:min_length);

% Analysis and evaluation
% Calculate SNR: snr(x, y) expects the signal first and the NOISE second.
% The part removed by the enhancement is used as the noise estimate.
% NOTE(review): the causal IIR filter adds phase delay, so this difference
% also contains filtering artefacts; treat the number as a rough indicator.
removed_component = noisy_audio - enhanced_audio;
enhanced_SNR = snr(enhanced_audio, removed_component);
fprintf('Enhanced SNR: %.2f dB\n', enhanced_SNR);

% Plot time-domain signals
figure;
% Plot original and filtered signals
subplot(3, 1, 1);
plot(noisy_audio, 'b');
title('Original Speech Signal');
subplot(3, 1, 2);
plot(filtered_audio, 'g');
title('Filtered Speech Signal');
% Plot enhanced signal
subplot(3, 1, 3);
plot(enhanced_audio, 'r');
title('Enhanced Speech Signal');

% Plot original/enhanced overlay and the enhanced spectrogram
figure;
subplot(2, 1, 1);
plot(noisy_audio, 'b');
hold on;
plot(enhanced_audio, 'r');
title('Original (blue) vs Enhanced (red) Speech Signal');
legend('Original', 'Enhanced');
subplot(2, 1, 2);
imagesc(t, f, abs(enhanced_spectrum));
axis xy; % imagesc draws row 1 at the top; flip so low frequencies are at the bottom
xlabel('Time (s)');
ylabel('Frequency (Hz)');
title('Spectrogram of Enhanced Speech');

% Subjective listening
% Concatenate vertically so the two signals play one after the other
% (a two-column matrix would be played as simultaneous stereo channels),
% and pass fs so playback runs at the recording's sample rate.
soundsc([noisy_audio; enhanced_audio], fs); % Play original, then enhanced speech

function noise_spectrum = estimate_noise_spectrum(filtered_spectrum)
% ESTIMATE_NOISE_SPECTRUM  Per-row average magnitude of an STFT matrix.
%
%   noise_spectrum = estimate_noise_spectrum(filtered_spectrum)
%
%   filtered_spectrum : matrix of (possibly complex) STFT coefficients;
%                       the average is taken along dimension 2 (columns).
%   noise_spectrum    : column vector with one non-negative value per row.
%
% The magnitude is taken before averaging. Averaging the raw complex
% coefficients lets values with opposing phase cancel each other, which
% underestimates the noise level.
noise_spectrum = mean(abs(filtered_spectrum), 2);
end
function enhanced_spectrum = subtract_noise(filtered_spectrum, noise_spectrum, alpha)
% SUBTRACT_NOISE  Magnitude-domain spectral subtraction that keeps the phase.
%
%   enhanced_spectrum = subtract_noise(filtered_spectrum, noise_spectrum)
%   enhanced_spectrum = subtract_noise(filtered_spectrum, noise_spectrum, alpha)
%
%   filtered_spectrum : matrix of (possibly complex) STFT coefficients.
%   noise_spectrum    : noise estimate, either one value per row (column
%                       vector) or the same size as filtered_spectrum; only
%                       its magnitude is used.
%   alpha             : optional subtraction factor (default 0.0001).
%
%   enhanced_spectrum : same size as filtered_spectrum; each coefficient has
%                       its magnitude reduced by alpha * |noise| (floored at
%                       zero) and its phase left unchanged.
%
% The subtraction is done on magnitudes. Applying "< 0" directly to complex
% coefficients compares real parts only, which would zero every bin whose
% real part happens to be negative.
if nargin < 3 || isempty(alpha)
    alpha = 0.0001; % Adjust based on noise characteristics
end

magnitude = abs(filtered_spectrum);
reduced_magnitude = max(magnitude - alpha * abs(noise_spectrum), 0);

% Apply the reduction as a real gain so the phase is untouched; the eps
% floor avoids 0/0 for bins that are exactly zero.
gain = reduced_magnitude ./ max(magnitude, eps);
enhanced_spectrum = filtered_spectrum .* gain;
end
function enhanced_audio = inverse_stft(spectrum, window_size, overlap)
% INVERSE_STFT  Overlap-add resynthesis of a time signal from an STFT matrix.
%
%   enhanced_audio = inverse_stft(spectrum, window_size, overlap)
%
%   spectrum    : STFT matrix, one frame per column. If it has window_size
%                 rows it is treated as a full (two-sided) spectrum;
%                 otherwise it is treated as the one-sided spectrum of a
%                 real signal with an even FFT length of 2*(rows-1).
%   window_size : analysis frame length in samples.
%   overlap     : number of samples shared by consecutive frames.
%
%   enhanced_audio : real column vector of length
%                    (cols-1)*(window_size-overlap) + window_size.
%
% NOTE(review): no synthesis window or window-sum normalisation is applied,
% so the output is scaled by the overlap-added analysis window. Confirm
% whether amplitude-exact reconstruction is required by the caller.
[~, cols] = size(spectrum);

% Consecutive frames start hop samples apart, i.e. window_size - overlap
% (this equals overlap only for exactly 50% overlap).
hop = window_size - overlap;
if hop < 1
    error('inverse_stft:invalidOverlap', ...
        'overlap must be smaller than window_size.');
end

if cols == 0
    enhanced_audio = zeros(0, 1);
    return;
end

if size(spectrum, 1) == window_size
    full_spectrum = spectrum;
else
    % Rebuild the negative-frequency half as the conjugate mirror of the
    % positive half (excluding the DC and Nyquist rows).
    full_spectrum = [spectrum; conj(spectrum(end-1:-1:2, :))];
end

% Inverse FFT of every frame at once; real() drops round-off imaginary parts.
frames = real(ifft(full_spectrum, [], 1));

% Samples beyond window_size only exist when the FFT was zero-padded.
frame_length = min(window_size, size(frames, 1));
frames = frames(1:frame_length, :);

signal_length = (cols - 1) * hop + window_size;
enhanced_audio = zeros(signal_length, 1);
for col = 1:cols
    start_idx = (col - 1) * hop + 1;
    end_idx = start_idx + frame_length - 1;
    enhanced_audio(start_idx:end_idx) = enhanced_audio(start_idx:end_idx) + frames(:, col);
end
end