Skip to content

Commit

Permalink
VAD
Browse files Browse the repository at this point in the history
  • Loading branch information
aishoot authored Jan 9, 2018
1 parent cbd4851 commit 6591ce0
Show file tree
Hide file tree
Showing 6 changed files with 148 additions and 0 deletions.
Binary file added VAD/VAD01.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
55 changes: 55 additions & 0 deletions VAD/VAD01.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import wave
import numpy as np
import matplotlib.pyplot as plt
import Volume as vp

def findIndex(vol, thres):
    """Return the first and last frame index where `vol` crosses `thres`.

    A crossing is counted at frame ``i`` when ``vol[i]`` and ``vol[i + 1]``
    lie strictly on opposite sides of the threshold.

    Parameters
    ----------
    vol : array-like
        Per-frame volume values; a 1-D sequence or an ``(N, 1)`` column.
    thres : float or 1-element array
        Threshold level.

    Returns
    -------
    numpy.ndarray
        Two integers ``[first, last]``. ``[0, 0]`` when there is no crossing.
    """
    # Flatten so both (N,) and (N, 1) inputs are handled the same way.
    levels = np.asarray(vol, dtype=float).ravel()
    levels = levels - np.asarray(thres, dtype=float).ravel()

    # A strictly negative product means the two neighbours straddle the
    # threshold. Collecting every crossing avoids the fixed four-slot buffer,
    # which overflowed on the fifth crossing and returned a stale zero when
    # fewer than four crossings were found.
    crossings = np.flatnonzero(levels[:-1] * levels[1:] < 0)
    if crossings.size == 0:
        return np.zeros(2, dtype=np.int64)

    # int64 keeps frame indices above 32767 intact (int16 wrapped around).
    return crossings[[0, -1]].astype(np.int64)

# Read the whole recording; the context manager closes the file even if
# reading fails part-way through.
with wave.open('../sounds/sunday.wav', 'rb') as fw:
    params = fw.getparams()
    nchannels, sampwidth, framerate, nframes = params[:4]
    strData = fw.readframes(nframes)

# np.frombuffer replaces the deprecated binary mode of np.fromstring.
# Converting to float before abs() avoids int16 overflow on -32768.
# NOTE(review): samples are decoded as 16-bit; sampwidth is not checked.
waveData = np.frombuffer(strData, dtype=np.int16).astype(np.float64)
if nchannels > 1:
    # WAV frames interleave channels; keep the first channel so the signal
    # has exactly nframes samples and lines up with the time axis below.
    waveData = waveData[::nchannels]
peak = np.max(np.abs(waveData))
if peak > 0:  # leave an all-zero signal untouched instead of dividing by 0
    waveData = waveData / peak  # normalization

frameSize = 256
overLap = 128
vol = vp.calVolume(waveData, frameSize, overLap)
threshold1 = max(vol) * 0.10
threshold2 = min(vol) * 10.0
threshold3 = max(vol) * 0.05 + min(vol) * 5.0

# Seconds covered by one volume frame; converts frame indices to time.
frameTime = nframes * 1.0 / len(vol) / framerate
time = np.arange(0, nframes) * (1.0 / framerate)
vols = np.arange(0, len(vol)) * frameTime
index1 = findIndex(vol, threshold1) * frameTime
index2 = findIndex(vol, threshold2) * frameTime
index3 = findIndex(vol, threshold3) * frameTime
end = nframes * (1.0 / framerate)

# Top panel: waveform with the crossing positions of each threshold drawn
# as vertical lines.
plt.subplot(211)
plt.title("VAD01 using volume")
plt.plot(time, waveData, color="black")
plt.plot([index1, index1], [-1, 1], '-r')
plt.plot([index2, index2], [-1, 1], '-g')
plt.plot([index3, index3], [-1, 1], '-b')
plt.ylabel('Amplitude')

# Bottom panel: per-frame volume with the three thresholds.
plt.subplot(212)
plt.plot(vols, vol, color="black")
plt.plot([0, end], [threshold1, threshold1], '-r', label="threshold 1")
plt.plot([0, end], [threshold2, threshold2], '-g', label="threshold 2")
plt.plot([0, end], [threshold3, threshold3], '-b', label="threshold 3")
plt.legend()
plt.ylabel('Volume(absSum)')
plt.xlabel('time(seconds)')
plt.savefig("VAD01")
plt.show()
Binary file added VAD/VAD02.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
52 changes: 52 additions & 0 deletions VAD/VAD02.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import wave
import numpy as np
import matplotlib.pyplot as plt
import Volume as vp
from ZeroCR import ZeroCR

# Read the whole recording; the context manager closes the file even if
# reading fails part-way through.
with wave.open('../sounds/sunday.wav', 'rb') as fw:
    params = fw.getparams()
    nchannels, sampwidth, framerate, nframes = params[:4]
    strData = fw.readframes(nframes)

# np.frombuffer replaces the deprecated binary mode of np.fromstring.
# Converting to float before abs() avoids int16 overflow on -32768.
# NOTE(review): samples are decoded as 16-bit; sampwidth is not checked.
waveData = np.frombuffer(strData, dtype=np.int16).astype(np.float64)
if nchannels > 1:
    # WAV frames interleave channels; keep the first channel so the signal
    # has exactly nframes samples and lines up with the time axis below.
    waveData = waveData[::nchannels]
peak = np.max(np.abs(waveData))
if peak > 0:  # leave an all-zero signal untouched instead of dividing by 0
    waveData = waveData / peak  # normalization

frameSize = 256
overLap = 128
vol = vp.calVolume(waveData, frameSize, overLap)
zcr = ZeroCR(waveData, frameSize, overLap)
threshold1 = max(vol) * 0.10
threshold2 = min(vol) * 10.0
threshold3 = max(vol) * 0.05 + min(vol) * 5.0
threshold12 = max(zcr) * 0.10
threshold22 = min(zcr) * 10.0
threshold32 = max(zcr) * 0.05 + min(zcr) * 5.0

# Time axes: one per sample, one per volume frame, one per ZCR frame.
time = np.arange(0, nframes) * (1.0 / framerate)
vols = np.arange(0, len(vol)) * (nframes * 1.0 / len(vol) / framerate)
zcrs = np.arange(0, len(zcr)) * (nframes * 1.0 / len(zcr) / framerate)
end = nframes * (1.0 / framerate)

# Top panel: normalized waveform.
plt.subplot(311)
plt.title("VAD02 using volume and ZeroCR")
plt.plot(time, waveData, color="black")
plt.ylabel('Amplitude')

# Middle panel: per-frame volume with its three thresholds.
plt.subplot(312)
plt.plot(vols, vol, color="black")
plt.plot([0, end], [threshold1, threshold1], '-r', label="threshold 1")
plt.plot([0, end], [threshold2, threshold2], '-g', label="threshold 2")
plt.plot([0, end], [threshold3, threshold3], '-b', label="threshold 3")
plt.legend()
plt.ylabel('Volume(absSum)')

# Bottom panel: per-frame zero-crossing count with its three thresholds.
plt.subplot(313)
plt.plot(zcrs, zcr, color="black")
plt.plot([0, end], [threshold12, threshold12], '-r', label="threshold 12")
plt.plot([0, end], [threshold22, threshold22], '-g', label="threshold 22")
plt.plot([0, end], [threshold32, threshold32], '-b', label="threshold 32")
plt.legend()
plt.ylabel('Zero-Crossing Rate')
plt.xlabel('time(seconds)')
plt.savefig("VAD02")
plt.show()
27 changes: 27 additions & 0 deletions VAD/Volume.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import math
import numpy as np

# method 1: absSum
def calVolume(waveData, frameSize, overLap):
    """Compute per-frame volume as the sum of absolute sample values.

    Frames start every ``frameSize - overLap`` samples. Frames near the end
    of the signal are truncated to the samples that remain. Each frame has
    its own mean subtracted before the absolute values are summed.

    Parameters
    ----------
    waveData : array-like
        1-D sequence of samples.
    frameSize : int
        Number of samples per frame.
    overLap : int
        Number of samples shared by consecutive frames.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(frameNum, 1)`` with one volume value per frame.

    Raises
    ------
    ValueError
        If ``overLap`` is not smaller than ``frameSize``.
    """
    step = frameSize - overLap
    if step <= 0:
        # A non-positive hop would divide by zero or yield a negative
        # frame count below.
        raise ValueError("overLap must be smaller than frameSize")

    waveData = np.asarray(waveData)
    wlen = len(waveData)
    frameNum = int(math.ceil(wlen * 1.0 / step))
    volume = np.zeros((frameNum, 1))
    for i in range(frameNum):
        start = i * step
        # Slicing clamps at the end of the signal on its own.
        curFrame = waveData[start:start + frameSize]
        curFrame = curFrame - np.mean(curFrame)  # zero-justified
        volume[i] = np.sum(np.abs(curFrame))
    return volume

# method 2: log10 of square sum
def calVolumeDB(waveData, frameSize, overLap):
    """Compute per-frame volume as ``10 * log10`` of the frame energy.

    Frames start every ``frameSize - overLap`` samples. Frames near the end
    of the signal are truncated to the samples that remain. Each frame has
    its own mean subtracted before its squared samples are summed.

    Parameters
    ----------
    waveData : array-like
        1-D sequence of samples.
    frameSize : int
        Number of samples per frame.
    overLap : int
        Number of samples shared by consecutive frames.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(frameNum, 1)`` with one value per frame. Frames
        with zero energy report a finite floor value instead of ``-inf``.

    Raises
    ------
    ValueError
        If ``overLap`` is not smaller than ``frameSize``.
    """
    step = frameSize - overLap
    if step <= 0:
        # A non-positive hop would divide by zero or yield a negative
        # frame count below.
        raise ValueError("overLap must be smaller than frameSize")

    waveData = np.asarray(waveData)
    wlen = len(waveData)
    frameNum = int(math.ceil(wlen * 1.0 / step))
    volume = np.zeros((frameNum, 1))
    # Constant frames (including a one-sample tail) have zero energy after
    # mean removal; flooring the energy keeps log10 finite and warning-free.
    floor = np.finfo(float).eps
    for i in range(frameNum):
        start = i * step
        # Slicing clamps at the end of the signal on its own.
        curFrame = waveData[start:start + frameSize]
        curFrame = curFrame - np.mean(curFrame)  # zero-justified
        energy = np.sum(curFrame * curFrame)
        volume[i] = 10 * np.log10(max(energy, floor))
    return volume
14 changes: 14 additions & 0 deletions VAD/ZeroCR.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import math
import numpy as np

def ZeroCR(waveData, frameSize, overLap):
    """Count zero crossings in each frame of a signal.

    Frames start every ``frameSize - overLap`` samples. Frames near the end
    of the signal are truncated to the samples that remain. Each frame has
    its own mean subtracted first. A crossing is counted for every pair of
    adjacent samples whose product is less than or equal to zero, so a
    sample that is exactly zero counts as a crossing too.

    Parameters
    ----------
    waveData : array-like
        1-D sequence of samples.
    frameSize : int
        Number of samples per frame.
    overLap : int
        Number of samples shared by consecutive frames.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(frameNum, 1)`` with one crossing count per frame.

    Raises
    ------
    ValueError
        If ``overLap`` is not smaller than ``frameSize``.
    """
    step = frameSize - overLap
    if step <= 0:
        # A non-positive hop would divide by zero or yield a negative
        # frame count below.
        raise ValueError("overLap must be smaller than frameSize")

    waveData = np.asarray(waveData)
    wlen = len(waveData)
    frameNum = int(math.ceil(wlen * 1.0 / step))
    zcr = np.zeros((frameNum, 1))
    for i in range(frameNum):
        start = i * step
        # Slicing clamps at the end of the signal on its own.
        curFrame = waveData[start:start + frameSize]
        # To avoid DC bias, subtract the frame mean before counting.
        curFrame = curFrame - np.mean(curFrame)  # zero-justified
        # count_nonzero runs in native code, unlike the builtin sum().
        zcr[i] = np.count_nonzero(curFrame[:-1] * curFrame[1:] <= 0)
    return zcr

0 comments on commit 6591ce0

Please sign in to comment.