Skip to content

Commit

Permalink
pitch
Browse files Browse the repository at this point in the history
  • Loading branch information
aishoot authored Jan 9, 2018
1 parent 4f9ef88 commit d73e47d
Show file tree
Hide file tree
Showing 5 changed files with 180 additions and 0 deletions.
46 changes: 46 additions & 0 deletions Pitch/PitchTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import wave
import numpy as np
import pylab as pl

# Read the wave file and get its parameters.  The context manager closes the
# file even if reading fails.
with wave.open('../sounds/a.wav', 'rb') as fw:
    params = fw.getparams()
    print(params)
    nchannels, sampwidth, framerate, nframes = params[:4]
    strData = fw.readframes(nframes)

# np.frombuffer replaces the deprecated binary mode of np.fromstring.
# Converting to float64 before abs() avoids the int16 overflow of abs(-32768).
waveData = np.frombuffer(strData, dtype=np.int16).astype(np.float64)
peak = np.max(np.abs(waveData)) if waveData.size else 0.0
if peak > 0:  # a silent recording stays all-zero instead of becoming NaN
    waveData = waveData / peak  # normalization

# Time axis (seconds) for the whole recording.
time = np.arange(0, len(waveData)) * (1.0 / framerate)

# Two frames of frameSize samples are inspected; the values below are sample
# indices into waveData.
frameSize = 512
frame1Start = 10000
frame2Start = 15000
# Indices (inside the first frame) of the two vertical markers drawn on the
# middle plot; their spacing is used for the printed frequency estimate.
mark1 = 59
mark2 = 169

# Frame boundaries converted from sample indices to seconds.
index1 = frame1Start * 1.0 / framerate
index2 = (frame1Start + frameSize) * 1.0 / framerate
index3 = frame2Start * 1.0 / framerate
index4 = (frame2Start + frameSize) * 1.0 / framerate

# Top plot: whole waveform with both frames delimited.
pl.subplot(311)
pl.title("pitch")
pl.plot(time, waveData)
pl.plot([index1, index1], [-1, 1], 'r')
pl.plot([index2, index2], [-1, 1], 'r')
pl.plot([index3, index3], [-1, 1], 'g')
pl.plot([index4, index4], [-1, 1], 'g')
pl.xlabel("time (seconds)")
pl.ylabel("Amplitude")

# Middle plot: first frame with the two markers.
pl.subplot(312)
pl.plot(np.arange(frameSize), waveData[frame1Start:frame1Start + frameSize], 'r')
pl.plot([mark1, mark1], [-1, 1], 'b')
pl.plot([mark2, mark2], [-1, 1], 'b')
# Frequency (Hz) corresponding to the marker spacing taken as one period.
print(1 / ((mark2 - mark1) * 1.0 / framerate))
pl.xlabel("index in 1 frame")
pl.ylabel("Amplitude")

# Bottom plot: second frame.
pl.subplot(313)
pl.plot(np.arange(frameSize), waveData[frame2Start:frame2Start + frameSize], 'g')
pl.xlabel("index in 1 frame")
pl.ylabel("Amplitude")
pl.savefig("pitch.png")
pl.show()
75 changes: 75 additions & 0 deletions Pitch/PitchTracking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import math
import numpy as np

# Original auto-correlation function (ACF)
def ACF(frame):
    """Return the unnormalized autocorrelation of ``frame`` for every lag.

    Element ``k`` of the result is the sum over the overlapping samples of
    ``frame[n + k] * frame[n]``.

    Args:
        frame: 1-D sequence of samples (NumPy array or plain sequence).

    Returns:
        A float64 array with ``len(frame)`` entries, one per lag.
    """
    # Work in float64: raw int16 samples overflow silently when multiplied,
    # and plain Python sequences do not support element-wise products.
    frame = np.asarray(frame, dtype=float)
    flen = len(frame)
    acf = np.zeros(flen)
    for lag in range(flen):
        acf[lag] = np.sum(frame[lag:] * frame[:flen - lag])
    return acf

# ACF with weight
def ACF2(frame):
    """Return the autocorrelation of ``frame`` averaged over the overlap.

    Same sum as the plain autocorrelation, but each lag ``k`` is divided by
    the number of overlapping samples ``len(frame) - k``, so large lags are
    not penalized for having fewer terms.

    Args:
        frame: 1-D sequence of samples (NumPy array or plain sequence).

    Returns:
        A float64 array with ``len(frame)`` entries, one per lag.
    """
    # Work in float64: raw int16 samples overflow silently when multiplied,
    # and plain Python sequences do not support element-wise products.
    frame = np.asarray(frame, dtype=float)
    flen = len(frame)
    acf = np.zeros(flen)
    for lag in range(flen):
        overlap = flen - lag  # always >= 1 inside the loop
        acf[lag] = np.sum(frame[lag:] * frame[:overlap]) / overlap
    return acf

# ACF to half frame length
def ACF3(frame):
    """Return the unnormalized autocorrelation for the first half of the lags.

    Only lags ``0 .. len(frame) // 2 - 1`` are evaluated, so every lag is
    computed from at least half a frame of overlapping samples.

    Args:
        frame: 1-D sequence of samples (NumPy array or plain sequence).

    Returns:
        A float64 array with ``len(frame) // 2`` entries, one per lag.
    """
    # Work in float64: raw int16 samples overflow silently when multiplied,
    # and plain Python sequences do not support element-wise products.
    frame = np.asarray(frame, dtype=float)
    flen = len(frame)
    # Floor division: ``flen / 2`` is a float under Python 3 and is rejected
    # by both np.zeros and range.
    half = flen // 2
    acf = np.zeros(half)
    for lag in range(half):
        acf[lag] = np.sum(frame[lag:] * frame[:flen - lag])
    return acf

# normalized squared difference function(NSDF)
def NSDF(frame):
    """Return the normalized squared difference function of ``frame``.

    For each lag ``k`` the result is ``2 * s1 / (s2 + s3)`` where ``s1`` is
    the cross product of the two overlapping windows and ``s2``/``s3`` are
    their individual energies.

    Args:
        frame: 1-D sequence of samples (NumPy array or plain sequence).

    Returns:
        A float64 array with ``len(frame)`` entries, one per lag.  A lag whose
        two windows both have zero energy (e.g. a silent frame) yields 0
        instead of NaN.
    """
    # Work in float64: raw int16 samples overflow silently when multiplied,
    # and plain Python sequences do not support element-wise products.
    frame = np.asarray(frame, dtype=float)
    flen = len(frame)
    nsdf = np.zeros(flen)
    for lag in range(flen):
        shifted = frame[lag:]
        base = frame[:flen - lag]
        cross = np.sum(shifted * base)
        energy = np.sum(shifted * shifted) + np.sum(base * base)
        # ``!= 0`` (rather than ``> 0``) keeps NaN inputs propagating while
        # skipping the 0/0 case, whose entry stays at the initial 0.
        if energy != 0:
            nsdf[lag] = 2.0 * cross / energy
    return nsdf

# AMDF (average magnitude difference function)
def AMDF(frame):
    """Return the negated magnitude difference function of ``frame``.

    Element ``k`` is ``-sum(|frame[n + k] - frame[n]|)`` over the overlapping
    samples.  The sign is flipped so that, like an autocorrelation, the best
    lag is the one with the largest value.

    Args:
        frame: 1-D sequence of samples (NumPy array or plain sequence).

    Returns:
        A float64 array with ``len(frame)`` entries, one per lag.
    """
    # Work in float64: differences of raw int16 samples overflow silently,
    # and plain Python sequences do not support element-wise subtraction.
    frame = np.asarray(frame, dtype=float)
    flen = len(frame)
    amdf = np.zeros(flen)
    for lag in range(flen):
        amdf[lag] = -np.sum(np.abs(frame[lag:] - frame[:flen - lag]))
    return amdf

# AMDF with weight
def AMDF2(frame):
    """Return the negated magnitude difference averaged over the overlap.

    Element ``k`` is ``-sum(|frame[n + k] - frame[n]|) / (len(frame) - k)``.
    The sign is flipped so that, like an autocorrelation, the best lag is the
    one with the largest value.

    Args:
        frame: 1-D sequence of samples (NumPy array or plain sequence).

    Returns:
        A float64 array with ``len(frame)`` entries, one per lag.
    """
    # Work in float64: differences of raw int16 samples overflow silently,
    # and plain Python sequences do not support element-wise subtraction.
    frame = np.asarray(frame, dtype=float)
    flen = len(frame)
    amdf = np.zeros(flen)
    for lag in range(flen):
        overlap = flen - lag  # always >= 1 inside the loop
        amdf[lag] = -np.sum(np.abs(frame[lag:] - frame[:overlap])) / overlap
    return amdf

# AMDF to half frame length
def AMDF3(frame):
    """Return the negated magnitude difference for the first half of the lags.

    Only lags ``0 .. len(frame) // 2 - 1`` are evaluated.  The sign is flipped
    so that, like an autocorrelation, the best lag is the one with the largest
    value.

    Args:
        frame: 1-D sequence of samples (NumPy array or plain sequence).

    Returns:
        A float64 array with ``len(frame) // 2`` entries, one per lag.
    """
    # Work in float64: differences of raw int16 samples overflow silently,
    # and plain Python sequences do not support element-wise subtraction.
    frame = np.asarray(frame, dtype=float)
    flen = len(frame)
    # Floor division: ``flen / 2`` is a float under Python 3 and is rejected
    # by both np.zeros and range.
    half = flen // 2
    amdf = np.zeros(half)
    for lag in range(half):
        amdf[lag] = -np.sum(np.abs(frame[lag:] - frame[:flen - lag]))
    return amdf

# Pitch Tracking
def PitchTrack(waveData, frameRate, frameSize, overLap, acfmethod, minLag=30):
    """Estimate one pitch value (Hz) per analysis frame.

    The signal is cut into frames of ``frameSize`` samples that advance by
    ``frameSize - overLap`` samples.  ``acfmethod`` is applied to each frame
    and the lag of its largest value (ignoring lags below ``minLag``) is taken
    as the period, giving ``frameRate / lag`` as the pitch.

    Args:
        waveData: 1-D sequence of samples.
        frameRate: sampling rate in samples per second.
        frameSize: number of samples per frame.
        overLap: number of samples shared by consecutive frames; must be
            smaller than ``frameSize``.
        acfmethod: callable mapping a frame to a per-lag score array whose
            maximum marks the best period.
        minLag: lags below this value are excluded from the peak search.
            Defaults to 30, the value that used to be hard-coded.

    Returns:
        A float64 array with ``ceil(len(waveData) / step)`` entries.  Frames
        without a usable peak (silent frames, or trailing frames no longer
        than ``minLag`` samples) are reported as 0.0 instead of dividing by
        a zero lag.

    Raises:
        ValueError: if ``overLap`` is not smaller than ``frameSize``.
    """
    step = frameSize - overLap
    if step <= 0:
        raise ValueError("overLap must be smaller than frameSize")

    wlen = len(waveData)
    frameNum = int(math.ceil(wlen * 1.0 / step))
    pitchtrack = np.zeros(frameNum)
    for i in range(frameNum):
        # frameSize / overLap may be floats (e.g. ``frameSize / 2`` under
        # Python 3), so the slice bounds are cast to int explicitly.
        start = int(i * step)
        stop = int(i * step + frameSize)
        # Copy so that masking below never mutates an array owned by acfmethod.
        acf = np.array(acfmethod(waveData[start:stop]), dtype=float)
        if acf.size == 0:
            continue  # nothing to search; leave the frame at 0.0
        # Mask the smallest lags so the trivial peak at lag 0 cannot win.
        acf[:minLag] = np.min(acf)
        acfmax = int(np.argmax(acf))
        # A peak at lag 0 means every candidate was masked or flat.
        if acfmax > 0:
            pitchtrack[i] = frameRate * 1.0 / acfmax
    return pitchtrack
59 changes: 59 additions & 0 deletions Pitch/PitchTrackingTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import wave
import numpy as np
import pylab as pl
import PitchTracking as pt

# Read the wave file and get its parameters.  The context manager closes the
# file even if reading fails.
with wave.open('../sounds/aeiou.wav', 'rb') as fw:
    params = fw.getparams()
    print(params)
    nchannels, sampwidth, framerate, nframes = params[:4]
    strData = fw.readframes(nframes)

# np.frombuffer replaces the deprecated binary mode of np.fromstring.
# Converting to float64 before abs() avoids the int16 overflow of abs(-32768).
waveData = np.frombuffer(strData, dtype=np.int16).astype(np.float64)
peak = np.max(np.abs(waveData)) if waveData.size else 0.0
if peak > 0:  # a silent recording stays all-zero instead of becoming NaN
    waveData = waveData / peak  # normalization

# Time axis (seconds) for the whole recording.
time = np.arange(0, len(waveData)) * (1.0 / framerate)

frameSize = 512
overLap = frameSize // 2  # floor division keeps the overlap an integer sample count
# Sample range of the single frame inspected in detail.
idx1 = 10000
idx2 = idx1 + frameSize
# The same frame boundaries expressed in seconds, for the waveform plot.
index1 = idx1 * 1.0 / framerate
index2 = idx2 * 1.0 / framerate
acf = pt.ACF(waveData[idx1:idx2])
# Suppress the first lags so the lag-0 peak is not picked as the maximum.
acf[0:10] = -acf[0]
acfmax = np.argmax(acf)
print(acfmax)
print(framerate * 1.0 / acfmax)

# Whole waveform with the inspected frame delimited.
pl.subplot(411)
pl.title("pitchTrack")
pl.plot(time, waveData)
pl.plot([index1, index1], [-1, 1], 'r')
pl.plot([index2, index2], [-1, 1], 'r')
pl.xlabel("time (seconds)")
pl.ylabel("Amplitude")

# Samples of the inspected frame.
pl.subplot(412)
pl.plot(np.arange(frameSize), waveData[idx1:idx2], 'r')
pl.xlabel("index in 1 frame")
pl.ylabel("Amplitude")

# ACF of the inspected frame (after suppressing the first lags).
pl.subplot(413)
pl.plot(np.arange(frameSize), acf, 'g')
pl.xlabel("index in 1 frame")
pl.ylabel("ACF")

# pitch tracking
acfmethod = pt.ACF
pitchtrack = pt.PitchTrack(waveData, framerate, frameSize, overLap, acfmethod)
# Spread the per-frame estimates evenly over the duration of the recording.
xpt = np.arange(0, len(pitchtrack)) * (len(waveData) * 1.0 / len(pitchtrack) / framerate)
pl.subplot(414)
pl.plot(xpt, pitchtrack, '-*')
pl.xlabel('time (seconds)')
pl.ylabel('Frequency (Hz)')

#pl.savefig("pitchTrack.png")
pl.show()
Binary file added Pitch/pitch.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Pitch/pitchTrack.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit d73e47d

Please sign in to comment.