Skip to content

Commit

Permalink
Start working on TIMIT dataset for Pylearn2
Browse files Browse the repository at this point in the history
  • Loading branch information
vdumoulin committed Feb 9, 2014
1 parent 246309f commit c062007
Show file tree
Hide file tree
Showing 7 changed files with 703 additions and 0 deletions.
Empty file added code/pylearn2/__init__.py
Empty file.
Empty file.
119 changes: 119 additions & 0 deletions code/pylearn2/datasets/timit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""
Pylearn2 wrapper for the TIMIT dataset
"""
__authors__ = ["Vincent Dumoulin"]
__copyright__ = "Copyright 2014, Universite de Montreal"
__credits__ = ["Laurent Dinh", "Vincent Dumoulin"]
__license__ = "3-clause BSD"
__maintainer__ = "Vincent Dumoulin"
__email__ = "dumouliv@iro"


import cPickle
import os.path

import numpy

from pylearn2.datasets.dataset import Dataset
from pylearn2.space import CompositeSpace, VectorSpace
from pylearn2.utils import serial
from pylearn2.utils.iteration import resolve_iterator_class
from research.code.scripts.segmentaxis import segment_axis


class TIMIT(Dataset):
    """
    Pylearn2 wrapper for the TIMIT dataset.

    Reads the preprocessed TIMIT files located under
    ``$PYLEARN2_DATA_PATH/timit/readable`` and segments each raw acoustic
    sequence into fixed-length, possibly overlapping frames.
    """
    # Seed used as the default value of the `rng` constructor argument.
    _default_seed = (17, 2, 946)

    def __init__(self, which_set, frame_length, overlap=0,
                 frames_per_example=1, rng=_default_seed):
        """
        Parameters
        ----------
        which_set : str
            Either "train", "valid" or "test"
        frame_length : int
            Number of acoustic samples contained in a frame
        overlap : int, optional
            Number of overlapping acoustic samples for two consecutive frames.
            Defaults to 0, meaning frames don't overlap.
        frames_per_example : int, optional
            Number of frames in a training example. Defaults to 1.
        rng : object, optional
            A random number generator used for picking random indices into the
            design matrix when choosing minibatches.
            NOTE(review): accepted but never stored or used yet — confirm
            whether iteration code added later is meant to consume it.
        """
        # Fail fast on a bad split name, before touching the filesystem.
        if which_set not in ['train', 'valid', 'test']:
            raise ValueError(which_set + " is not a recognized value. " +
                             "Valid values are ['train', 'valid', 'test'].")

        self.frame_length = frame_length
        self.overlap = overlap
        self.frames_per_example = frames_per_example

        # Build the paths of every file of the preprocessed dataset.
        # Split-independent metadata:
        timit_base_path = os.path.join(os.environ["PYLEARN2_DATA_PATH"],
                                       "timit/readable")
        speaker_info_list_path = os.path.join(timit_base_path, "spkrinfo.npy")
        phonemes_list_path = os.path.join(timit_base_path,
                                          "reduced_phonemes.pkl")
        words_list_path = os.path.join(timit_base_path, "words.pkl")
        speaker_features_list_path = os.path.join(timit_base_path,
                                                  "spkr_feature_names.pkl")
        speaker_id_list_path = os.path.join(timit_base_path,
                                            "speakers_ids.pkl")
        # Split-specific data (prefixed by `which_set`):
        raw_wav_path = os.path.join(timit_base_path, which_set + "_x_raw.npy")
        phonemes_path = os.path.join(timit_base_path,
                                     which_set + "_redux_phn.npy")
        sequences_to_phonemes_path = os.path.join(timit_base_path,
                                                  which_set +
                                                  "_seq_to_phn.npy")
        words_path = os.path.join(timit_base_path, which_set + "_wrd.npy")
        sequences_to_words_path = os.path.join(timit_base_path,
                                               which_set + "_seq_to_wrd.npy")
        speaker_path = os.path.join(timit_base_path,
                                    which_set + "_spkr.npy")

        # Load data. spkrinfo.npy stores a sparse matrix; `.tolist()` yields
        # the sparse object, `.toarray()` densifies it.
        self.speaker_info_list = serial.load(
            speaker_info_list_path).tolist().toarray()
        self.speaker_id_list = serial.load(speaker_id_list_path)
        self.speaker_features_list = serial.load(speaker_features_list_path)
        self.words_list = serial.load(words_list_path)
        self.phonemes_list = serial.load(phonemes_list_path)
        self.raw_wav = serial.load(raw_wav_path)
        self.phonemes = serial.load(phonemes_path)
        self.sequences_to_phonemes = serial.load(sequences_to_phonemes_path)
        self.words = serial.load(words_path)
        # NOTE(review): the next two are loaded but never kept on `self`;
        # confirm whether they should become attributes.
        sequences_to_words = serial.load(sequences_to_words_path)
        speaker_id = numpy.asarray(serial.load(speaker_path), 'int')

        # Segment each raw sequence into overlapping frames and record every
        # valid (sequence index, starting frame index) example location.
        # An example consumes frames [j, j + frames_per_example) as features
        # and frame j + frames_per_example as target, hence the upper bound.
        visiting_order = []
        for i, sequence in enumerate(self.raw_wav):
            segmented_sequence = segment_axis(sequence, self.frame_length,
                                              self.overlap)
            self.raw_wav[i] = segmented_sequence
            for j in xrange(segmented_sequence.shape[0] -
                            self.frames_per_example):
                visiting_order.append((i, j))
        self.visiting_order = visiting_order

        # DataSpecs: features are `frames_per_example` concatenated frames,
        # targets are the single next frame.
        X_space = VectorSpace(dim=self.frame_length * self.frames_per_example)
        X_source = 'features'
        y_space = VectorSpace(dim=self.frame_length)
        y_source = 'targets'
        space = CompositeSpace((X_space, y_space))
        source = (X_source, y_source)
        self.data_specs = (space, source)

    def get_data_specs(self):
        """
        Returns the data_specs specifying how the data is internally stored.
        This is the format the data returned by `self.get_data()` will be.
        """
        return self.data_specs


# Smoke test: load the training split with 20-sample frames overlapping by
# 10 samples. Requires $PYLEARN2_DATA_PATH to point at preprocessed TIMIT.
if __name__ == "__main__":
    timit = TIMIT("train", frame_length=20, overlap=10, frames_per_example=4)
Empty file added code/pylearn2/utils/__init__.py
Empty file.
Empty file.
110 changes: 110 additions & 0 deletions code/scripts/segmentaxis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import numpy as np
from numpy.lib.stride_tricks import as_strided

def segment_axis(a, length, overlap=0, axis=None, end='cut', endvalue=0):
    """Generate a new array that chops the given array along the given axis
    into overlapping frames.

    Parameters
    ----------
    a : array-like
        The array to segment
    length : int
        The length of each frame
    overlap : int, optional
        The number of array elements by which the frames should overlap
    axis : int, optional
        The axis to operate on; if None, act on the flattened array
    end : {'cut', 'wrap', 'pad'}, optional
        What to do with the last frame, if the array is not evenly
        divisible into pieces.

        - 'cut'   Simply discard the extra values
        - 'wrap'  Copy values from the beginning of the array
        - 'pad'   Pad with a constant value

    endvalue : object
        The value to use for end='pad'

    Examples
    --------
    >>> import numpy as np
    >>> segment_axis(np.arange(10), 4, 2)
    array([[0, 1, 2, 3],
           [2, 3, 4, 5],
           [4, 5, 6, 7],
           [6, 7, 8, 9]])

    Notes
    -----
    The array is not copied unless necessary (either because it is
    unevenly strided and being flattened or because end is set to
    'pad' or 'wrap'). The overlapping view is built with numpy's
    as_strided.
    """
    if axis is None:
        a = np.ravel(a)  # may copy
        axis = 0

    l = a.shape[axis]

    if overlap >= length:
        raise ValueError("frames cannot overlap by more than 100%")
    if overlap < 0 or length <= 0:
        raise ValueError("overlap must be nonnegative and length must be "
                         "positive")

    if l < length or (l - length) % (length - overlap):
        # The data does not divide evenly into frames: compute the nearest
        # lengths that do, below (for 'cut') and above (for 'pad'/'wrap').
        if l > length:
            roundup = length + \
                (1 + (l - length) // (length - overlap)) * (length - overlap)
            rounddown = length + \
                ((l - length) // (length - overlap)) * (length - overlap)
        else:
            roundup = length
            rounddown = 0
        assert rounddown < l < roundup
        assert roundup == rounddown + (length - overlap) or \
            (roundup == length and rounddown == 0)
        # Work on the last axis, then swap back.
        a = a.swapaxes(-1, axis)

        if end == 'cut':
            a = a[..., :rounddown]
        elif end in ['pad', 'wrap']:  # copying will be necessary
            s = list(a.shape)
            s[-1] = roundup
            b = np.empty(s, dtype=a.dtype)
            b[..., :l] = a
            if end == 'pad':
                b[..., l:] = endvalue
            elif end == 'wrap':
                b[..., l:] = a[..., :roundup - l]
            a = b

        a = a.swapaxes(-1, axis)

    l = a.shape[axis]
    if l == 0:
        raise ValueError("Not enough data points to segment array in 'cut' "
                         "mode; try 'pad' or 'wrap'")
    assert l >= length
    assert (l - length) % (length - overlap) == 0
    n = 1 + (l - length) // (length - overlap)
    s = a.strides[axis]
    newshape = a.shape[:axis] + (n, length) + a.shape[axis + 1:]
    newstrides = a.strides[:axis] + ((length - overlap) * s, s) + \
        a.strides[axis + 1:]

    try:
        return as_strided(a, strides=newstrides, shape=newshape)
    except TypeError:
        # Fallback: some inputs reject the strided view; retry on a
        # contiguous copy. Imported locally because the module top does
        # not import warnings.
        import warnings
        warnings.warn("Problem with ndarray creation forces copy.")
        a = a.copy()
        # Shape doesn't change, but the copy is contiguous so its element
        # stride must be re-read rather than reusing the stale one.
        s = a.strides[axis]
        newstrides = a.strides[:axis] + ((length - overlap) * s, s) + \
            a.strides[axis + 1:]
        return as_strided(a, strides=newstrides, shape=newshape)
Loading

0 comments on commit c062007

Please sign in to comment.