-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Start working on TIMIT dataset for Pylearn2
- Loading branch information
Showing
7 changed files
with
703 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
""" | ||
Pylearn2 wrapper for the TIMIT dataset | ||
""" | ||
__authors__ = ["Vincent Dumoulin"] | ||
__copyright__ = "Copyright 2014, Universite de Montreal" | ||
__credits__ = ["Laurent Dinh", "Vincent Dumoulin"] | ||
__license__ = "3-clause BSD" | ||
__maintainer__ = "Vincent Dumoulin" | ||
__email__ = "dumouliv@iro" | ||
|
||
|
||
import os.path | ||
import cPickle | ||
import numpy | ||
from pylearn2.utils import serial | ||
from pylearn2.utils.iteration import resolve_iterator_class | ||
from pylearn2.datasets.dataset import Dataset | ||
from research.code.scripts.segmentaxis import segment_axis | ||
|
||
|
||
class TIMIT(Dataset):
    """
    Pylearn2 dataset wrapper for the TIMIT speech corpus.

    The constructor loads the raw waveforms and their annotations from
    ``$PYLEARN2_DATA_PATH/timit/readable``, cuts every waveform into
    fixed-length frames with `segment_axis`, and records the list of
    ``(sequence index, frame index)`` pairs from which examples can start.
    """
    _default_seed = (17, 2, 946)

    def __init__(self, which_set, frame_length, overlap=0,
                 frames_per_example=1, rng=_default_seed):
        """
        Parameters
        ----------
        which_set : str
            Either "train", "valid" or "test"
        frame_length : int
            Number of acoustic samples contained in a frame
        overlap : int, optional
            Number of overlapping acoustic samples for two consecutive frames.
            Defaults to 0, meaning frames don't overlap.
        frames_per_example : int, optional
            Number of frames in a training example. Defaults to 1.
        rng : object, optional
            A random number generator (or seed) intended for picking random
            indices when choosing minibatches. It is accepted for interface
            stability but is not used by the constructor yet.

        Raises
        ------
        ValueError
            If `which_set` is not one of "train", "valid" or "test".
        KeyError
            If the PYLEARN2_DATA_PATH environment variable is not set.
        """
        # Imported locally because the module-level imports do not provide
        # these two names; without them the data specs below cannot be built.
        from pylearn2.space import CompositeSpace, VectorSpace

        # Check which_set
        if which_set not in ['train', 'valid', 'test']:
            # str() keeps the message intact for string input and still
            # produces a ValueError (not a TypeError) for anything else.
            raise ValueError(str(which_set) + " is not a recognized value. " +
                             "Valid values are ['train', 'valid', 'test'].")

        self.frame_length = frame_length
        self.overlap = overlap
        self.frames_per_example = frames_per_example

        # Create file paths
        timit_base_path = os.path.join(os.environ["PYLEARN2_DATA_PATH"],
                                       "timit/readable")
        speaker_info_list_path = os.path.join(timit_base_path, "spkrinfo.npy")
        phonemes_list_path = os.path.join(timit_base_path,
                                          "reduced_phonemes.pkl")
        words_list_path = os.path.join(timit_base_path, "words.pkl")
        speaker_features_list_path = os.path.join(timit_base_path,
                                                  "spkr_feature_names.pkl")
        speaker_id_list_path = os.path.join(timit_base_path,
                                            "speakers_ids.pkl")
        raw_wav_path = os.path.join(timit_base_path, which_set + "_x_raw.npy")
        phonemes_path = os.path.join(timit_base_path,
                                     which_set + "_redux_phn.npy")
        sequences_to_phonemes_path = os.path.join(timit_base_path,
                                                  which_set +
                                                  "_seq_to_phn.npy")
        words_path = os.path.join(timit_base_path, which_set + "_wrd.npy")
        sequences_to_words_path = os.path.join(timit_base_path,
                                               which_set + "_seq_to_wrd.npy")
        speaker_path = os.path.join(timit_base_path,
                                    which_set + "_spkr.npy")

        # Load data
        # NOTE(review): the .tolist().toarray() chain suggests a sparse matrix
        # stored inside a 0-d object array -- confirm against the data files.
        self.speaker_info_list = serial.load(
            speaker_info_list_path).tolist().toarray()
        self.speaker_id_list = serial.load(speaker_id_list_path)
        self.speaker_features_list = serial.load(speaker_features_list_path)
        self.words_list = serial.load(words_list_path)
        self.phonemes_list = serial.load(phonemes_list_path)
        self.raw_wav = serial.load(raw_wav_path)
        self.phonemes = serial.load(phonemes_path)
        self.sequences_to_phonemes = serial.load(sequences_to_phonemes_path)
        self.words = serial.load(words_path)
        # NOTE(review): the next two arrays are loaded but neither stored nor
        # used yet; the loads are kept so missing files are still reported.
        sequences_to_words = serial.load(sequences_to_words_path)
        speaker_id = numpy.asarray(serial.load(speaker_path), 'int')

        # Segment every sequence into frames (replacing the raw sequence in
        # self.raw_wav) and list every (sequence, first frame) pair that
        # leaves at least `frames_per_example` frames after the first one.
        # NOTE(review): presumably the extra frame serves as the target --
        # confirm once the iterator is implemented.
        visiting_order = []
        for i, sequence in enumerate(self.raw_wav):
            segmented_sequence = segment_axis(sequence, self.frame_length,
                                              self.overlap)
            self.raw_wav[i] = segmented_sequence
            n_starts = segmented_sequence.shape[0] - self.frames_per_example
            for j in xrange(0, n_starts):
                visiting_order.append((i, j))
        self.visiting_order = visiting_order

        # DataSpecs: features are `frames_per_example` concatenated frames,
        # targets are a single frame.
        X_space = VectorSpace(dim=self.frame_length * self.frames_per_example)
        X_source = 'features'
        y_space = VectorSpace(dim=self.frame_length)
        y_source = 'targets'
        space = CompositeSpace((X_space, y_space))
        source = (X_source, y_source)
        self.data_specs = (space, source)

    def get_data_specs(self):
        """
        Returns the data_specs specifying how the data is internally stored.

        This is the format the data returned by `self.get_data()` will be.
        The value is the ``(space, source)`` pair built by the constructor.
        """
        return self.data_specs
|
||
|
||
if __name__ == "__main__":
    # Manual smoke test: build the training set with 20-sample frames that
    # overlap by 10 samples, grouping 4 frames per example.
    timit = TIMIT(which_set="train", frame_length=20, overlap=10,
                  frames_per_example=4)
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
import numpy as np | ||
from numpy.lib.stride_tricks import as_strided | ||
|
||
def segment_axis(a, length, overlap=0, axis=None, end='cut', endvalue=0):
    """Generate a new array that chops the given array along the given axis
    into overlapping frames.

    Parameters
    ----------
    a : array-like
        The array to segment. It must be a numpy array when `axis` is given.
    length : int
        The length of each frame
    overlap : int, optional
        The number of array elements by which the frames should overlap
    axis : int, optional
        The axis to operate on; if None, act on the flattened array.
        Negative values count from the last axis.
    end : {'cut', 'wrap', 'pad'}, optional
        What to do with the last frame, if the array is not evenly
        divisible into pieces.

        - 'cut'   Simply discard the extra values
        - 'wrap'  Copy values from the beginning of the array
        - 'pad'   Pad with a constant value
    endvalue : object
        The value to use for end='pad'

    Returns
    -------
    numpy.ndarray
        An array in which the segmented axis is replaced by two axes of
        sizes ``(number of frames, length)``.

    Raises
    ------
    ValueError
        If `overlap` is not smaller than `length`, if `overlap` is negative
        or `length` is not positive, if `end` is not a recognized mode while
        the array is not evenly divisible, or if 'cut' mode leaves no data.

    Examples
    --------
    >>> segment_axis(np.arange(10), 4, 2)
    array([[0, 1, 2, 3],
           [2, 3, 4, 5],
           [4, 5, 6, 7],
           [6, 7, 8, 9]])

    Notes
    -----
    The array is not copied unless necessary (either because it is
    unevenly strided and being flattened or because end is set to
    'pad' or 'wrap'). The frames are produced with `as_strided`.
    """
    # Imported locally: only the fallback branch at the end needs it and the
    # module-level imports do not provide it.
    import warnings

    if axis is None:
        a = np.ravel(a)  # may copy
        axis = 0
    elif axis < 0:
        # Normalize so the tuple slicing below (shape[:axis], shape[axis+1:])
        # selects the intended parts for negative axes too.
        axis += a.ndim

    total = a.shape[axis]

    if overlap >= length:
        raise ValueError("frames cannot overlap by more than 100%")
    if overlap < 0 or length <= 0:
        raise ValueError("overlap must be nonnegative and length must be "
                         "positive")

    hop = length - overlap  # distance between the starts of two frames

    if total < length or (total - length) % hop:
        # The axis does not split evenly: find the nearest lengths that do.
        if total > length:
            rounddown = length + ((total - length) // hop) * hop
            roundup = rounddown + hop
        else:
            roundup = length
            rounddown = 0
        assert rounddown < total < roundup

        # Work on the last axis so a plain `...` index reaches the data.
        a = a.swapaxes(-1, axis)

        if end == 'cut':
            a = a[..., :rounddown]
        elif end in ('pad', 'wrap'):  # copying will be necessary
            padded_shape = list(a.shape)
            padded_shape[-1] = roundup
            b = np.empty(padded_shape, dtype=a.dtype)
            b[..., :total] = a
            if end == 'pad':
                b[..., total:] = endvalue
            else:
                b[..., total:] = a[..., :roundup - total]
            a = b
        else:
            raise ValueError("end must be one of 'cut', 'pad' or 'wrap', "
                             "got %r" % (end,))

        a = a.swapaxes(-1, axis)

    total = a.shape[axis]
    if total == 0:
        raise ValueError("Not enough data points to segment array in 'cut' "
                         "mode; try 'pad' or 'wrap'")
    assert total >= length
    assert (total - length) % hop == 0
    n_frames = 1 + (total - length) // hop
    newshape = a.shape[:axis] + (n_frames, length) + a.shape[axis + 1:]

    def _frame_strides(arr):
        # Strides of the framed view: moving one frame ahead skips `hop`
        # elements, moving inside a frame skips one element.
        step = arr.strides[axis]
        return (arr.strides[:axis] + (hop * step, step) +
                arr.strides[axis + 1:])

    try:
        return as_strided(a, strides=_frame_strides(a), shape=newshape)
    except TypeError:
        warnings.warn("Problem with ndarray creation forces copy.")
        a = a.copy()
        # Shape doesn't change but strides do, so they are recomputed from
        # the copy rather than reused from the original array.
        return as_strided(a, strides=_frame_strides(a), shape=newshape)
Oops, something went wrong.