Start working on TIMIT dataset for Pylearn2

vdumoulin · Feb 9, 2014 · c062007 · c062007
1 parent 246309f
commit c062007
Show file tree

Hide file tree

Showing 7 changed files with 703 additions and 0 deletions.
diff --git a/code/pylearn2/__init__.py b/code/pylearn2/__init__.py
diff --git a/code/pylearn2/datasets/__init__.py b/code/pylearn2/datasets/__init__.py
diff --git a/code/pylearn2/datasets/timit.py b/code/pylearn2/datasets/timit.py
@@ -0,0 +1,119 @@
+"""
+Pylearn2 wrapper for the TIMIT dataset
+"""
+__authors__ = ["Vincent Dumoulin"]
+__copyright__ = "Copyright 2014, Universite de Montreal"
+__credits__ = ["Laurent Dinh", "Vincent Dumoulin"]
+__license__ = "3-clause BSD"
+__maintainer__ = "Vincent Dumoulin"
+__email__ = "dumouliv@iro"
+
+
+import cPickle
+import os.path
+
+import numpy
+from pylearn2.datasets.dataset import Dataset
+from pylearn2.space import CompositeSpace, VectorSpace
+from pylearn2.utils import serial
+from pylearn2.utils.iteration import resolve_iterator_class
+
+from research.code.scripts.segmentaxis import segment_axis
+
+
+class TIMIT(Dataset):
+    """
+    Frame-based Pylearn2 wrapper for the TIMIT dataset
+
+    Each raw acoustic sequence is cut by `segment_axis` into frames of
+    `frame_length` samples (consecutive frames sharing `overlap` samples).
+    Candidate examples are listed in `self.visiting_order` as
+    `(sequence_index, first_frame_index)` pairs.
+    """
+    _default_seed = (17, 2, 946)
+
+    def __init__(self, which_set, frame_length, overlap=0,
+                 frames_per_example=1, rng=_default_seed):
+        """
+        Load the requested TIMIT split and segment it into frames.
+
+        Parameters
+        ----------
+        which_set : str
+            Either "train", "valid" or "test"
+        frame_length : int
+            Number of acoustic samples contained in a frame
+        overlap : int, optional
+            Number of overlapping acoustic samples for two consecutive frames.
+            Defaults to 0, meaning frames don't overlap.
+        frames_per_example : int, optional
+            Number of frames in a training example. Defaults to 1.
+        rng : object, optional
+            A random number generator used for picking random indices into the
+            design matrix when choosing minibatches. Either an object
+            exposing `random_integers` (used as is) or a seed accepted by
+            `numpy.random.RandomState`. Stored as `self.rng`.
+
+        Raises
+        ------
+        ValueError
+            If `which_set` is not one of the recognized split names.
+        KeyError
+            If the PYLEARN2_DATA_PATH environment variable is not set.
+        """
+        # Check which_set
+        if which_set not in ['train', 'valid', 'test']:
+            raise ValueError(which_set + " is not a recognized value. " +
+                             "Valid values are ['train', 'valid', 'test'].")
+
+        self.frame_length = frame_length
+        self.overlap = overlap
+        self.frames_per_example = frames_per_example
+
+        # The rng argument used to be accepted but silently dropped; keep an
+        # existing generator as is, otherwise treat the value as a seed.
+        if hasattr(rng, 'random_integers'):
+            self.rng = rng
+        else:
+            self.rng = numpy.random.RandomState(rng)
+
+        # Create file paths
+        timit_base_path = os.path.join(os.environ["PYLEARN2_DATA_PATH"],
+                                       "timit/readable")
+
+        def data_path(file_name):
+            # Files shared by all splits and per-split files live side by
+            # side in the same directory.
+            return os.path.join(timit_base_path, file_name)
+
+        speaker_info_list_path = data_path("spkrinfo.npy")
+        phonemes_list_path = data_path("reduced_phonemes.pkl")
+        words_list_path = data_path("words.pkl")
+        speaker_features_list_path = data_path("spkr_feature_names.pkl")
+        speaker_id_list_path = data_path("speakers_ids.pkl")
+        raw_wav_path = data_path(which_set + "_x_raw.npy")
+        phonemes_path = data_path(which_set + "_redux_phn.npy")
+        sequences_to_phonemes_path = data_path(which_set + "_seq_to_phn.npy")
+        words_path = data_path(which_set + "_wrd.npy")
+        sequences_to_words_path = data_path(which_set + "_seq_to_wrd.npy")
+        speaker_path = data_path(which_set + "_spkr.npy")
+
+        # Load data
+        # NOTE(review): the .tolist().toarray() chain suggests the file holds
+        # a 0-d object array wrapping a sparse matrix -- confirm.
+        self.speaker_info_list = serial.load(
+            speaker_info_list_path
+        ).tolist().toarray()
+        self.speaker_id_list = serial.load(speaker_id_list_path)
+        self.speaker_features_list = serial.load(speaker_features_list_path)
+        self.words_list = serial.load(words_list_path)
+        self.phonemes_list = serial.load(phonemes_list_path)
+        self.raw_wav = serial.load(raw_wav_path)
+        self.phonemes = serial.load(phonemes_path)
+        self.sequences_to_phonemes = serial.load(sequences_to_phonemes_path)
+        self.words = serial.load(words_path)
+        # These two were loaded and then discarded; keep them on the
+        # instance like the other per-split arrays.
+        self.sequences_to_words = serial.load(sequences_to_words_path)
+        self.speaker_id = numpy.asarray(serial.load(speaker_path), 'int')
+
+        # Segment every sequence into frames (in place) and enumerate the
+        # admissible (sequence, first frame) pairs.
+        visiting_order = []
+        for i, sequence in enumerate(self.raw_wav):
+            segmented_sequence = segment_axis(sequence, self.frame_length,
+                                              self.overlap)
+            self.raw_wav[i] = segmented_sequence
+            # Stop `frames_per_example` frames before the end: presumably so
+            # that a full window plus one following target frame always
+            # exists -- confirm against the iterator once it is written.
+            num_starts = (segmented_sequence.shape[0] -
+                          self.frames_per_example)
+            visiting_order.extend((i, j) for j in xrange(num_starts))
+        self.visiting_order = visiting_order
+
+        # DataSpecs: features are `frames_per_example` concatenated frames,
+        # targets are a single frame.
+        X_space = VectorSpace(dim=self.frame_length * self.frames_per_example)
+        X_source = 'features'
+        y_space = VectorSpace(dim=self.frame_length)
+        y_source = 'targets'
+        space = CompositeSpace((X_space, y_space))
+        source = (X_source, y_source)
+        self.data_specs = (space, source)
+
+    def get_data_specs(self):
+        """
+        Returns the data_specs specifying how the data is internally stored.
+
+        The value is the `(space, source)` pair built in `__init__`: a
+        `CompositeSpace` of the features and targets spaces, and the tuple
+        `('features', 'targets')`.
+        """
+        return self.data_specs
+
+
+if __name__ == "__main__":
+    # Manual smoke test: build the training split with 20-sample frames
+    # overlapping by 10 samples, 4 frames per example.
+    timit = TIMIT(
+        "train",
+        frame_length=20,
+        overlap=10,
+        frames_per_example=4
+    )
diff --git a/code/pylearn2/utils/__init__.py b/code/pylearn2/utils/__init__.py
diff --git a/code/pylearn2/utils/iteration.py b/code/pylearn2/utils/iteration.py
diff --git a/code/scripts/segmentaxis.py b/code/scripts/segmentaxis.py
@@ -0,0 +1,110 @@
+import numpy as np
+from numpy.lib.stride_tricks import as_strided
+
+def segment_axis(a, length, overlap=0, axis=None, end='cut', endvalue=0):
+    """Generate a new array that chops the given array along the given axis
+    into overlapping frames.
+
+    Parameters
+    ----------
+    a : array-like
+        The array to segment
+    length : int
+        The length of each frame
+    overlap : int, optional
+        The number of array elements by which the frames should overlap
+    axis : int, optional
+        The axis to operate on; if None, act on the flattened array.
+        Negative values count from the last axis.
+    end : {'cut', 'wrap', 'pad'}, optional
+        What to do with the last frame, if the array is not evenly
+        divisible into pieces. Ignored when the array divides evenly.
+
+            - 'cut'   Simply discard the extra values
+            - 'wrap'  Copy values from the beginning of the array
+            - 'pad'   Pad with a constant value
+
+    endvalue : object
+        The value to use for end='pad'
+
+    Returns
+    -------
+    frames : ndarray
+        Array in which `axis` is replaced by two axes of sizes
+        `(n_frames, length)`.
+
+    Raises
+    ------
+    ValueError
+        If `overlap` or `length` are out of range, if `end` is not a
+        recognized mode when the array needs adjusting, if there is no
+        data to wrap, or if 'cut' leaves no data.
+
+    Examples
+    --------
+    >>> segment_axis(np.arange(10), 4, 2)
+    array([[0, 1, 2, 3],
+           [2, 3, 4, 5],
+           [4, 5, 6, 7],
+           [6, 7, 8, 9]])
+
+    Notes
+    -----
+    The array is not copied unless necessary (either because it is
+    unevenly strided and being flattened or because end is set to
+    'pad' or 'wrap'). The result is built with `as_strided`, so when
+    `overlap` is nonzero the frames share memory with each other.
+
+    """
+    # Local import: the module header only brings in numpy.
+    import warnings
+
+    a = np.asanyarray(a)
+    if axis is None:
+        a = np.ravel(a)  # may copy
+        axis = 0
+    elif axis < 0:
+        # The tuple slicing below (shape[:axis], shape[axis+1:]) is only
+        # correct for a non-negative axis.
+        axis += a.ndim
+
+    l = a.shape[axis]
+
+    if overlap >= length:
+        raise ValueError("frames cannot overlap by more than 100%")
+    if overlap < 0 or length <= 0:
+        raise ValueError("overlap must be nonnegative and length must be "
+                         "positive")
+
+    step = length - overlap
+
+    if l < length or (l - length) % step:
+        # The axis does not hold a whole number of frames: shrink it to
+        # `rounddown` or grow it to `roundup` depending on `end`.
+        if end not in ('cut', 'pad', 'wrap'):
+            raise ValueError("end must be one of 'cut', 'pad' or 'wrap'")
+        if l > length:
+            rounddown = length + ((l - length) // step) * step
+            roundup = rounddown + step
+        else:
+            roundup = length
+            rounddown = 0
+
+        # Work on the last axis so `...` indexing can be used.
+        a = a.swapaxes(-1, axis)
+
+        if end == 'cut':
+            a = a[..., :rounddown]
+        else:  # 'pad' or 'wrap': copying is necessary
+            if end == 'wrap' and l == 0:
+                raise ValueError("cannot wrap an empty array")
+            padded_shape = list(a.shape)
+            padded_shape[-1] = roundup
+            b = np.empty(padded_shape, dtype=a.dtype)
+            b[..., :l] = a
+            if end == 'pad':
+                b[..., l:] = endvalue
+            else:
+                # Modular indexing keeps wrapping around when the missing
+                # tail is longer than the data itself.
+                b[..., l:] = a[..., np.arange(l, roundup) % l]
+            a = b
+
+        a = a.swapaxes(-1, axis)
+
+    l = a.shape[axis]
+    if l == 0:
+        raise ValueError("Not enough data points to segment array in 'cut' "
+                         "mode; try 'pad' or 'wrap'")
+    # Internal invariants guaranteed by the adjustment above.
+    assert l >= length
+    assert (l - length) % step == 0
+    n = 1 + (l - length) // step
+    s = a.strides[axis]
+    newshape = a.shape[:axis] + (n, length) + a.shape[axis + 1:]
+    newstrides = a.strides[:axis] + (step * s, s) + a.strides[axis + 1:]
+
+    try:
+        return as_strided(a, strides=newstrides, shape=newshape)
+    except TypeError:
+        warnings.warn("Problem with ndarray creation forces copy.")
+        a = a.copy()
+        # Shape doesn't change but strides do, including the one along
+        # `axis`, so recompute it from the copy.
+        s = a.strides[axis]
+        newstrides = a.strides[:axis] + (step * s, s) + a.strides[axis + 1:]
+        return as_strided(a, strides=newstrides, shape=newshape)