From c062007585a47904415551ebe95b6610d4efd6da Mon Sep 17 00:00:00 2001
From: Vincent Dumoulin
Date: Sun, 9 Feb 2014 16:15:57 -0500
Subject: [PATCH] Start working on TIMIT dataset for Pylearn2

---
 code/pylearn2/__init__.py          |   0
 code/pylearn2/datasets/__init__.py |   0
 code/pylearn2/datasets/timit.py    | 119 ++++++++
 code/pylearn2/utils/__init__.py    |   0
 code/pylearn2/utils/iteration.py   |   0
 code/scripts/segmentaxis.py        | 110 +++++++
 code/scripts/timit.py              | 474 +++++++++++++++++++++++++++++
 7 files changed, 703 insertions(+)
 create mode 100644 code/pylearn2/__init__.py
 create mode 100644 code/pylearn2/datasets/__init__.py
 create mode 100644 code/pylearn2/datasets/timit.py
 create mode 100644 code/pylearn2/utils/__init__.py
 create mode 100644 code/pylearn2/utils/iteration.py
 create mode 100644 code/scripts/segmentaxis.py
 create mode 100644 code/scripts/timit.py

diff --git a/code/pylearn2/__init__.py b/code/pylearn2/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/code/pylearn2/datasets/__init__.py b/code/pylearn2/datasets/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/code/pylearn2/datasets/timit.py b/code/pylearn2/datasets/timit.py
new file mode 100644
index 0000000..a7719ab
--- /dev/null
+++ b/code/pylearn2/datasets/timit.py
@@ -0,0 +1,119 @@
+"""
+Pylearn2 wrapper for the TIMIT dataset
+"""
+__authors__ = ["Vincent Dumoulin"]
+__copyright__ = "Copyright 2014, Universite de Montreal"
+__credits__ = ["Laurent Dinh", "Vincent Dumoulin"]
+__license__ = "3-clause BSD"
+__maintainer__ = "Vincent Dumoulin"
+__email__ = "dumouliv@iro"
+
+
+import os.path
+import cPickle
+import numpy
+from pylearn2.utils import serial
+from pylearn2.utils.iteration import resolve_iterator_class
+from pylearn2.datasets.dataset import Dataset
+from pylearn2.space import CompositeSpace, VectorSpace
+from research.code.scripts.segmentaxis import segment_axis
+
+
+class TIMIT(Dataset):
+    """
+    TIMIT dataset
+    """
+    _default_seed = (17, 2, 946)
+
+    def __init__(self, which_set, frame_length, overlap=0,
+                 frames_per_example=1, rng=_default_seed):
+        """
+        Parameters
+        ----------
+        which_set : str
+            Either "train", "valid" or "test"
+        frame_length : int
+            Number of acoustic samples contained in a frame
+        overlap : int, optional
+            Number of overlapping acoustic samples for two consecutive
+            frames. Defaults to 0, meaning frames don't overlap.
+        frames_per_example : int, optional
+            Number of frames in a training example. Defaults to 1.
+        rng : object, optional
+            A random number generator used for picking random indices into
+            the design matrix when choosing minibatches.
+        """
+        # Check which_set
+        if which_set not in ['train', 'valid', 'test']:
+            raise ValueError(which_set + " is not a recognized value. " +
+                             "Valid values are ['train', 'valid', 'test'].")
+
+        self.frame_length = frame_length
+        self.overlap = overlap
+        self.frames_per_example = frames_per_example
+
+        # Create file paths
+        timit_base_path = os.path.join(os.environ["PYLEARN2_DATA_PATH"],
+                                       "timit/readable")
+        speaker_info_list_path = os.path.join(timit_base_path,
+                                              "spkrinfo.npy")
+        phonemes_list_path = os.path.join(timit_base_path,
+                                          "reduced_phonemes.pkl")
+        words_list_path = os.path.join(timit_base_path, "words.pkl")
+        speaker_features_list_path = os.path.join(timit_base_path,
+                                                  "spkr_feature_names.pkl")
+        speaker_id_list_path = os.path.join(timit_base_path,
+                                            "speakers_ids.pkl")
+        raw_wav_path = os.path.join(timit_base_path,
+                                    which_set + "_x_raw.npy")
+        phonemes_path = os.path.join(timit_base_path,
+                                     which_set + "_redux_phn.npy")
+        sequences_to_phonemes_path = os.path.join(timit_base_path,
+                                                  which_set +
+                                                  "_seq_to_phn.npy")
+        words_path = os.path.join(timit_base_path, which_set + "_wrd.npy")
+        sequences_to_words_path = os.path.join(timit_base_path,
+                                               which_set +
+                                               "_seq_to_wrd.npy")
+        speaker_path = os.path.join(timit_base_path, which_set + "_spkr.npy")
+
+        # Load data
+        self.speaker_info_list = serial.load(
+            speaker_info_list_path).tolist().toarray()
+        self.speaker_id_list = serial.load(speaker_id_list_path)
+        self.speaker_features_list = serial.load(speaker_features_list_path)
+        self.words_list = serial.load(words_list_path)
+        self.phonemes_list = serial.load(phonemes_list_path)
+        self.raw_wav = serial.load(raw_wav_path)
+        self.phonemes = serial.load(phonemes_path)
+        self.sequences_to_phonemes = serial.load(sequences_to_phonemes_path)
+        self.words = serial.load(words_path)
+        sequences_to_words = serial.load(sequences_to_words_path)
+        speaker_id = numpy.asarray(serial.load(speaker_path), 'int')
+
+        # Segment sequences into frames and build the visiting order
+        visiting_order = []
+        for i, sequence in enumerate(self.raw_wav):
+            segmented_sequence = segment_axis(sequence, self.frame_length,
+                                              self.overlap)
+            self.raw_wav[i] = segmented_sequence
+            for j in xrange(0, segmented_sequence.shape[0] -
+                            self.frames_per_example):
+                visiting_order.append((i, j))
+        self.visiting_order = visiting_order
+
+        # DataSpecs
+        X_space = VectorSpace(dim=self.frame_length *
+                              self.frames_per_example)
+        X_source = 'features'
+        y_space = VectorSpace(dim=self.frame_length)
+        y_source = 'targets'
+        space = CompositeSpace((X_space, y_space))
+        source = (X_source, y_source)
+        self.data_specs = (space, source)
+
+    def get_data_specs(self):
+        """
+        Returns the data_specs specifying how the data is internally stored.
+
+        This is the format the data returned by `self.get_data()` will be.
+        """
+        return self.data_specs
+
+
+if __name__ == "__main__":
+    timit = TIMIT("train", frame_length=20, overlap=10,
+                  frames_per_example=4)
diff --git a/code/pylearn2/utils/__init__.py b/code/pylearn2/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/code/pylearn2/utils/iteration.py b/code/pylearn2/utils/iteration.py
new file mode 100644
index 0000000..e69de29
diff --git a/code/scripts/segmentaxis.py b/code/scripts/segmentaxis.py
new file mode 100644
index 0000000..1bba1c7
--- /dev/null
+++ b/code/scripts/segmentaxis.py
@@ -0,0 +1,110 @@
+import warnings
+
+import numpy as np
+from numpy.lib.stride_tricks import as_strided
+
+
+def segment_axis(a, length, overlap=0, axis=None, end='cut', endvalue=0):
+    """Generate a new array that chops the given array along the given axis
+    into overlapping frames.
+
+    Parameters
+    ----------
+    a : array-like
+        The array to segment
+    length : int
+        The length of each frame
+    overlap : int, optional
+        The number of array elements by which the frames should overlap
+    axis : int, optional
+        The axis to operate on; if None, act on the flattened array
+    end : {'cut', 'wrap', 'pad'}, optional
+        What to do with the last frame, if the array is not evenly
+        divisible into pieces.
+
+        - 'cut'  Simply discard the extra values
+        - 'wrap' Copy values from the beginning of the array
+        - 'pad'  Pad with a constant value
+
+    endvalue : object
+        The value to use for end='pad'
+
+    Examples
+    --------
+    >>> segment_axis(np.arange(10), 4, 2)
+    array([[0, 1, 2, 3],
+           [2, 3, 4, 5],
+           [4, 5, 6, 7],
+           [6, 7, 8, 9]])
+
+    Notes
+    -----
+    The array is not copied unless necessary (either because it is
+    unevenly strided and being flattened or because end is set to
+    'pad' or 'wrap').
+    """
+    if axis is None:
+        a = np.ravel(a)  # may copy
+        axis = 0
+
+    l = a.shape[axis]
+
+    if overlap >= length:
+        raise ValueError("frames cannot overlap by more than 100%")
+    if overlap < 0 or length <= 0:
+        raise ValueError("overlap must be nonnegative and length must be "
+                         "positive")
+
+    if l < length or (l - length) % (length - overlap):
+        if l > length:
+            roundup = length + \
+                (1 + (l - length) // (length - overlap)) * (length - overlap)
+            rounddown = length + \
+                ((l - length) // (length - overlap)) * (length - overlap)
+        else:
+            roundup = length
+            rounddown = 0
+        assert rounddown < l < roundup
+        assert roundup == rounddown + (length - overlap) or \
+            (roundup == length and rounddown == 0)
+        a = a.swapaxes(-1, axis)
+
+        if end == 'cut':
+            a = a[..., :rounddown]
+        elif end in ['pad', 'wrap']:  # copying will be necessary
+            s = list(a.shape)
+            s[-1] = roundup
+            b = np.empty(s, dtype=a.dtype)
+            b[..., :l] = a
+            if end == 'pad':
+                b[..., l:] = endvalue
+            elif end == 'wrap':
+                b[..., l:] = a[..., :roundup - l]
+            a = b
+
+        a = a.swapaxes(-1, axis)
+
+    l = a.shape[axis]
+    if l == 0:
+        raise ValueError("Not enough data points to segment array in "
+                         "'cut' mode; try 'pad' or 'wrap'")
+    assert l >= length
+    assert (l - length) % (length - overlap) == 0
+    n = 1 + (l - length) // (length - overlap)
+    s = a.strides[axis]
+    newshape = a.shape[:axis] + (n, length) + a.shape[axis + 1:]
+    newstrides = a.strides[:axis] + ((length - overlap) * s, s) + \
+        a.strides[axis + 1:]
+
+    try:
+        return as_strided(a, strides=newstrides, shape=newshape)
+    except TypeError:
+        warnings.warn("Problem with ndarray creation forces copy.")
+        a = a.copy()
+        # Shape doesn't change but strides does
+        newstrides = a.strides[:axis] + ((length - overlap) * s, s) + \
+            a.strides[axis + 1:]
+        return as_strided(a, strides=newstrides, shape=newshape)
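+
+
+if __name__ == "__main__":
+    # Quick demo (added for illustration): reproduces the docstring
+    # example, framing 0..9 into windows of length 4 with overlap 2.
+    print segment_axis(np.arange(10), 4, 2)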
+ """ + timit_path = os.path.join(os.environ["PYLEARN2_DATA_PATH"], \ + "timit") + + if os.path.isdir(timit_path): + self.timit_path = timit_path + else: + raise IOError(timit_path + " is not a valid path !") + + self.has_train = False + self.has_valid = False + self.has_test = False + + spkrinfo_path = os.path.join(self.timit_path, "spkrinfo.npy") + phns_path = os.path.join(self.timit_path, "reduced_phonemes.pkl") + wrds_path = os.path.join(self.timit_path, "words.pkl") + spkrfeat_path = os.path.join(self.timit_path, "spkr_feature_names.pkl") + spkrid_path = os.path.join(self.timit_path, "speakers_ids.pkl") + + for p in [spkrinfo_path, wrds_path, phns_path, spkrfeat_path, \ + spkrid_path]: + if not os.path.isfile(p): + raise IOError(p + " is not a valid path !") + + ## Speaker information + print "Loading speaker information...", + self.spkrinfo = np.load(spkrinfo_path).tolist().toarray() + print "Done !" + # print str(self.spkrinfo.shape[0]) + " different speakers." + + print "Loading speakers list...", + self.spkrid = cPickle.load(open(spkrid_path, "r")) + print "Done !" + print str(len(self.spkrid)) + " different speakers." + + print "Loading speakers list...", + self.spkrfeat = cPickle.load(open(spkrfeat_path, "r")) + print "Done !" + print str(len(self.spkrfeat)) + " different features per speaker." + + # Words + print "Loading words list...", + self.words = cPickle.load(open(wrds_path, "r")) + print "Done !" + print str(len(self.words)) + " different word." + + # Phonemes + print "Loading phonemes list...", + self.phonemes = np.load(open(phns_path, "r")) + print "Done !" + print str(len(self.phonemes)) + " different phonemes." + + + def load(self, subset): + """ + Extract the data from the files given the path of the preprocessed + TIMIT. It also prints some information on the dataset. + timit_path: path to the preprocessed TIMIT. + subset: either "train", "valid" or "test". + """ + self.check_subset_value(subset) + + print "Loading dataset subset." + # Build paths + print "Building paths...", + raw_wav_path = os.path.join(self.timit_path, subset+"_x_raw.npy") + phn_path = os.path.join(self.timit_path, subset+"_redux_phn.npy") + seq_to_phn_path = os.path.join(self.timit_path, \ + subset+"_seq_to_phn.npy") + wrd_path = os.path.join(self.timit_path, subset+"_wrd.npy") + seq_to_wrd_path = os.path.join(self.timit_path, \ + subset+"_seq_to_wrd.npy") + spkr_path = os.path.join(self.timit_path, subset+"_spkr.npy") + print "Done !" + + # Checking the validity of the paths + print "Checking path validity...", + for p in [raw_wav_path, phn_path, seq_to_phn_path, wrd_path, \ + seq_to_wrd_path, spkr_path]: + if not os.path.isfile(p): + raise IOError(p + " is not a valid path !") + + print "Done !" + + # Acoustic samples + print "Loading accoustic samples...", + raw_wav = np.load(raw_wav_path) + raw_wav_len = map(lambda x:len(x), raw_wav) + print "Done !" + print str(raw_wav.shape[0]) + " sentences." + + # Side information + ## Phonemes + print "Loading phonemes...", + phn = np.load(phn_path) + seq_to_phn = np.load(seq_to_phn_path) + print "Done !" + + ## Words + print "Loading words...", + wrd = np.load(wrd_path) + seq_to_wrd = np.load(seq_to_wrd_path) + print "Done !" + + ## Speaker information + print "Loading speaker information...", + spkr_id = np.asarray(np.load(spkr_path), 'int') + print "Done !" 
+
+    def clear(self, subset):
+        """
+        Given the subset id, this method will unload the subset from the
+        class.
+        """
+        self.check_subset_value(subset)
+        self.check_subset_presence(subset)
+
+        del self.__dict__[subset + "_raw_wav"]
+        del self.__dict__[subset + "_raw_wav_len"]
+        del self.__dict__[subset + "_n_seq"]
+        del self.__dict__[subset + "_phn"]
+        del self.__dict__[subset + "_seq_to_phn"]
+        del self.__dict__[subset + "_wrd"]
+        del self.__dict__[subset + "_seq_to_wrd"]
+        del self.__dict__[subset + "_spkr"]
+
+        # Lower the flag advertising the presence of data
+        self.__dict__["has_" + subset] = False
+
+    def check_subset_value(self, subset):
+        if subset not in {"train", "valid", "test"}:
+            raise ValueError("Invalid subset!")
+
+    def check_subset_presence(self, subset):
+        if not self.__dict__["has_" + subset]:
+            raise AssertionError("The data was not loaded yet!")
+
+    def sanity_check(self, subset):
+        """
+        Test a given subset for the consistency of our hypotheses.
+        """
+        self.check_subset_value(subset)
+        self.check_subset_presence(subset)
+
+        print "Check the number of speakers..."
+        if self.spkrinfo.shape[0] == len(self.spkrid):
+            print "OK."
+        else:
+            print "KO."
+
+        print "Check lengths..."
+        short = ["phn", "wrd"]
+        long = ["phonemes", "words"]
+        for i in range(len(short)):
+            if self.__dict__[subset + "_seq_to_" + short[i]][-1, -1] == \
+                    self.__dict__[subset + "_" + short[i]].shape[0]:
+                print "OK for " + long[i] + "."
+            else:
+                print "KO for " + long[i] + "."
+
+        print "Check multinomial constraints..."
+        feature_name = ["dialect", "education", "race", "sex"]
+        feature_interval = [(1, 9), (9, 15), (16, 24), (24, 26)]
+        for i in range(len(feature_name)):
+            start = feature_interval[i][0]
+            end = feature_interval[i][1]
+            if self.spkrinfo[:, start:end].sum() == self.spkrinfo.shape[0]:
+                print "OK for " + feature_name[i] + "."
+            else:
+                print "KO for " + feature_name[i] + "."
+
+    """
+    This section is about extracting sequences of varying size.
+    """
+ + """ + self.check_subset_value(subset) + self.check_subset_presence(subset) + + # Check if the id is valid + n_seq = self.__dict__[subset+"_n_seq"] + if seq_id >= n_seq: + raise ValueError("This sequence does not exist.") + + # Get the sequence + wav_seq = self.__dict__[subset+"_raw_wav"][seq_id] + + # Get the phonemes + phn_l_start = self.__dict__[subset+"_seq_to_phn"][seq_id][0] + phn_l_end = self.__dict__[subset+"_seq_to_phn"][seq_id][1] + phn_start_end = self.__dict__[subset+"_phn"][phn_l_start:phn_l_end] + phn_seq = np.zeros_like(wav_seq) + # Some timestamp does not correspond to any phoneme so 0 is + # the index for "NO_PHONEME" and the other index are shifted by one + for (phn_start, phn_end, phn) in phn_start_end: + phn_seq[phn_start:phn_end] = phn+1 + + # Get the words + wrd_l_start = self.__dict__[subset+"_seq_to_wrd"][seq_id][0] + wrd_l_end = self.__dict__[subset+"_seq_to_wrd"][seq_id][1] + wrd_start_end = self.__dict__[subset+"_wrd"][wrd_l_start:wrd_l_end] + wrd_seq = np.zeros_like(wav_seq) + # Some timestamp does not correspond to any word so 0 is + # the index for "NO_WORD" and the other index are shifted by one + for (wrd_start, wrd_end, wrd) in wrd_start_end: + wrd_seq[wrd_start:wrd_end] = wrd+1 + + # Binary variable announcing the end of the word or phoneme + end_phn = np.zeros_like(phn_seq) + end_wrd = np.zeros_like(wrd_seq) + + for i in range(len(phn_seq) - 1): + if phn_seq[i] != phn_seq[i+1]: + end_phn[i] = 1 + if wrd_seq[i] != wrd_seq[i+1]: + end_wrd[i] = 1 + + end_phn[-1] = 1 + end_wrd[-1] = 1 + + # Find the speaker id + spkr_id = self.__dict__[subset+"_spkr"][seq_id] + # Find the speaker info + spkr_info = self.spkrinfo[spkr_id] + + # Segment into frames + wav_seq = segment_axis(wav_seq, frame_length, overlap) + + # Take the most occurring phoneme in a sequence + phn_seq = segment_axis(phn_seq, frame_length, overlap) + phn_seq = scipy.stats.mode(phn_seq, axis=1)[0].flatten() + phn_seq = np.asarray(phn_seq, dtype='int') + + # Take the most occurring word in a sequence + wrd_seq = segment_axis(wrd_seq, frame_length, overlap) + wrd_seq = scipy.stats.mode(wrd_seq, axis=1)[0].flatten() + wrd_seq = np.asarray(wrd_seq, dtype='int') + + # Announce the end if and only if it was announced in the current frame + end_phn = segment_axis(end_phn, frame_length, overlap) + end_phn = end_phn.max(axis=1) + end_wrd = segment_axis(end_wrd, frame_length, overlap) + end_wrd = end_wrd.max(axis=1) + + return [wav_seq, phn_seq, end_phn, wrd_seq, end_wrd, spkr_info] + + def get_n_seq(self, subset): + """ + Given the subset id, return the number of sequence in it. + + """ + self.check_subset_value(subset) + self.check_subset_presence(subset) + + return self.__dict__[subset+"_n_seq"] + + """ + This section is about extracting sequences of fixed size. 
+ + """ + + def init_markov_frames(self, subset, n_frames_in, frame_length, overlap): + """ + Given the subset id, the frame length, the overlap between frames and + the number of frames we take as input to predict the next, this method + initializes the get_markov_frames method + + """ + self.check_subset_value(subset) + self.check_subset_presence(subset) + + # Compute the required length to build a frame sequence of fixed size + wav_length = n_frames_in*(frame_length - overlap) + frame_length + + # Compute the number of unique frame sequence we can extract from a + # acoustic samples sequence + actual_seq_length = np.array(self.__dict__[subset+"_raw_wav_len"]) \ + - (frame_length - overlap) + 1 + + self.__dict__[subset+"_n_frames_in"] = n_frames_in + self.__dict__[subset+"_frame_length"] = frame_length + self.__dict__[subset+"_overlap"] = overlap + self.__dict__[subset+"_wav_length"] = wav_length + + self.__dict__[subset+"_intervals_seq"] = \ + np.zeros((actual_seq_length.shape[0] + 1)) + self.__dict__[subset+"_intervals_seq"][1:] = \ + np.cumsum(actual_seq_length) + + def get_markov_frames(self, subset, id): + """ + Given the subset and an id, this method returns the list [input_frames, + input_phonemes, input_words, output_phoneme, output_word, spkr_info, + output_frame, ending_phoneme, ending_word]. + + """ + assert subset+"_intervals_seq" in self.__dict__.keys() + assert id < self.__dict__[subset+"_intervals_seq"][-1] + + n_frames_in = self.__dict__[subset+"_n_frames_in"] + frame_length = self.__dict__[subset+"_frame_length"] + overlap = self.__dict__[subset+"_overlap"] + wav_length = self.__dict__[subset+"_wav_length"] + intervals_seq = self.__dict__[subset+"_intervals_seq"] + + # Find the acoustic samples sequence we are looking for + seq_id = np.digitize([id], intervals_seq) - 1 + seq_id = seq_id[0] + + # Find the position in this sequence + idx_in_seq = id - intervals_seq[seq_id] - (wav_length - frame_length \ + + overlap) + + + # Get the sequence + wav_seq = self.__dict__[subset+"_raw_wav"][seq_id] + + # Get the phonemes + phn_l_start = self.__dict__[subset+"_seq_to_phn"][seq_id][0] + phn_l_end = self.__dict__[subset+"_seq_to_phn"][seq_id][1] + phn_start_end = self.__dict__[subset+"_phn"][phn_l_start:phn_l_end] + phn_seq = np.zeros_like(wav_seq) + # Some timestamp does not correspond to any phoneme so 0 is + # the index for "NO_PHONEME" and the other index are shifted by one + for (phn_start, phn_end, phn) in phn_start_end: + phn_seq[phn_start:phn_end] = phn+1 + + # Get the words + wrd_l_start = self.__dict__[subset+"_seq_to_wrd"][seq_id][0] + wrd_l_end = self.__dict__[subset+"_seq_to_wrd"][seq_id][1] + wrd_start_end = self.__dict__[subset+"_wrd"][wrd_l_start:wrd_l_end] + wrd_seq = np.zeros_like(wav_seq) + # Some timestamp does not correspond to any word so 0 is + # the index for "NO_WORD" and the other index are shifted by one + for (wrd_start, wrd_end, wrd) in wrd_start_end: + wrd_seq[wrd_start:wrd_end] = wrd+1 + + # Binary variable announcing the end of the word or phoneme + end_phn = np.zeros_like(phn_seq) + end_wrd = np.zeros_like(wrd_seq) + + for i in range(len(phn_seq) - 1): + if phn_seq[i] != phn_seq[i+1]: + end_phn[i] = 1 + if wrd_seq[i] != wrd_seq[i+1]: + end_wrd[i] = 1 + + end_phn[-1] = 1 + end_wrd[-1] = 1 + + # Find the speaker id + spkr_id = self.__dict__[subset+"_spkr"][seq_id] + # Find the speaker info + spkr_info = self.spkrinfo[spkr_id] + + # Pick the selected segment + padded_wav_seq = np.zeros((wav_length)) + if idx_in_seq < 0: + padded_wav_seq[-idx_in_seq:] = 
+
+    def get_markov_frames(self, subset, id):
+        """
+        Given the subset and an id, this method returns the list
+        [input_frames, input_phonemes, input_words, output_phoneme,
+        output_word, spkr_info, output_frame, ending_phoneme, ending_word].
+        """
+        assert subset + "_intervals_seq" in self.__dict__.keys()
+        assert id < self.__dict__[subset + "_intervals_seq"][-1]
+
+        n_frames_in = self.__dict__[subset + "_n_frames_in"]
+        frame_length = self.__dict__[subset + "_frame_length"]
+        overlap = self.__dict__[subset + "_overlap"]
+        wav_length = self.__dict__[subset + "_wav_length"]
+        intervals_seq = self.__dict__[subset + "_intervals_seq"]
+
+        # Find the acoustic samples sequence we are looking for
+        seq_id = np.digitize([id], intervals_seq) - 1
+        seq_id = seq_id[0]
+
+        # Find the position in this sequence
+        idx_in_seq = id - intervals_seq[seq_id] - (wav_length -
+                                                   frame_length + overlap)
+
+        # Get the sequence
+        wav_seq = self.__dict__[subset + "_raw_wav"][seq_id]
+
+        # Get the phonemes
+        phn_l_start = self.__dict__[subset + "_seq_to_phn"][seq_id][0]
+        phn_l_end = self.__dict__[subset + "_seq_to_phn"][seq_id][1]
+        phn_start_end = self.__dict__[subset + "_phn"][phn_l_start:phn_l_end]
+        phn_seq = np.zeros_like(wav_seq)
+        # Some timestamps do not correspond to any phoneme, so 0 is the
+        # index for "NO_PHONEME" and the other indices are shifted by one
+        for (phn_start, phn_end, phn) in phn_start_end:
+            phn_seq[phn_start:phn_end] = phn + 1
+
+        # Get the words
+        wrd_l_start = self.__dict__[subset + "_seq_to_wrd"][seq_id][0]
+        wrd_l_end = self.__dict__[subset + "_seq_to_wrd"][seq_id][1]
+        wrd_start_end = self.__dict__[subset + "_wrd"][wrd_l_start:wrd_l_end]
+        wrd_seq = np.zeros_like(wav_seq)
+        # Some timestamps do not correspond to any word, so 0 is the
+        # index for "NO_WORD" and the other indices are shifted by one
+        for (wrd_start, wrd_end, wrd) in wrd_start_end:
+            wrd_seq[wrd_start:wrd_end] = wrd + 1
+
+        # Binary variables announcing the end of the word or phoneme
+        end_phn = np.zeros_like(phn_seq)
+        end_wrd = np.zeros_like(wrd_seq)
+
+        for i in range(len(phn_seq) - 1):
+            if phn_seq[i] != phn_seq[i + 1]:
+                end_phn[i] = 1
+            if wrd_seq[i] != wrd_seq[i + 1]:
+                end_wrd[i] = 1
+
+        end_phn[-1] = 1
+        end_wrd[-1] = 1
+
+        # Find the speaker id
+        spkr_id = self.__dict__[subset + "_spkr"][seq_id]
+        # Find the speaker info
+        spkr_info = self.spkrinfo[spkr_id]
+
+        # Pick the selected segment
+        padded_wav_seq = np.zeros(wav_length)
+        if idx_in_seq < 0:
+            padded_wav_seq[-idx_in_seq:] = \
+                wav_seq[0:(wav_length + idx_in_seq)]
+        else:
+            padded_wav_seq = wav_seq[idx_in_seq:(idx_in_seq + wav_length)]
+
+        padded_phn_seq = np.zeros(wav_length)
+        if idx_in_seq < 0:
+            padded_phn_seq[-idx_in_seq:] = \
+                phn_seq[0:(wav_length + idx_in_seq)]
+        else:
+            padded_phn_seq = phn_seq[idx_in_seq:(idx_in_seq + wav_length)]
+
+        padded_wrd_seq = np.zeros(wav_length)
+        if idx_in_seq < 0:
+            padded_wrd_seq[-idx_in_seq:] = \
+                wrd_seq[0:(wav_length + idx_in_seq)]
+        else:
+            padded_wrd_seq = wrd_seq[idx_in_seq:(idx_in_seq + wav_length)]
+
+        # Segment into frames
+        wav_seq = segment_axis(padded_wav_seq, frame_length, overlap)
+
+        # Take the most frequent phoneme in each frame
+        phn_seq = segment_axis(padded_phn_seq, frame_length, overlap)
+        phn_seq = scipy.stats.mode(phn_seq, axis=1)[0].flatten()
+        phn_seq = np.asarray(phn_seq, dtype='int')
+
+        # Take the most frequent word in each frame
+        wrd_seq = segment_axis(padded_wrd_seq, frame_length, overlap)
+        wrd_seq = scipy.stats.mode(wrd_seq, axis=1)[0].flatten()
+        wrd_seq = np.asarray(wrd_seq, dtype='int')
+
+        # Announce the end if and only if it was announced in the current
+        # frame
+        end_phn = segment_axis(end_phn, frame_length, overlap)
+        end_phn = end_phn.max(axis=1)
+        end_wrd = segment_axis(end_wrd, frame_length, overlap)
+        end_wrd = end_wrd.max(axis=1)
+
+        # Put names on the outputs
+        input_frames = wav_seq[:-1]
+        input_phonemes = phn_seq[:-1]
+        input_words = wrd_seq[:-1]
+        output_phoneme = phn_seq[-1]
+        output_word = wrd_seq[-1]
+        output_frame = wav_seq[-1]
+        ending_phoneme = end_phn[-1]
+        ending_word = end_wrd[-1]
+
+        return [input_frames, input_phonemes, input_words, output_phoneme,
+                output_word, spkr_info, output_frame, ending_phoneme,
+                ending_word]
+
+    def get_n_markov_frames(self, subset):
+        """
+        Given the subset id, return the number of fixed-size frame segments
+        in it.
+        """
+        self.check_subset_value(subset)
+        self.check_subset_presence(subset)
+        assert subset + "_intervals_seq" in self.__dict__.keys()
+
+        return self.__dict__[subset + "_intervals_seq"][-1]
+
+
+if __name__ == "__main__":
+    timit = TIMIT()
+
+    import pdb; pdb.set_trace()
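+    # At the pdb prompt one can explore the data, e.g. (a sketch assuming
+    # the preprocessed files are available; the parameter values are
+    # hypothetical):
+    #     timit.load("train")
+    #     timit.init_markov_frames("train", n_frames_in=4,
+    #                              frame_length=160, overlap=80)
+    #     example = timit.get_markov_frames("train", 0)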