From c062007585a47904415551ebe95b6610d4efd6da Mon Sep 17 00:00:00 2001
From: Vincent Dumoulin
Date: Sun, 9 Feb 2014 16:15:57 -0500
Subject: [PATCH] Start working on TIMIT dataset for Pylearn2

---
 code/pylearn2/__init__.py          |   0
 code/pylearn2/datasets/__init__.py |   0
 code/pylearn2/datasets/timit.py    | 119 ++++++++
 code/pylearn2/utils/__init__.py    |   0
 code/pylearn2/utils/iteration.py   |   0
 code/scripts/segmentaxis.py        | 110 +++++++
 code/scripts/timit.py              | 474 +++++++++++++++++++++++++++++
 7 files changed, 703 insertions(+)
 create mode 100644 code/pylearn2/__init__.py
 create mode 100644 code/pylearn2/datasets/__init__.py
 create mode 100644 code/pylearn2/datasets/timit.py
 create mode 100644 code/pylearn2/utils/__init__.py
 create mode 100644 code/pylearn2/utils/iteration.py
 create mode 100644 code/scripts/segmentaxis.py
 create mode 100644 code/scripts/timit.py

diff --git a/code/pylearn2/__init__.py b/code/pylearn2/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/code/pylearn2/datasets/__init__.py b/code/pylearn2/datasets/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/code/pylearn2/datasets/timit.py b/code/pylearn2/datasets/timit.py
new file mode 100644
index 0000000..a7719ab
--- /dev/null
+++ b/code/pylearn2/datasets/timit.py
@@ -0,0 +1,119 @@
+"""
+Pylearn2 wrapper for the TIMIT dataset
+"""
+__authors__ = ["Vincent Dumoulin"]
+__copyright__ = "Copyright 2014, Universite de Montreal"
+__credits__ = ["Laurent Dinh", "Vincent Dumoulin"]
+__license__ = "3-clause BSD"
+__maintainer__ = "Vincent Dumoulin"
+__email__ = "dumouliv@iro"
+
+
+import os.path
+import cPickle
+import numpy
+from pylearn2.utils import serial
+from pylearn2.utils.iteration import resolve_iterator_class
+from pylearn2.datasets.dataset import Dataset
+from pylearn2.space import CompositeSpace, VectorSpace
+from research.code.scripts.segmentaxis import segment_axis
+
+
+class TIMIT(Dataset):
+    """
+    TIMIT dataset
+    """
+    _default_seed = (17, 2, 946)
+
+    def __init__(self, which_set, frame_length, overlap=0,
+                 frames_per_example=1, rng=_default_seed):
+        """
+        Parameters
+        ----------
+        which_set : str
+            Either "train", "valid" or "test"
+        frame_length : int
+            Number of acoustic samples contained in a frame
+        overlap : int, optional
+            Number of overlapping acoustic samples for two consecutive
+            frames. Defaults to 0, meaning frames don't overlap.
+        frames_per_example : int, optional
+            Number of frames in a training example. Defaults to 1.
+        rng : object, optional
+            A random number generator used for picking random indices into
+            the design matrix when choosing minibatches.
+        """
+        # Check which_set
+        if which_set not in ['train', 'valid', 'test']:
+            raise ValueError(which_set + " is not a recognized value. " +
+                             "Valid values are ['train', 'valid', 'test'].")
+
+        self.frame_length = frame_length
+        self.overlap = overlap
+        self.frames_per_example = frames_per_example
+
+        # Create file paths
+        timit_base_path = os.path.join(os.environ["PYLEARN2_DATA_PATH"],
+                                       "timit/readable")
+        speaker_info_list_path = os.path.join(timit_base_path,
+                                              "spkrinfo.npy")
+        phonemes_list_path = os.path.join(timit_base_path,
+                                          "reduced_phonemes.pkl")
+        words_list_path = os.path.join(timit_base_path, "words.pkl")
+        speaker_features_list_path = os.path.join(timit_base_path,
+                                                  "spkr_feature_names.pkl")
+        speaker_id_list_path = os.path.join(timit_base_path,
+                                            "speakers_ids.pkl")
+        raw_wav_path = os.path.join(timit_base_path,
+                                    which_set + "_x_raw.npy")
+        phonemes_path = os.path.join(timit_base_path,
+                                     which_set + "_redux_phn.npy")
+        sequences_to_phonemes_path = os.path.join(timit_base_path,
+                                                  which_set +
+                                                  "_seq_to_phn.npy")
+        words_path = os.path.join(timit_base_path, which_set + "_wrd.npy")
+        sequences_to_words_path = os.path.join(timit_base_path,
+                                               which_set +
+                                               "_seq_to_wrd.npy")
+        speaker_path = os.path.join(timit_base_path, which_set + "_spkr.npy")
+
+        # Load data
+        self.speaker_info_list = serial.load(
+            speaker_info_list_path).tolist().toarray()
+        self.speaker_id_list = serial.load(speaker_id_list_path)
+        self.speaker_features_list = serial.load(speaker_features_list_path)
+        self.words_list = serial.load(words_list_path)
+        self.phonemes_list = serial.load(phonemes_list_path)
+        self.raw_wav = serial.load(raw_wav_path)
+        self.phonemes = serial.load(phonemes_path)
+        self.sequences_to_phonemes = serial.load(sequences_to_phonemes_path)
+        self.words = serial.load(words_path)
+        sequences_to_words = serial.load(sequences_to_words_path)
+        speaker_id = numpy.asarray(serial.load(speaker_path), 'int')
+
+        # Segment sequences into frames and build the visiting order
+        visiting_order = []
+        for i, sequence in enumerate(self.raw_wav):
+            segmented_sequence = segment_axis(sequence, self.frame_length,
+                                              self.overlap)
+            self.raw_wav[i] = segmented_sequence
+            for j in xrange(0, segmented_sequence.shape[0] -
+                            self.frames_per_example):
+                visiting_order.append((i, j))
+        self.visiting_order = visiting_order
+
+        # DataSpecs
+        X_space = VectorSpace(dim=self.frame_length *
+                              self.frames_per_example)
+        X_source = 'features'
+        y_space = VectorSpace(dim=self.frame_length)
+        y_source = 'targets'
+        space = CompositeSpace((X_space, y_space))
+        source = (X_source, y_source)
+        self.data_specs = (space, source)
+
+    def get_data_specs(self):
+        """
+        Returns the data_specs specifying how the data is internally stored.
+
+        This is the format the data returned by `self.get_data()` will be.
+        """
+        return self.data_specs
+
+
+if __name__ == "__main__":
+    timit = TIMIT("train", frame_length=20, overlap=10,
+                  frames_per_example=4)
diff --git a/code/pylearn2/utils/__init__.py b/code/pylearn2/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/code/pylearn2/utils/iteration.py b/code/pylearn2/utils/iteration.py
new file mode 100644
index 0000000..e69de29
diff --git a/code/scripts/segmentaxis.py b/code/scripts/segmentaxis.py
new file mode 100644
index 0000000..1bba1c7
--- /dev/null
+++ b/code/scripts/segmentaxis.py
@@ -0,0 +1,110 @@
+import warnings
+
+import numpy as np
+from numpy.lib.stride_tricks import as_strided
+
+
+def segment_axis(a, length, overlap=0, axis=None, end='cut', endvalue=0):
+    """Generate a new array that chops the given array along the given axis
+    into overlapping frames.
+
+    Parameters
+    ----------
+    a : array-like
+        The array to segment
+    length : int
+        The length of each frame
+    overlap : int, optional
+        The number of array elements by which the frames should overlap
+    axis : int, optional
+        The axis to operate on; if None, act on the flattened array
+    end : {'cut', 'wrap', 'pad'}, optional
+        What to do with the last frame, if the array is not evenly
+        divisible into pieces.
+
+        - 'cut'  Simply discard the extra values
+        - 'wrap' Copy values from the beginning of the array
+        - 'pad'  Pad with a constant value
+
+    endvalue : object
+        The value to use for end='pad'
+
+    Examples
+    --------
+    >>> segment_axis(np.arange(10), 4, 2)
+    array([[0, 1, 2, 3],
+           [2, 3, 4, 5],
+           [4, 5, 6, 7],
+           [6, 7, 8, 9]])
+
+    Notes
+    -----
+    The array is not copied unless necessary (either because it is
+    unevenly strided and being flattened or because end is set to
+    'pad' or 'wrap').
+    """
+    if axis is None:
+        a = np.ravel(a)  # may copy
+        axis = 0
+
+    l = a.shape[axis]
+
+    if overlap >= length:
+        raise ValueError("frames cannot overlap by more than 100%")
+    if overlap < 0 or length <= 0:
+        raise ValueError("overlap must be nonnegative and length must be "
+                         "positive")
+
+    if l < length or (l - length) % (length - overlap):
+        if l > length:
+            roundup = length + \
+                (1 + (l - length) // (length - overlap)) * (length - overlap)
+            rounddown = length + \
+                ((l - length) // (length - overlap)) * (length - overlap)
+        else:
+            roundup = length
+            rounddown = 0
+        assert rounddown < l < roundup
+        assert roundup == rounddown + (length - overlap) or \
+            (roundup == length and rounddown == 0)
+        a = a.swapaxes(-1, axis)
+
+        if end == 'cut':
+            a = a[..., :rounddown]
+        elif end in ['pad', 'wrap']:  # copying will be necessary
+            s = list(a.shape)
+            s[-1] = roundup
+            b = np.empty(s, dtype=a.dtype)
+            b[..., :l] = a
+            if end == 'pad':
+                b[..., l:] = endvalue
+            elif end == 'wrap':
+                b[..., l:] = a[..., :roundup - l]
+            a = b
+
+        a = a.swapaxes(-1, axis)
+
+    l = a.shape[axis]
+    if l == 0:
+        raise ValueError("Not enough data points to segment array in "
+                         "'cut' mode; try 'pad' or 'wrap'")
+    assert l >= length
+    assert (l - length) % (length - overlap) == 0
+    n = 1 + (l - length) // (length - overlap)
+    s = a.strides[axis]
+    newshape = a.shape[:axis] + (n, length) + a.shape[axis + 1:]
+    newstrides = a.strides[:axis] + ((length - overlap) * s, s) + \
+        a.strides[axis + 1:]
+
+    try:
+        return as_strided(a, strides=newstrides, shape=newshape)
+    except TypeError:
+        warnings.warn("Problem with ndarray creation forces copy.")
+        a = a.copy()
+        # Shape doesn't change but strides does
+        newstrides = a.strides[:axis] + ((length - overlap) * s, s) + \
+            a.strides[axis + 1:]
+        return as_strided(a, strides=newstrides, shape=newshape)
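+
+
+if __name__ == "__main__":
+    # Quick demo (added for illustration): reproduces the docstring
+    # example, framing 0..9 into windows of length 4 with overlap 2.
+    print segment_axis(np.arange(10), 4, 2)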
+ """ + timit_path = os.path.join(os.environ["PYLEARN2_DATA_PATH"], \ + "timit") + + if os.path.isdir(timit_path): + self.timit_path = timit_path + else: + raise IOError(timit_path + " is not a valid path !") + + self.has_train = False + self.has_valid = False + self.has_test = False + + spkrinfo_path = os.path.join(self.timit_path, "spkrinfo.npy") + phns_path = os.path.join(self.timit_path, "reduced_phonemes.pkl") + wrds_path = os.path.join(self.timit_path, "words.pkl") + spkrfeat_path = os.path.join(self.timit_path, "spkr_feature_names.pkl") + spkrid_path = os.path.join(self.timit_path, "speakers_ids.pkl") + + for p in [spkrinfo_path, wrds_path, phns_path, spkrfeat_path, \ + spkrid_path]: + if not os.path.isfile(p): + raise IOError(p + " is not a valid path !") + + ## Speaker information + print "Loading speaker information...", + self.spkrinfo = np.load(spkrinfo_path).tolist().toarray() + print "Done !" + # print str(self.spkrinfo.shape[0]) + " different speakers." + + print "Loading speakers list...", + self.spkrid = cPickle.load(open(spkrid_path, "r")) + print "Done !" + print str(len(self.spkrid)) + " different speakers." + + print "Loading speakers list...", + self.spkrfeat = cPickle.load(open(spkrfeat_path, "r")) + print "Done !" + print str(len(self.spkrfeat)) + " different features per speaker." + + # Words + print "Loading words list...", + self.words = cPickle.load(open(wrds_path, "r")) + print "Done !" + print str(len(self.words)) + " different word." + + # Phonemes + print "Loading phonemes list...", + self.phonemes = np.load(open(phns_path, "r")) + print "Done !" + print str(len(self.phonemes)) + " different phonemes." + + + def load(self, subset): + """ + Extract the data from the files given the path of the preprocessed + TIMIT. It also prints some information on the dataset. + timit_path: path to the preprocessed TIMIT. + subset: either "train", "valid" or "test". + """ + self.check_subset_value(subset) + + print "Loading dataset subset." + # Build paths + print "Building paths...", + raw_wav_path = os.path.join(self.timit_path, subset+"_x_raw.npy") + phn_path = os.path.join(self.timit_path, subset+"_redux_phn.npy") + seq_to_phn_path = os.path.join(self.timit_path, \ + subset+"_seq_to_phn.npy") + wrd_path = os.path.join(self.timit_path, subset+"_wrd.npy") + seq_to_wrd_path = os.path.join(self.timit_path, \ + subset+"_seq_to_wrd.npy") + spkr_path = os.path.join(self.timit_path, subset+"_spkr.npy") + print "Done !" + + # Checking the validity of the paths + print "Checking path validity...", + for p in [raw_wav_path, phn_path, seq_to_phn_path, wrd_path, \ + seq_to_wrd_path, spkr_path]: + if not os.path.isfile(p): + raise IOError(p + " is not a valid path !") + + print "Done !" + + # Acoustic samples + print "Loading accoustic samples...", + raw_wav = np.load(raw_wav_path) + raw_wav_len = map(lambda x:len(x), raw_wav) + print "Done !" + print str(raw_wav.shape[0]) + " sentences." + + # Side information + ## Phonemes + print "Loading phonemes...", + phn = np.load(phn_path) + seq_to_phn = np.load(seq_to_phn_path) + print "Done !" + + ## Words + print "Loading words...", + wrd = np.load(wrd_path) + seq_to_wrd = np.load(seq_to_wrd_path) + print "Done !" + + ## Speaker information + print "Loading speaker information...", + spkr_id = np.asarray(np.load(spkr_path), 'int') + print "Done !" 
+
+    def clear(self, subset):
+        """
+        Given the subset id, this method will unload the subset from the
+        class.
+        """
+        self.check_subset_value(subset)
+        self.check_subset_presence(subset)
+
+        del self.__dict__[subset + "_raw_wav"]
+        del self.__dict__[subset + "_raw_wav_len"]
+        del self.__dict__[subset + "_n_seq"]
+        del self.__dict__[subset + "_phn"]
+        del self.__dict__[subset + "_seq_to_phn"]
+        del self.__dict__[subset + "_wrd"]
+        del self.__dict__[subset + "_seq_to_wrd"]
+        del self.__dict__[subset + "_spkr"]
+
+        # Lower the flag advertising the presence of data
+        self.__dict__["has_" + subset] = False
+
+    def check_subset_value(self, subset):
+        if subset not in {"train", "valid", "test"}:
+            raise ValueError("Invalid subset!")
+
+    def check_subset_presence(self, subset):
+        if not self.__dict__["has_" + subset]:
+            raise AssertionError("The data was not loaded yet!")
+
+    def sanity_check(self, subset):
+        """
+        Test a given subset for the consistency of our hypotheses.
+        """
+        self.check_subset_value(subset)
+        self.check_subset_presence(subset)
+
+        print "Check the number of speakers..."
+        if self.spkrinfo.shape[0] == len(self.spkrid):
+            print "OK."
+        else:
+            print "KO."
+
+        print "Check lengths..."
+        short = ["phn", "wrd"]
+        long = ["phonemes", "words"]
+        for i in range(len(short)):
+            if self.__dict__[subset + "_seq_to_" + short[i]][-1, -1] == \
+                    self.__dict__[subset + "_" + short[i]].shape[0]:
+                print "OK for " + long[i] + "."
+            else:
+                print "KO for " + long[i] + "."
+
+        print "Check multinomial constraints..."
+        feature_name = ["dialect", "education", "race", "sex"]
+        feature_interval = [(1, 9), (9, 15), (16, 24), (24, 26)]
+        for i in range(len(feature_name)):
+            start = feature_interval[i][0]
+            end = feature_interval[i][1]
+            if self.spkrinfo[:, start:end].sum() == self.spkrinfo.shape[0]:
+                print "OK for " + feature_name[i] + "."
+            else:
+                print "KO for " + feature_name[i] + "."
+
+    """
+    This section is about extracting sequences of varying size.
+    """
+ + """ + self.check_subset_value(subset) + self.check_subset_presence(subset) + + # Check if the id is valid + n_seq = self.__dict__[subset+"_n_seq"] + if seq_id >= n_seq: + raise ValueError("This sequence does not exist.") + + # Get the sequence + wav_seq = self.__dict__[subset+"_raw_wav"][seq_id] + + # Get the phonemes + phn_l_start = self.__dict__[subset+"_seq_to_phn"][seq_id][0] + phn_l_end = self.__dict__[subset+"_seq_to_phn"][seq_id][1] + phn_start_end = self.__dict__[subset+"_phn"][phn_l_start:phn_l_end] + phn_seq = np.zeros_like(wav_seq) + # Some timestamp does not correspond to any phoneme so 0 is + # the index for "NO_PHONEME" and the other index are shifted by one + for (phn_start, phn_end, phn) in phn_start_end: + phn_seq[phn_start:phn_end] = phn+1 + + # Get the words + wrd_l_start = self.__dict__[subset+"_seq_to_wrd"][seq_id][0] + wrd_l_end = self.__dict__[subset+"_seq_to_wrd"][seq_id][1] + wrd_start_end = self.__dict__[subset+"_wrd"][wrd_l_start:wrd_l_end] + wrd_seq = np.zeros_like(wav_seq) + # Some timestamp does not correspond to any word so 0 is + # the index for "NO_WORD" and the other index are shifted by one + for (wrd_start, wrd_end, wrd) in wrd_start_end: + wrd_seq[wrd_start:wrd_end] = wrd+1 + + # Binary variable announcing the end of the word or phoneme + end_phn = np.zeros_like(phn_seq) + end_wrd = np.zeros_like(wrd_seq) + + for i in range(len(phn_seq) - 1): + if phn_seq[i] != phn_seq[i+1]: + end_phn[i] = 1 + if wrd_seq[i] != wrd_seq[i+1]: + end_wrd[i] = 1 + + end_phn[-1] = 1 + end_wrd[-1] = 1 + + # Find the speaker id + spkr_id = self.__dict__[subset+"_spkr"][seq_id] + # Find the speaker info + spkr_info = self.spkrinfo[spkr_id] + + # Segment into frames + wav_seq = segment_axis(wav_seq, frame_length, overlap) + + # Take the most occurring phoneme in a sequence + phn_seq = segment_axis(phn_seq, frame_length, overlap) + phn_seq = scipy.stats.mode(phn_seq, axis=1)[0].flatten() + phn_seq = np.asarray(phn_seq, dtype='int') + + # Take the most occurring word in a sequence + wrd_seq = segment_axis(wrd_seq, frame_length, overlap) + wrd_seq = scipy.stats.mode(wrd_seq, axis=1)[0].flatten() + wrd_seq = np.asarray(wrd_seq, dtype='int') + + # Announce the end if and only if it was announced in the current frame + end_phn = segment_axis(end_phn, frame_length, overlap) + end_phn = end_phn.max(axis=1) + end_wrd = segment_axis(end_wrd, frame_length, overlap) + end_wrd = end_wrd.max(axis=1) + + return [wav_seq, phn_seq, end_phn, wrd_seq, end_wrd, spkr_info] + + def get_n_seq(self, subset): + """ + Given the subset id, return the number of sequence in it. + + """ + self.check_subset_value(subset) + self.check_subset_presence(subset) + + return self.__dict__[subset+"_n_seq"] + + """ + This section is about extracting sequences of fixed size. 
+ + """ + + def init_markov_frames(self, subset, n_frames_in, frame_length, overlap): + """ + Given the subset id, the frame length, the overlap between frames and + the number of frames we take as input to predict the next, this method + initializes the get_markov_frames method + + """ + self.check_subset_value(subset) + self.check_subset_presence(subset) + + # Compute the required length to build a frame sequence of fixed size + wav_length = n_frames_in*(frame_length - overlap) + frame_length + + # Compute the number of unique frame sequence we can extract from a + # acoustic samples sequence + actual_seq_length = np.array(self.__dict__[subset+"_raw_wav_len"]) \ + - (frame_length - overlap) + 1 + + self.__dict__[subset+"_n_frames_in"] = n_frames_in + self.__dict__[subset+"_frame_length"] = frame_length + self.__dict__[subset+"_overlap"] = overlap + self.__dict__[subset+"_wav_length"] = wav_length + + self.__dict__[subset+"_intervals_seq"] = \ + np.zeros((actual_seq_length.shape[0] + 1)) + self.__dict__[subset+"_intervals_seq"][1:] = \ + np.cumsum(actual_seq_length) + + def get_markov_frames(self, subset, id): + """ + Given the subset and an id, this method returns the list [input_frames, + input_phonemes, input_words, output_phoneme, output_word, spkr_info, + output_frame, ending_phoneme, ending_word]. + + """ + assert subset+"_intervals_seq" in self.__dict__.keys() + assert id < self.__dict__[subset+"_intervals_seq"][-1] + + n_frames_in = self.__dict__[subset+"_n_frames_in"] + frame_length = self.__dict__[subset+"_frame_length"] + overlap = self.__dict__[subset+"_overlap"] + wav_length = self.__dict__[subset+"_wav_length"] + intervals_seq = self.__dict__[subset+"_intervals_seq"] + + # Find the acoustic samples sequence we are looking for + seq_id = np.digitize([id], intervals_seq) - 1 + seq_id = seq_id[0] + + # Find the position in this sequence + idx_in_seq = id - intervals_seq[seq_id] - (wav_length - frame_length \ + + overlap) + + + # Get the sequence + wav_seq = self.__dict__[subset+"_raw_wav"][seq_id] + + # Get the phonemes + phn_l_start = self.__dict__[subset+"_seq_to_phn"][seq_id][0] + phn_l_end = self.__dict__[subset+"_seq_to_phn"][seq_id][1] + phn_start_end = self.__dict__[subset+"_phn"][phn_l_start:phn_l_end] + phn_seq = np.zeros_like(wav_seq) + # Some timestamp does not correspond to any phoneme so 0 is + # the index for "NO_PHONEME" and the other index are shifted by one + for (phn_start, phn_end, phn) in phn_start_end: + phn_seq[phn_start:phn_end] = phn+1 + + # Get the words + wrd_l_start = self.__dict__[subset+"_seq_to_wrd"][seq_id][0] + wrd_l_end = self.__dict__[subset+"_seq_to_wrd"][seq_id][1] + wrd_start_end = self.__dict__[subset+"_wrd"][wrd_l_start:wrd_l_end] + wrd_seq = np.zeros_like(wav_seq) + # Some timestamp does not correspond to any word so 0 is + # the index for "NO_WORD" and the other index are shifted by one + for (wrd_start, wrd_end, wrd) in wrd_start_end: + wrd_seq[wrd_start:wrd_end] = wrd+1 + + # Binary variable announcing the end of the word or phoneme + end_phn = np.zeros_like(phn_seq) + end_wrd = np.zeros_like(wrd_seq) + + for i in range(len(phn_seq) - 1): + if phn_seq[i] != phn_seq[i+1]: + end_phn[i] = 1 + if wrd_seq[i] != wrd_seq[i+1]: + end_wrd[i] = 1 + + end_phn[-1] = 1 + end_wrd[-1] = 1 + + # Find the speaker id + spkr_id = self.__dict__[subset+"_spkr"][seq_id] + # Find the speaker info + spkr_info = self.spkrinfo[spkr_id] + + # Pick the selected segment + padded_wav_seq = np.zeros((wav_length)) + if idx_in_seq < 0: + padded_wav_seq[-idx_in_seq:] = 
+
+    def get_markov_frames(self, subset, id):
+        """
+        Given the subset and an id, this method returns the list
+        [input_frames, input_phonemes, input_words, output_phoneme,
+        output_word, spkr_info, output_frame, ending_phoneme, ending_word].
+        """
+        assert subset + "_intervals_seq" in self.__dict__.keys()
+        assert id < self.__dict__[subset + "_intervals_seq"][-1]
+
+        n_frames_in = self.__dict__[subset + "_n_frames_in"]
+        frame_length = self.__dict__[subset + "_frame_length"]
+        overlap = self.__dict__[subset + "_overlap"]
+        wav_length = self.__dict__[subset + "_wav_length"]
+        intervals_seq = self.__dict__[subset + "_intervals_seq"]
+
+        # Find the acoustic samples sequence we are looking for
+        seq_id = np.digitize([id], intervals_seq) - 1
+        seq_id = seq_id[0]
+
+        # Find the position in this sequence
+        idx_in_seq = id - intervals_seq[seq_id] - (wav_length -
+                                                   frame_length + overlap)
+
+        # Get the sequence
+        wav_seq = self.__dict__[subset + "_raw_wav"][seq_id]
+
+        # Get the phonemes
+        phn_l_start = self.__dict__[subset + "_seq_to_phn"][seq_id][0]
+        phn_l_end = self.__dict__[subset + "_seq_to_phn"][seq_id][1]
+        phn_start_end = self.__dict__[subset + "_phn"][phn_l_start:phn_l_end]
+        phn_seq = np.zeros_like(wav_seq)
+        # Some timestamps do not correspond to any phoneme, so 0 is the
+        # index for "NO_PHONEME" and the other indices are shifted by one
+        for (phn_start, phn_end, phn) in phn_start_end:
+            phn_seq[phn_start:phn_end] = phn + 1
+
+        # Get the words
+        wrd_l_start = self.__dict__[subset + "_seq_to_wrd"][seq_id][0]
+        wrd_l_end = self.__dict__[subset + "_seq_to_wrd"][seq_id][1]
+        wrd_start_end = self.__dict__[subset + "_wrd"][wrd_l_start:wrd_l_end]
+        wrd_seq = np.zeros_like(wav_seq)
+        # Some timestamps do not correspond to any word, so 0 is the
+        # index for "NO_WORD" and the other indices are shifted by one
+        for (wrd_start, wrd_end, wrd) in wrd_start_end:
+            wrd_seq[wrd_start:wrd_end] = wrd + 1
+
+        # Binary variables announcing the end of the word or phoneme
+        end_phn = np.zeros_like(phn_seq)
+        end_wrd = np.zeros_like(wrd_seq)
+
+        for i in range(len(phn_seq) - 1):
+            if phn_seq[i] != phn_seq[i + 1]:
+                end_phn[i] = 1
+            if wrd_seq[i] != wrd_seq[i + 1]:
+                end_wrd[i] = 1
+
+        end_phn[-1] = 1
+        end_wrd[-1] = 1
+
+        # Find the speaker id
+        spkr_id = self.__dict__[subset + "_spkr"][seq_id]
+        # Find the speaker info
+        spkr_info = self.spkrinfo[spkr_id]
+
+        # Pick the selected segment
+        padded_wav_seq = np.zeros(wav_length)
+        if idx_in_seq < 0:
+            padded_wav_seq[-idx_in_seq:] = \
+                wav_seq[0:(wav_length + idx_in_seq)]
+        else:
+            padded_wav_seq = wav_seq[idx_in_seq:(idx_in_seq + wav_length)]
+
+        padded_phn_seq = np.zeros(wav_length)
+        if idx_in_seq < 0:
+            padded_phn_seq[-idx_in_seq:] = \
+                phn_seq[0:(wav_length + idx_in_seq)]
+        else:
+            padded_phn_seq = phn_seq[idx_in_seq:(idx_in_seq + wav_length)]
+
+        padded_wrd_seq = np.zeros(wav_length)
+        if idx_in_seq < 0:
+            padded_wrd_seq[-idx_in_seq:] = \
+                wrd_seq[0:(wav_length + idx_in_seq)]
+        else:
+            padded_wrd_seq = wrd_seq[idx_in_seq:(idx_in_seq + wav_length)]
+
+        # Segment into frames
+        wav_seq = segment_axis(padded_wav_seq, frame_length, overlap)
+
+        # Take the most frequent phoneme in each frame
+        phn_seq = segment_axis(padded_phn_seq, frame_length, overlap)
+        phn_seq = scipy.stats.mode(phn_seq, axis=1)[0].flatten()
+        phn_seq = np.asarray(phn_seq, dtype='int')
+
+        # Take the most frequent word in each frame
+        wrd_seq = segment_axis(padded_wrd_seq, frame_length, overlap)
+        wrd_seq = scipy.stats.mode(wrd_seq, axis=1)[0].flatten()
+        wrd_seq = np.asarray(wrd_seq, dtype='int')
+
+        # Announce the end if and only if it was announced in the current
+        # frame
+        end_phn = segment_axis(end_phn, frame_length, overlap)
+        end_phn = end_phn.max(axis=1)
+        end_wrd = segment_axis(end_wrd, frame_length, overlap)
+        end_wrd = end_wrd.max(axis=1)
+
+        # Put names on the outputs
+        input_frames = wav_seq[:-1]
+        input_phonemes = phn_seq[:-1]
+        input_words = wrd_seq[:-1]
+        output_phoneme = phn_seq[-1]
+        output_word = wrd_seq[-1]
+        output_frame = wav_seq[-1]
+        ending_phoneme = end_phn[-1]
+        ending_word = end_wrd[-1]
+
+        return [input_frames, input_phonemes, input_words, output_phoneme,
+                output_word, spkr_info, output_frame, ending_phoneme,
+                ending_word]
+
+    def get_n_markov_frames(self, subset):
+        """
+        Given the subset id, return the number of fixed-size frame segments
+        in it.
+        """
+        self.check_subset_value(subset)
+        self.check_subset_presence(subset)
+        assert subset + "_intervals_seq" in self.__dict__.keys()
+
+        return self.__dict__[subset + "_intervals_seq"][-1]
+
+
+if __name__ == "__main__":
+    timit = TIMIT()
+
+    import pdb; pdb.set_trace()
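+    # At the pdb prompt one can explore the data, e.g. (a sketch assuming
+    # the preprocessed files are available; the parameter values are
+    # hypothetical):
+    #     timit.load("train")
+    #     timit.init_markov_frames("train", n_frames_in=4,
+    #                              frame_length=160, overlap=80)
+    #     example = timit.get_markov_frames("train", 0)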