Skip to content

Commit f1bb472

Browse files
committed
Load existing hmm_dict
1 parent 796e998 commit f1bb472

File tree

1 file changed

+51
-47
lines changed

1 file changed

+51
-47
lines changed

preprocess.py

Lines changed: 51 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -53,68 +53,72 @@ def get_phoneme_dict(filepath):
5353

5454

5555
def get_hmm_dict(filepath, cache_path="./data/hmm_data.npy"):
    """Return the nested HMM parameter dictionary.

    If ``cache_path`` exists, the dictionary is read from that NumPy file
    (the object stored there must be a dict with an ``"hmm_dict"`` key).
    Otherwise the text file at ``filepath`` is parsed line by line.

    Args:
        filepath: Path to the HMM definition text file. Only read when the
            cache file does not exist.
        cache_path: Path to a ``.npy`` file holding a pickled dict with an
            ``"hmm_dict"`` entry. Defaults to the location the project used
            when this path was hard-coded.

    Returns:
        dict: ``{model_name: {...}}``. Each model maps the first token of
        its state-count line to the second token, each state id to a dict
        holding the mixture-count entry and a ``'<MIXTURES>'`` dict, and
        ``'<TRANSP>'`` to a list of token lists (the transition rows).
        All values are kept as strings exactly as read from the file.

    Raises:
        OSError: If ``filepath`` has to be parsed but cannot be opened.
        IndexError: If the text file has fewer lines/tokens than expected.
    """
    if os.path.exists(cache_path):
        # NumPy >= 1.16.3 refuses to unpickle object arrays unless
        # allow_pickle=True is passed, so the old call failed there.
        # SECURITY NOTE: unpickling can execute arbitrary code; only point
        # cache_path at a file this project produced itself.
        hmm_data = np.load(cache_path, allow_pickle=True).item()
        return hmm_data["hmm_dict"]

    # The context manager closes the file even if parsing raises midway.
    with open(filepath, 'r') as hmm_txt:
        return _parse_hmm_definitions(hmm_txt)


def _parse_hmm_definitions(hmm_txt):
    """Parse 21 consecutive HMM definitions from an open text file object."""
    num_phonemes = 21   # Number of unique phonemes in the definition file
    num_mixtures = 10   # Mixture components read per state
    hmm_dict = {}

    for idx in range(num_phonemes):
        # The last model ('sp', idx == 20) has 1 state; every other has 3.
        is_last_model = idx == num_phonemes - 1

        # Header line: the model name is the text between double quotes.
        pronun_word = hmm_txt.readline().split('"')[1]
        model = hmm_dict[pronun_word] = {}

        # Skip one line (presumably the begin-of-model marker).
        hmm_txt.readline()

        # State-count line: stored as {first token: second token}.
        num_states_line = hmm_txt.readline().split()
        model[num_states_line[0]] = num_states_line[1]

        for _ in range(1 if is_last_model else 3):
            # State line: the second token is the state id.
            state = hmm_txt.readline().split()[1]
            state_entry = model[state] = {}

            num_mixes_line = hmm_txt.readline().split()
            state_entry[num_mixes_line[0]] = num_mixes_line[1]
            mixtures = state_entry['<MIXTURES>'] = {}

            for _ in range(num_mixtures):
                # Mixture line tokens: tag, mixture id, third token (weight,
                # presumably). Keyed by mixture id.
                mixture = hmm_txt.readline().split()
                component = mixtures[mixture[1]] = {}
                component[mixture[0]] = mixture[2]

                # Mean: a tag/dimension line followed by the values line.
                mean_dim = hmm_txt.readline().split()
                component[mean_dim[0]] = hmm_txt.readline().split()

                # Variance: same two-line layout (input dimension = 39).
                variance_dim = hmm_txt.readline().split()
                component[variance_dim[0]] = hmm_txt.readline().split()

                # GConst: tag and value on a single line.
                g_const = hmm_txt.readline().split()
                component[g_const[0]] = g_const[1]

        # Transition matrix: skip its header line, then read the rows
        # (5 rows for 3-state models, 3 rows for the 1-state model).
        hmm_txt.readline()
        num_rows = 3 if is_last_model else 5
        model['<TRANSP>'] = [hmm_txt.readline().split()
                             for _ in range(num_rows)]

        # Skip the end-of-model line (ENDHMM).
        hmm_txt.readline()

    return hmm_dict
120124

0 commit comments

Comments
 (0)