Skip to content

Commit ca81a78

Browse files
committed
Changed type of input data matrix from double pointer to single pointer for better compatibility.
1 parent 531a28f commit ca81a78

File tree

4 files changed

+42
-51
lines changed

4 files changed

+42
-51
lines changed

gmm.c

+24-23
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ GMM* gmm_new(int M, int D, const char *cov_type)
6161
gmm->covars[k] = malloc(1*sizeof(double));
6262
}
6363

64+
6465
return gmm;
6566
}
6667

@@ -92,7 +93,7 @@ void gmm_set_initialization_method(GMM *gmm, const char *method)
9293
}
9394
}
9495

95-
void gmm_fit(GMM *gmm, const double * const *X, int N)
96+
void gmm_fit(GMM *gmm, const double *X, int N)
9697
{
9798
// Initialize GMM parameters
9899
_gmm_init_params(gmm, X, N);
@@ -133,7 +134,7 @@ void gmm_fit(GMM *gmm, const double * const *X, int N)
133134
free(gmm->P_k_giv_xt);
134135
}
135136

136-
double gmm_score(GMM *gmm, const double * const *X, int N)
137+
double gmm_score(GMM *gmm, const double *X, int N)
137138
{
138139
// Allocate memory for storing membership probabilities P(k | x_t)
139140
gmm->P_k_giv_xt = malloc(gmm->M*sizeof(double *));
@@ -152,7 +153,7 @@ double gmm_score(GMM *gmm, const double * const *X, int N)
152153
}
153154

154155
// TODO: Other initialization methods
155-
void _gmm_init_params(GMM *gmm, const double * const *X, int N)
156+
void _gmm_init_params(GMM *gmm, const double *X, int N)
156157
{
157158
if (gmm->init_method == RANDOM)
158159
{
@@ -172,14 +173,14 @@ void _gmm_init_params(GMM *gmm, const double * const *X, int N)
172173
}
173174

174175
// TODO: Unique sampling of data points for initializing component means
175-
void _gmm_init_params_random(GMM *gmm, const double * const *X, int N)
176+
void _gmm_init_params_random(GMM *gmm, const double *X, int N)
176177
{
177178
// Initialize means to randomly chosen samples
178179
srand(time(NULL));
179180
for (int k=0; k<gmm->M; k++)
180181
{
181182
int r = rand()%N;
182-
memcpy(gmm->means[k], X[r], gmm->D*sizeof(double));
183+
memcpy(gmm->means[k], &X[gmm->D*r], gmm->D*sizeof(double));
183184
}
184185

185186
// Initialize component weights to same value
@@ -189,7 +190,7 @@ void _gmm_init_params_random(GMM *gmm, const double * const *X, int N)
189190
// Initialize component variances to data variance
190191
double *mean = calloc(gmm->D, sizeof(double));
191192
for (int t=0; t<N; t++)
192-
_gmm_vec_add(mean, X[t], 1, 1, gmm->D);
193+
_gmm_vec_add(mean, &X[gmm->D*t], 1, 1, gmm->D);
193194
_gmm_vec_divide_by_scalar(mean, N, gmm->D);
194195
if (gmm->cov_type == DIAGONAL)
195196
{
@@ -198,7 +199,7 @@ void _gmm_init_params_random(GMM *gmm, const double * const *X, int N)
198199
{
199200
vars[i] = 0;
200201
for (int t=0; t<N; t++)
201-
vars[i] += _gmm_pow2(X[t][i] - mean[i]);
202+
vars[i] += _gmm_pow2(X[gmm->D*t+i] - mean[i]);
202203
vars[i] = vars[i]/N;
203204
}
204205
for (int k=0; k<gmm->M; k++)
@@ -209,7 +210,7 @@ void _gmm_init_params_random(GMM *gmm, const double * const *X, int N)
209210
{
210211
double var = 0;
211212
for (int t=0; t<N; t++)
212-
var += _gmm_pow2(_gmm_vec_l2_dist(X[t], mean, gmm->D));
213+
var += _gmm_pow2(_gmm_vec_l2_dist(&X[gmm->D*t], mean, gmm->D));
213214
var = var/(N*gmm->D);
214215
for (int k=0; k<gmm->M; k++)
215216
gmm->covars[k][0] = var;
@@ -222,7 +223,7 @@ void _gmm_init_params_random(GMM *gmm, const double * const *X, int N)
222223
// TODO: Handle empty clusters in K-means
223224
// TODO: Unique sampling of data points for initializing component means
224225
// TODO: Make K-means more efficient
225-
void _gmm_init_params_kmeans(GMM *gmm, const double * const *X, int N)
226+
void _gmm_init_params_kmeans(GMM *gmm, const double *X, int N)
226227
{
227228
const int num_iter = 10;
228229

@@ -231,7 +232,7 @@ void _gmm_init_params_kmeans(GMM *gmm, const double * const *X, int N)
231232
for (int k=0; k<gmm->M; k++)
232233
{
233234
int r = rand()%N;
234-
memcpy(gmm->means[k], X[r], gmm->D*sizeof(double));
235+
memcpy(gmm->means[k], &X[gmm->D*r], gmm->D*sizeof(double));
235236
}
236237

237238
// K-means iterative algorithm
@@ -243,11 +244,11 @@ void _gmm_init_params_kmeans(GMM *gmm, const double * const *X, int N)
243244
// Find assiciation of each data point
244245
for (int t = 0; t < N; t++)
245246
{
246-
double min_dist = _gmm_vec_l2_dist(X[t], gmm->means[0], gmm->D);
247+
double min_dist = _gmm_vec_l2_dist(&X[gmm->D*t], gmm->means[0], gmm->D);
247248
associations[t] = 0;
248249
for (int k=1; k<gmm->M; k++)
249250
{
250-
double dist = _gmm_vec_l2_dist(X[t], gmm->means[k], gmm->D);
251+
double dist = _gmm_vec_l2_dist(&X[gmm->D*t], gmm->means[k], gmm->D);
251252
if (dist < min_dist)
252253
{
253254
min_dist = dist;
@@ -266,7 +267,7 @@ void _gmm_init_params_kmeans(GMM *gmm, const double * const *X, int N)
266267
if (associations[t] == k)
267268
{
268269
nk++;
269-
_gmm_vec_add(gmm->means[k], X[t], 1, 1, gmm->D);
270+
_gmm_vec_add(gmm->means[k], &X[gmm->D*t], 1, 1, gmm->D);
270271
}
271272
}
272273
_gmm_vec_divide_by_scalar(gmm->means[k], nk, gmm->D);
@@ -293,11 +294,11 @@ void _gmm_init_params_kmeans(GMM *gmm, const double * const *X, int N)
293294
{
294295
nk++;
295296
if (gmm->cov_type == SPHERICAL)
296-
gmm->covars[k][0] += _gmm_pow2(_gmm_vec_l2_dist(X[t], gmm->means[k], gmm->D));
297+
gmm->covars[k][0] += _gmm_pow2(_gmm_vec_l2_dist(&X[gmm->D*t], gmm->means[k], gmm->D));
297298
else if (gmm->cov_type == DIAGONAL)
298299
{
299300
for (int i=0; i<gmm->D; i++)
300-
gmm->covars[k][i] += _gmm_pow2(X[t][i] - gmm->means[k][i]);
301+
gmm->covars[k][i] += _gmm_pow2(X[gmm->D*t+i] - gmm->means[k][i]);
301302
}
302303
}
303304
}
@@ -322,7 +323,7 @@ void _gmm_init_params_kmeans(GMM *gmm, const double * const *X, int N)
322323
free(associations);
323324
}
324325

325-
double _gmm_em_step(GMM *gmm, const double * const *X, int N)
326+
double _gmm_em_step(GMM *gmm, const double *X, int N)
326327
{
327328
double llh;
328329

@@ -339,7 +340,7 @@ double _gmm_em_step(GMM *gmm, const double * const *X, int N)
339340
return llh;
340341
}
341342

342-
double _gmm_compute_membership_prob(GMM *gmm, const double * const *X, int N)
343+
double _gmm_compute_membership_prob(GMM *gmm, const double *X, int N)
343344
{
344345
double llh = 0;
345346

@@ -350,7 +351,7 @@ double _gmm_compute_membership_prob(GMM *gmm, const double * const *X, int N)
350351
double max = -1;
351352
for (int k = 0; k < gmm->M; k++)
352353
{
353-
gmm->P_k_giv_xt[k][t] = log(gmm->weights[k]) + _gmm_log_gaussian_pdf(X[t], gmm->means[k], gmm->covars[k], gmm->D, gmm->cov_type);
354+
gmm->P_k_giv_xt[k][t] = log(gmm->weights[k]) + _gmm_log_gaussian_pdf(&X[gmm->D*t], gmm->means[k], gmm->covars[k], gmm->D, gmm->cov_type);
354355
if (gmm->P_k_giv_xt[k][t] > max)
355356
max = gmm->P_k_giv_xt[k][t];
356357
}
@@ -371,7 +372,7 @@ double _gmm_compute_membership_prob(GMM *gmm, const double * const *X, int N)
371372
return llh;
372373
}
373374

374-
void _gmm_update_params(GMM *gmm, const double * const *X, int N)
375+
void _gmm_update_params(GMM *gmm, const double *X, int N)
375376
{
376377
if (gmm->cov_type == SPHERICAL)
377378
{
@@ -384,8 +385,8 @@ void _gmm_update_params(GMM *gmm, const double * const *X, int N)
384385
for (int t=0; t<N; t++)
385386
{
386387
sum_P_k += gmm->P_k_giv_xt[k][t];
387-
sum_xxP_k += _gmm_vec_dot_prod(X[t], X[t], gmm->D) * gmm->P_k_giv_xt[k][t];
388-
_gmm_vec_add(gmm->means[k], X[t], 1, gmm->P_k_giv_xt[k][t], gmm->D);
388+
sum_xxP_k += _gmm_vec_dot_prod(&X[gmm->D*t], &X[gmm->D*t], gmm->D) * gmm->P_k_giv_xt[k][t];
389+
_gmm_vec_add(gmm->means[k], &X[gmm->D*t], 1, gmm->P_k_giv_xt[k][t], gmm->D);
389390
}
390391
_gmm_vec_divide_by_scalar(gmm->means[k], sum_P_k, gmm->D);
391392
gmm->weights[k] = sum_P_k/N;
@@ -404,7 +405,7 @@ void _gmm_update_params(GMM *gmm, const double * const *X, int N)
404405
for (int t=0; t<N; t++)
405406
{
406407
sum_P_k += gmm->P_k_giv_xt[k][t];
407-
_gmm_vec_add(gmm->means[k], X[t], 1, gmm->P_k_giv_xt[k][t], gmm->D);
408+
_gmm_vec_add(gmm->means[k], &X[gmm->D*t], 1, gmm->P_k_giv_xt[k][t], gmm->D);
408409
}
409410
gmm->weights[k] = sum_P_k/N;
410411
_gmm_vec_divide_by_scalar(gmm->means[k], sum_P_k, gmm->D);
@@ -413,7 +414,7 @@ void _gmm_update_params(GMM *gmm, const double * const *X, int N)
413414
for (int t=0; t<N; t++)
414415
{
415416
for (int i=0; i<gmm->D; i++)
416-
gmm->covars[k][i] += gmm->P_k_giv_xt[k][t]*_gmm_pow2(X[t][i] - gmm->means[k][i]);
417+
gmm->covars[k][i] += gmm->P_k_giv_xt[k][t]*_gmm_pow2(X[gmm->D*t+i] - gmm->means[k][i]);
417418
}
418419
_gmm_vec_divide_by_scalar(gmm->covars[k], sum_P_k, gmm->D);
419420
for (int i=0; i<gmm->D; i++)

gmm.h

+9-9
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ typedef struct _GMM
4141
/* --------------------------- GMM Parameters */
4242
double *weights; // Component weights
4343
double **means; // Component means
44-
double **covars; // Component variances
44+
double **covars; // Component covariances
4545
/* ---------------------- Auxiliary variables */
4646
double **P_k_giv_xt; // Membership probability matrix
4747
} GMM;
@@ -74,12 +74,12 @@ void gmm_set_initialization_method(GMM *gmm, const char *method);
7474
/*
7575
* Function to fit a GMM on a given set of data points
7676
*/
77-
void gmm_fit(GMM *gmm, const double * const *X, int N);
77+
void gmm_fit(GMM *gmm, const double *X, int N);
7878

7979
/*
8080
* Function to score a set of data points using the GMM
8181
*/
82-
double gmm_score(GMM *gmm, const double * const *X, int N);
82+
double gmm_score(GMM *gmm, const double *X, int N);
8383

8484
/*
8585
* Function to print the GMM parameters
@@ -94,12 +94,12 @@ void gmm_free(GMM *gmm);
9494
/*
9595
* Internal functions (do not call them!)
9696
*/
97-
void _gmm_init_params(GMM *gmm, const double * const *X, int N);
98-
void _gmm_init_params_random(GMM *gmm, const double * const *X, int N);
99-
void _gmm_init_params_kmeans(GMM *gmm, const double * const *X, int N);
100-
double _gmm_em_step(GMM *gmm, const double * const *X, int N);
101-
double _gmm_compute_membership_prob(GMM *gmm, const double * const *X, int N);
102-
void _gmm_update_params(GMM *gmm, const double * const *X, int N);
97+
void _gmm_init_params(GMM *gmm, const double *X, int N);
98+
void _gmm_init_params_random(GMM *gmm, const double *X, int N);
99+
void _gmm_init_params_kmeans(GMM *gmm, const double *X, int N);
100+
double _gmm_em_step(GMM *gmm, const double *X, int N);
101+
double _gmm_compute_membership_prob(GMM *gmm, const double *X, int N);
102+
void _gmm_update_params(GMM *gmm, const double *X, int N);
103103
double _gmm_log_gaussian_pdf(const double *x, const double *mean, const double *covar, int D, CovType cov_type);
104104
double _gmm_vec_l2_dist(const double *x, const double *y, int D);
105105
void _gmm_vec_add(double *x, const double *y, double a, double b, int D);

python/gmmmodule.c

+6-12
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ GMM_fit(GMMObject *self, PyObject *args, PyObject *keywds)
7777
return NULL;
7878
}
7979

80-
// Get data matrix from numpy array
80+
// Validate data matrix
8181
PyArrayObject *X_array = (PyArrayObject *) PyArray_ContiguousFromObject(X_obj, PyArray_DOUBLE, 2, 2);
8282
if (X_array == NULL)
8383
{
@@ -91,9 +91,7 @@ GMM_fit(GMMObject *self, PyObject *args, PyObject *keywds)
9191
}
9292
int N = (int) PyArray_DIM(X_array, 0);
9393
int D = (int) PyArray_DIM(X_array, 1);
94-
double **X = malloc(N*sizeof(double *));
95-
for (int t=0; t<N; t++)
96-
X[t] = (double *) X_array->data + D*t;
94+
double *X = (double *) X_array->data;
9795

9896
// Train the GMM
9997
GMM *gmm = gmm_new(self->k, D, PyString_AsString(self->cov_type));
@@ -127,8 +125,7 @@ GMM_fit(GMMObject *self, PyObject *args, PyObject *keywds)
127125
// Free the GMM object
128126
gmm_free(gmm);
129127

130-
// Free the data matrix and pyobjects
131-
free(X);
128+
// Free the pyobjects
132129
Py_DECREF(X_array);
133130

134131
return Py_BuildValue("");
@@ -146,7 +143,7 @@ GMM_score(GMMObject *self, PyObject *args, PyObject *keywds)
146143
return NULL;
147144
}
148145

149-
// Get data matrix from numpy array
146+
// Validate data matrix
150147
PyArrayObject *X_array = (PyArrayObject *) PyArray_ContiguousFromObject(X_obj, PyArray_DOUBLE, 2, 2);
151148
if (X_array == NULL)
152149
{
@@ -165,9 +162,7 @@ GMM_score(GMMObject *self, PyObject *args, PyObject *keywds)
165162
printf("Invalid dimensions of data matrix X.\n");
166163
return NULL;
167164
}
168-
double **X = malloc(N*sizeof(double *));
169-
for (int t=0; t<N; t++)
170-
X[t] = (double *) X_array->data + D*t;
165+
double *X = (double *) X_array->data;
171166

172167
// Initialize GMM from parameters
173168
GMM *gmm = malloc(sizeof(GMM));
@@ -206,8 +201,7 @@ GMM_score(GMMObject *self, PyObject *args, PyObject *keywds)
206201
free(means);
207202
free(covars);
208203

209-
// Free the data matrix and pyobjects
210-
free(X);
204+
// Free the pyobjects
211205
Py_DECREF(X_array);
212206

213207
return Py_BuildValue("d", llh);

test.c

+3-7
Original file line numberDiff line numberDiff line change
@@ -34,20 +34,18 @@ int main()
3434
D++;
3535
token = strtok(NULL, " \n");
3636
}
37-
double **X = (double **) malloc(N*sizeof(double *));
38-
for (int t=0; t<N; t++)
39-
X[t] = (double *) malloc(D*sizeof(double));
37+
double *X = malloc(N*D*sizeof(double));
4038
rewind(fp);
4139
line = NULL; len = 0;
4240
for (int t=0; t<N; t++)
4341
{
4442
getline(&line, &len, fp);
4543
token = strtok(line, " \n");
46-
X[t][0] = atof(token);
44+
X[D*t+0] = atof(token);
4745
for (int i=1; i<D; i++)
4846
{
4947
token = strtok(NULL, " \n");
50-
X[t][i] = atof(token);
48+
X[D*t+i] = atof(token);
5149
}
5250
}
5351
fclose(fp);
@@ -68,8 +66,6 @@ int main()
6866
gmm_free(gmm);
6967

7068
// Free data
71-
for (int t=0; t<N; t++)
72-
free(X[t]);
7369
free(X);
7470

7571
return 0;

0 commit comments

Comments
 (0)