Skip to content

Commit 360c5e9

Browse files
committed
Merge remote-tracking branch 'origin/master'
2 parents 2cd1864 + 5be5852 commit 360c5e9

6 files changed

+141
-50
lines changed

.ipynb_checkpoints/Multivariate Analysis - Supervised Analysis with PLS-DA-checkpoint.ipynb

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@
223223
"outputs": [],
224224
"source": [
225225
"# Plot the spectra in the dataset\n",
226-
"_scatterplots(np.log(np.mean(x_data, axis=0) + 1), xaxis=retention_times, yaxis=mz_values)"
226+
"_scatterplots(np.log(np.mean(x_data, axis=0) + 1), xaxis=retention_times, yaxis=mz_values, marker_size=3)"
227227
]
228228
},
229229
{
@@ -420,16 +420,16 @@
420420
"source": [
421421
"# The remaining plots with the new features I added into the pyChemometrics - flsoares232\n",
422422
"# Plot the weights\n",
423-
"pls_da.plot_model_parameters(parameter='w', component=1, instrument = 'lcms', xaxis=retention_times, yaxis=mz_values)\n",
423+
"pls_da.plot_model_parameters(parameter='w', component=1, instrument = 'lcms', xaxis=retention_times, yaxis=mz_values, marker_size=5)\n",
424424
"\n",
425425
"# Plot the loadings\n",
426-
"pls_da.plot_model_parameters(parameter='p', component=1, instrument = 'lcms', xaxis=retention_times, yaxis=mz_values)\n",
426+
"pls_da.plot_model_parameters(parameter='p', component=1, instrument = 'lcms', xaxis=retention_times, yaxis=mz_values, marker_size=5)\n",
427427
"\n",
428428
"# Plot beta\n",
429-
"pls_da.plot_model_parameters(parameter='beta', instrument = 'lcms', xaxis=retention_times, yaxis=mz_values)\n",
429+
"pls_da.plot_model_parameters(parameter='beta', instrument = 'lcms', xaxis=retention_times, yaxis=mz_values, marker_size=5)\n",
430430
"\n",
431431
"# Plot VIP\n",
432-
"pls_da.plot_model_parameters(parameter='VIP', instrument = 'lcms', xaxis=retention_times, yaxis=mz_values)"
432+
"pls_da.plot_model_parameters(parameter='VIP', instrument = 'lcms', xaxis=retention_times, yaxis=mz_values, marker_size=5)"
433433
]
434434
},
435435
{
@@ -488,6 +488,15 @@
488488
"We apply a similar criterion as the one used with PCA: choosing as the number of components after which the $Q^{2}Y$ value reaches a plateau (less than 5% increase compared to previous number of components). "
489489
]
490490
},
491+
{
492+
"cell_type": "code",
493+
"execution_count": null,
494+
"metadata": {},
495+
"outputs": [],
496+
"source": [
497+
"pls_da.scree_cv(x_train_log, y_train, total_comps=10)"
498+
]
499+
},
491500
{
492501
"cell_type": "code",
493502
"execution_count": null,
@@ -499,7 +508,16 @@
499508
},
500509
"outputs": [],
501510
"source": [
502-
"pls_da.scree_plot(x_train_log, y_train, total_comps=10)"
511+
"pls_da.scree_plot(metric = ['Q2Y','R2Y','Accuracy']) \n",
512+
"# Possible metrics (1 to 3 options):\n",
513+
"# 'Q2Y'\n",
514+
"# 'R2Y'\n",
515+
"# 'AUC'\n",
516+
"# 'MCC'\n",
517+
"# 'Recall'\n",
518+
"# 'Precision'\n",
519+
"# 'F1'\n",
520+
"# 'Accuracy'"
503521
]
504522
},
505523
{

Multivariate Analysis - Supervised Analysis with PLS-DA.ipynb

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@
223223
"outputs": [],
224224
"source": [
225225
"# Plot the spectra in the dataset\n",
226-
"_scatterplots(np.log(np.mean(x_data, axis=0) + 1), xaxis=retention_times, yaxis=mz_values)"
226+
"_scatterplots(np.log(np.mean(x_data, axis=0) + 1), xaxis=retention_times, yaxis=mz_values, marker_size=3)"
227227
]
228228
},
229229
{
@@ -420,16 +420,16 @@
420420
"source": [
421421
"# The remaining plots with the new features I added into the pyChemometrics - flsoares232\n",
422422
"# Plot the weights\n",
423-
"pls_da.plot_model_parameters(parameter='w', component=1, instrument = 'lcms', xaxis=retention_times, yaxis=mz_values)\n",
423+
"pls_da.plot_model_parameters(parameter='w', component=1, instrument = 'lcms', xaxis=retention_times, yaxis=mz_values, marker_size=5)\n",
424424
"\n",
425425
"# Plot the loadings\n",
426-
"pls_da.plot_model_parameters(parameter='p', component=1, instrument = 'lcms', xaxis=retention_times, yaxis=mz_values)\n",
426+
"pls_da.plot_model_parameters(parameter='p', component=1, instrument = 'lcms', xaxis=retention_times, yaxis=mz_values, marker_size=5)\n",
427427
"\n",
428428
"# Plot beta\n",
429-
"pls_da.plot_model_parameters(parameter='beta', instrument = 'lcms', xaxis=retention_times, yaxis=mz_values)\n",
429+
"pls_da.plot_model_parameters(parameter='beta', instrument = 'lcms', xaxis=retention_times, yaxis=mz_values, marker_size=5)\n",
430430
"\n",
431431
"# Plot VIP\n",
432-
"pls_da.plot_model_parameters(parameter='VIP', instrument = 'lcms', xaxis=retention_times, yaxis=mz_values)"
432+
"pls_da.plot_model_parameters(parameter='VIP', instrument = 'lcms', xaxis=retention_times, yaxis=mz_values, marker_size=5)"
433433
]
434434
},
435435
{
@@ -488,6 +488,15 @@
488488
"We apply a similar criterion as the one used with PCA: choosing as the number of components after which the $Q^{2}Y$ value reaches a plateau (less than 5% increase compared to previous number of components). "
489489
]
490490
},
491+
{
492+
"cell_type": "code",
493+
"execution_count": null,
494+
"metadata": {},
495+
"outputs": [],
496+
"source": [
497+
"pls_da.scree_cv(x_train_log, y_train, total_comps=10)"
498+
]
499+
},
491500
{
492501
"cell_type": "code",
493502
"execution_count": null,
@@ -499,7 +508,16 @@
499508
},
500509
"outputs": [],
501510
"source": [
502-
"pls_da.scree_plot(x_train_log, y_train, total_comps=10)"
511+
"pls_da.scree_plot(metric = ['Q2Y','R2Y','Accuracy']) \n",
512+
"# Possible metrics (1 to 3 options):\n",
513+
"# 'Q2Y'\n",
514+
"# 'R2Y'\n",
515+
"# 'AUC'\n",
516+
"# 'MCC'\n",
517+
"# 'Recall'\n",
518+
"# 'Precision'\n",
519+
"# 'F1'\n",
520+
"# 'Accuracy'"
503521
]
504522
},
505523
{
@@ -1118,8 +1136,8 @@
11181136
],
11191137
"metadata": {
11201138
"kernelspec": {
1121-
"display_name": "Python 3 (ipykernel)",
1122-
"language": "python",
1139+
"display_name": "Python 3 (Spyder)",
1140+
"language": "python3",
11231141
"name": "python3"
11241142
},
11251143
"language_info": {

pyChemometrics/ChemometricsOrthogonalPLS.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -670,7 +670,7 @@ def plot_scores(self, orthogonal_component=1, color=None, discrete=False, label_
670670
return None
671671

672672
####### flsoares232 version - Updated 20-10-2023
673-
def plot_model_parameters(self, parameter='w_pred', orthogonal_component=1, cross_val=False, sigma=2, bar=False, xaxis=None, yaxis=None, instrument=None):
673+
def plot_model_parameters(self, parameter='w_pred', orthogonal_component=1, cross_val=False, sigma=2, bar=False, xaxis=None, yaxis=None, instrument=None, marker_size=3):
674674

675675
"""
676676
Plot different model parameters related with the variables
@@ -685,23 +685,23 @@ def plot_model_parameters(self, parameter='w_pred', orthogonal_component=1, cros
685685
:return:
686686
"""
687687

688-
choices = {'w_pred': self.w_pred, 'p_pred': self.p_pred, 'w_ortho': self.w_ortho, 'p_ortho': self.p_ortho}
688+
choices = {'w_pred': self.w_pred, 'p_pred': self.p_pred, 'w_ortho': self.w_ortho, 'p_ortho': self.p_ortho, 'beta': self.beta_coeffs, 'VIP': self.VIP()}
689689
choices_cv = {'wpred': 'Wpred_w_pred', }
690690

691691
# decrement component to adjust for python indexing
692692
orthogonal_component -= 1
693693

694694
# Beta and VIP don't depend on components so have an exception status here
695695
if cross_val is True:
696-
if parameter in ['w_pred', 'p_pred']:
696+
if parameter in ['w_pred', 'p_pred', 'beta', 'VIP']:
697697
mean = self.cvParameters['Mean_' + choices_cv[parameter]].squeeze()
698698
error = sigma * self.cvParameters['Stdev_' + choices_cv[parameter]].squeeze()
699699
else:
700700
mean = self.cvParameters['Mean_' + choices_cv[parameter]][:, orthogonal_component]
701701
error = sigma * self.cvParameters['Stdev_' + choices_cv[parameter]][:, orthogonal_component]
702702
else:
703703
error = None
704-
if parameter in ['w_pred', 'p_pred']:
704+
if parameter in ['w_pred', 'p_pred', 'beta', 'VIP']:
705705
mean = choices[parameter].squeeze()
706706
else:
707707
mean = choices[parameter][:, orthogonal_component]
@@ -725,7 +725,7 @@ def plot_model_parameters(self, parameter='w_pred', orthogonal_component=1, cros
725725
plt.xlabel("Variable No")
726726
elif instrument == 'lcms':
727727
if xaxis is not None and yaxis is not None:
728-
_scatterplots(mean, xaxis=xaxis, yaxis=yaxis)
728+
_scatterplots(mean, xaxis=xaxis, yaxis=yaxis, marker_size=marker_size)
729729
else:
730730
if bar is False:
731731
_lineplots(mean, error=error, xaxis=xaxis)

pyChemometrics/ChemometricsPLS.py

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,46 +1160,65 @@ def plot_scores(self, comps=[0, 1], color=None, discrete=False, label_outliers=F
11601160
ax.set_ylabel("T[{0}]".format((comps[1] + 1)))
11611161
plt.show()
11621162
return None
1163-
1164-
def scree_plot(self, x, y, total_comps=5):
1163+
1164+
def scree_cv(self, x, y, total_comps=5):
11651165
"""
11661166
1167-
:param x:
1167+
:param x: Data to use in the scree plot
11681168
:param y:
11691169
:param total_comps:
11701170
:return:
11711171
"""
1172-
fig, ax = plt.subplots()
1173-
11741172
models = list()
11751173
for n_components in range(1, total_comps + 1):
11761174
currmodel = deepcopy(self)
11771175
currmodel.n_components = n_components
11781176
currmodel.fit(x, y)
11791177
currmodel.cross_validation(x, y)
11801178
models.append(currmodel)
1181-
q2 = np.array([x.cvParameters['PLS']['Q2Y'] for x in models])
1182-
r2 = np.array([x.modelParameters['PLS']['R2Y'] for x in models])
11831179

1184-
ax.bar([x - 0.1 for x in range(1, total_comps + 1)], height=r2, width=0.2)
1185-
ax.bar([x + 0.1 for x in range(1, total_comps + 1)], height=q2, width=0.2)
1180+
q2 = np.array([x.cvParameters['PLS']['Q2Y'] for x in models])
1181+
r2 = np.array([x.modelParameters['PLS']['R2Y'] for x in models])
1182+
1183+
# Store everything...
1184+
self.screeCV = {'Q2Y': q2, 'R2Y': r2}
1185+
1186+
return None
1187+
1188+
def scree_plot(self, metric = ['Q2Y','R2Y']):
1189+
"""
1190+
1191+
:param x:
1192+
:param y:
1193+
:param total_comps:
1194+
:return:
1195+
"""
1196+
fig, ax = plt.subplots()
1197+
1198+
b1 = self.screeCV[metric[0]]
1199+
b2 = self.screeCV[metric[1]]
1200+
1201+
total_comps = np.shape(self.screeCV[metric[2]])[0]
1202+
1203+
ax.bar([x - 0.1 for x in range(1, total_comps + 1)], height=b2, width=0.2)
1204+
ax.bar([x + 0.1 for x in range(1, total_comps + 1)], height=b1, width=0.2)
11861205
ax.legend(['R2', 'Q2'])
11871206
ax.set_xlabel("Number of components")
11881207
ax.set_ylabel("R2/Q2Y")
11891208

11901209
# Specific case where n comps = 2
1191-
if q2.size == 2:
1192-
plateau_index = np.where(np.diff(q2) / q2[0] < 0.05)[0]
1210+
if b1.size == 2:
1211+
plateau_index = np.where(np.diff(b1) / b1[0] < 0.05)[0]
11931212
if plateau_index.size == 0:
11941213
print("Consider exploring a higher level of components")
11951214
else:
1196-
plateau = np.min(np.where(np.diff(q2)/q2[0] < 0.05)[0])
1215+
plateau = np.min(np.where(np.diff(b1)/b1[0] < 0.05)[0])
11971216
ax.vlines(x=(plateau + 1), ymin=0, ymax=1, colors='red', linestyles='dashed')
11981217
print("Q2Y measure stabilizes (increase of less than 5% of previous value or decrease) "
11991218
"at component {0}".format(plateau + 1))
12001219

12011220
else:
1202-
plateau_index = np.where((np.diff(q2) / q2[0:-1]) < 0.05)[0]
1221+
plateau_index = np.where((np.diff(b1) / b1[0:-1]) < 0.05)[0]
12031222
if plateau_index.size == 0:
12041223
print("Consider exploring a higher level of components")
12051224
else:
@@ -1258,7 +1277,7 @@ def plot_permutation_test(self, permt_res, metric='Q2Y'):
12581277
raise exp
12591278

12601279
####### flsoares232 version - Updated 20-10-2023
1261-
def plot_model_parameters(self, parameter='w', component=1, cross_val=False, sigma=2, bar=False, xaxis=None, yaxis=None, instrument=None):
1280+
def plot_model_parameters(self, parameter='w', component=1, cross_val=False, sigma=2, bar=False, xaxis=None, yaxis=None, instrument=None, marker_size=3):
12621281

12631282
"""
12641283
Plot different model parameters related with the variables
@@ -1317,7 +1336,7 @@ def plot_model_parameters(self, parameter='w', component=1, cross_val=False, sig
13171336
plt.xlabel("Variable No")
13181337
elif instrument == 'lcms':
13191338
if xaxis is not None and yaxis is not None:
1320-
_scatterplots(mean, xaxis=xaxis, yaxis=yaxis)
1339+
_scatterplots(mean, xaxis=xaxis, yaxis=yaxis, marker_size=marker_size)
13211340
else:
13221341
if bar is False:
13231342
_lineplots(mean, error=error, xaxis=xaxis)

pyChemometrics/ChemometricsPLSDA.py

Lines changed: 50 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1251,16 +1251,15 @@ def _residual_ssx(self, x):
12511251
xscaled = self.scaler.transform(x)
12521252
residuals = np.sum(np.square(xscaled - x_reconstructed), axis=1)
12531253
return residuals
1254-
1255-
def scree_plot(self, x, y, total_comps=5):
1254+
1255+
def scree_cv(self, x, y, total_comps=5):
12561256
"""
12571257
12581258
:param x: Data to use in the scree plot
12591259
:param y:
12601260
:param total_comps:
12611261
:return:
12621262
"""
1263-
fig, ax = plt.subplots()
12641263
models = list()
12651264
for n_components in range(1, total_comps + 1):
12661265
currmodel = deepcopy(self)
@@ -1272,34 +1271,71 @@ def scree_plot(self, x, y, total_comps=5):
12721271
q2 = np.array([x.cvParameters['Q2Y'] for x in models])
12731272
r2 = np.array([x.modelParameters['R2Y'] for x in models])
12741273
auc = np.array([x.cvParameters['DA']['Mean_AUC'][0] for x in models])
1274+
mcc = np.array([x.cvParameters['DA']['Mean_MCC'] for x in models])
1275+
recall = np.array([x.cvParameters['DA']['Mean_Recall'] for x in models])
1276+
precision = np.array([x.cvParameters['DA']['Mean_Precision']for x in models])
1277+
f1 = np.array([x.cvParameters['DA']['Mean_f1'] for x in models])
1278+
accuracy = np.array([x.cvParameters['DA']['Mean_Accuracy'] for x in models])
1279+
1280+
# Store everything...
1281+
self.screeCV = {'Q2Y': q2, 'R2Y': r2, 'AUC': auc,
1282+
'MCC': mcc, 'Recall': recall,
1283+
'Precision': precision, 'F1': f1,
1284+
'Accuracy': accuracy}
1285+
1286+
return None
1287+
1288+
1289+
def scree_plot(self,metric = ['Q2Y','R2Y','AUC']):
1290+
"""
1291+
1292+
:param x: Data to use in the scree plot
1293+
:param y:
1294+
:param total_comps:
1295+
:return:
1296+
"""
1297+
fig, ax = plt.subplots()
1298+
1299+
if np.shape(metric)[0] == 1:
1300+
sup_list = ['Q2Y','R2Y']
1301+
metric = sup_list + metric
1302+
elif np.shape(metric)[0] == 2:
1303+
sup_list = ['Q2Y']
1304+
metric = sup_list + metric
1305+
1306+
b1 = self.screeCV[metric[0]]
1307+
b2 = self.screeCV[metric[1]]
1308+
b3 = self.screeCV[metric[2]]
1309+
1310+
total_comps = np.shape(self.screeCV[metric[2]])[0]
12751311

1276-
ax.bar([x - 0.2 for x in range(1, total_comps + 1)], height=r2, width=0.2)
1277-
ax.bar([x for x in range(1, total_comps + 1)], height=q2, width=0.2)
1278-
ax.bar([x + 0.2 for x in range(1, total_comps + 1)], height=auc, width=0.2)
1312+
ax.bar([x - 0.2 for x in range(1, total_comps + 1)], height=b1, width=0.2)
1313+
ax.bar([x for x in range(1, total_comps + 1)], height=b2, width=0.2)
1314+
ax.bar([x + 0.2 for x in range(1, total_comps + 1)], height=b3, width=0.2)
12791315

1280-
ax.legend(['R2', 'Q2', 'Mean_AUC'])
1316+
ax.legend(metric)
12811317
ax.set_xlabel("Number of components")
1282-
ax.set_ylabel("R2/Q2Y/AUC")
1318+
ax.set_ylabel((metric[0] + "/" + metric[1] + "/" + metric[2]))
12831319

12841320
# Specific case where n comps = 2 #
1285-
if q2.size == 2:
1286-
plateau_index = np.where(np.diff(q2) / q2[0] < 0.05)[0]
1321+
if b1.size == 2:
1322+
plateau_index = np.where(np.diff(b1) / b1[0] < 0.05)[0]
12871323
if plateau_index.size == 0:
12881324
print("Consider exploring a higher level of components")
12891325
else:
1290-
plateau = np.min(np.where(np.diff(q2)/q2[0] < 0.05)[0])
1326+
plateau = np.min(np.where(np.diff(b1)/b1[0] < 0.05)[0])
12911327
ax.vlines(x=(plateau + 1), ymin=0, ymax=1, colors='red', linestyles='dashed')
1292-
print("Q2Y measure stabilizes (increase of less than 5% of previous value or decrease) "
1328+
print(metric[0] + " measure stabilizes (increase of less than 5% of previous value or decrease) "
12931329
"at component {0}".format(plateau + 1))
12941330

12951331
else:
1296-
plateau_index = np.where((np.diff(q2) / q2[0:-1]) < 0.05)[0]
1332+
plateau_index = np.where((np.diff(b1) / b1[0:-1]) < 0.05)[0]
12971333
if plateau_index.size == 0:
12981334
print("Consider exploring a higher level of components")
12991335
else:
13001336
plateau = np.min(plateau_index)
13011337
ax.vlines(x=(plateau + 1), ymin=0, ymax=1, colors='red', linestyles='dashed')
1302-
print("Q2Y measure stabilizes (increase of less than 5% of previous value or decrease) "
1338+
print(metric[0] + "measure stabilizes (increase of less than 5% of previous value or decrease) "
13031339
"at component {0}".format(plateau + 1))
13041340

13051341
plt.show()

0 commit comments

Comments
 (0)