Skip to content

Commit 7bbb65d

Browse files
committed
Reorganizing tools
1 parent 0bdb269 commit 7bbb65d

File tree

158 files changed

+57456
-61251
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

158 files changed

+57456
-61251
lines changed
+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
2+
3+
4+
| Folder | Creator | Email | Brief Description |
5+
| -------------------- | ------------ | -------------------------- | ------------------------------------------------------------ |
6+
| add_groups | Yuwen Zhang | [email protected] | Generate grouping method(s) for `model.xyz`. |
7+
| get_max_rmse_xyz | Ke Xu | [email protected] | Identify structures with the largest errors. |
8+
| pbc_mol              | Zherui Chen  | [email protected]      | Process XYZ trajectory files from molecular simulations so that molecules remain intact across the periodic boundaries of the simulation box. |
9+
| pca_sampling | Penghua Ying | [email protected] | Farthest-point sampling based on `calorine`. |
10+
| perturbed2poscar | Who? | | What? |
11+
| rdf_adf | Ke Xu | [email protected] | Calculate RDF and ADF using `OVITO`. |
12+
| select_xyz_frames | Zherui Chen | [email protected] | Select frames from the `exyz` file. |
13+
| shift_energy_to_zero | Nan Xu | [email protected] | Shift the average energy of each species to zero for a dataset. |
14+
| split_xyz | Yong Wang | [email protected] | Some functionalities for training/test data. |
15+
Original file line numberDiff line numberDiff line change
@@ -1,87 +1,87 @@
1-
"""
2-
Purpose:
3-
Select structures from the original large reference dataset based on principal component analysis (PCA) of
4-
descriptor space using farthest point sampling. We use the PbTe as a toy example to show how this script
5-
works; one needs to modify the paths of the reference dataset and NEP model, and the number of selected frames, case by case.
6-
7-
Ref:
8-
calorine: https://calorine.materialsmodeling.org/tutorials/visualize_descriptor_space_with_pca.html
9-
https://github.com/bigd4/PyNEP/blob/master/examples/plot_select_structure.py
10-
11-
Author:
12-
Penghua Ying <hityingph(at)163.com>
13-
"""
14-
15-
from ase.io import read, write
16-
from pylab import *
17-
from calorine.nep import get_descriptors
18-
from sklearn.decomposition import PCA
19-
from tqdm import tqdm
20-
from scipy.spatial.distance import cdist
21-
22-
# Farthest Point Sampling
23-
def farthest_point_sampling(points, n_samples):
    """Pick a spread-out subset of ``points`` by greedy farthest point sampling.

    Starting from one randomly drawn point, repeatedly add the point whose
    distance to its nearest already-selected point is largest.

    Args:
        points: 2D array with one row per point.
        n_samples: Number of points to select. Capped at the number of
            points, because asking for more could only produce duplicates.

    Returns:
        List of selected row indices, in selection order. Empty if
        ``points`` has no rows or ``n_samples`` is not positive.
    """
    n_points = points.shape[0]
    n_samples = min(n_samples, n_points)
    if n_samples <= 0:
        return []

    selected_indices = [int(np.random.randint(n_points))]
    # Distance from every point to its nearest selected point. It is updated
    # incrementally, so each round needs the distances to the newest pick
    # only instead of to all previously selected points.
    min_distances = cdist(points, points[selected_indices])[:, 0]
    for _ in range(1, n_samples):
        next_index = int(np.argmax(min_distances))
        selected_indices.append(next_index)
        newest = cdist(points, points[[next_index]])[:, 0]
        np.minimum(min_distances, newest, out=min_distances)
    return selected_indices
32-
33-
# Global plot styling: aw = axes line width, fs = font size, lw = line width.
aw, fs, lw = 2, 16, 2
font = dict(size=fs)
matplotlib.rc('axes', linewidth=aw)
matplotlib.rc('font', **font)
39-
40-
def set_fig_properties(ax_list, tl=8, tw=2, tlm=4):
    """Apply a uniform tick style to every axes object in ``ax_list``.

    Args:
        ax_list: Iterable of axes objects exposing ``tick_params``.
        tl: Length of the major ticks.
        tw: Width of both major and minor ticks.
        tlm: Length of the minor ticks.
    """
    for ax in ax_list:
        ax.tick_params(which='major', length=tl, width=tw)
        ax.tick_params(which='minor', length=tlm, width=tw)
        # Ticks point outward; right and top ticks are switched off.
        ax.tick_params(which='both', axis='both', direction='out', right=False, top=False)
49-
50-
51-
52-
# Paths of the reference dataset and the pre-trained NEP model; adapt these
# (and n_samples below) to your own system.
dataset_path = "../../examples/11_NEP_potential_PbTe/test.xyz"
model_path = '../../examples/11_NEP_potential_PbTe/nep.txt'

tol = read(dataset_path, ":")  # read all frames of the original (larger) reference dataset

descriptors = []
# Wrap the list itself rather than enumerate(...) so tqdm knows the total
# number of frames and can show a percentage and ETA.
for t in tqdm(tol):
    d = get_descriptors(t, model_filename=model_path)  # get descriptors using the pre-trained nep model
    d_mean = np.mean(d, axis=0)  # average the atomic descriptors to get one structure descriptor
    descriptors.append(d_mean)

descriptors = np.array(descriptors)
print(f'Total frame of structures in dataset: {descriptors.shape[0]}')
print(f'Number of descriptor components: {descriptors.shape[1]}')

# Project the structure descriptors onto their first two principal components.
pca = PCA(n_components=2)
pc = pca.fit_transform(descriptors)
p0 = pca.explained_variance_ratio_[0]
p1 = pca.explained_variance_ratio_[1]
print(f'Explained variance for component 0: {p0:.2f}')
print(f'Explained variance for component 1: {p1:.2f}')

# Select 25 structures using FPS
n_samples = 25
selected_indices = farthest_point_sampling(pc, n_samples)
selected_structures = [tol[i] for i in selected_indices]
# Set lookup keeps the filter linear in the number of frames.
selected_set = set(selected_indices)
unselected_structures = [t for i, t in enumerate(tol) if i not in selected_set]

# Save the selected structures (unselected_structures is only kept in memory)
write('selected_structures.xyz', selected_structures)

# Scatter plot of all structures in PC space with the selected ones highlighted.
figure(figsize=(10, 8))
set_fig_properties([gca()])
scatter(pc[:, 0], pc[:, 1], alpha=0.5, c="C0", label='All structures')
scatter(pc[selected_indices, 0], pc[selected_indices, 1], s=8, color='C1', label='Selected structures')
xlabel('PC1')
ylabel('PC2')
legend()
1+
"""
2+
Purpose:
3+
Select structures from the original large reference dataset based on principal component analysis (PCA) of
4+
descriptor space using farthest point sampling. We use the PbTe as a toy example to show how this script
5+
works; one needs to modify the paths of the reference dataset and NEP model, and the number of selected frames, case by case.
6+
7+
Ref:
8+
calorine: https://calorine.materialsmodeling.org/tutorials/visualize_descriptor_space_with_pca.html
9+
https://github.com/bigd4/PyNEP/blob/master/examples/plot_select_structure.py
10+
11+
Author:
12+
Penghua Ying <hityingph(at)163.com>
13+
"""
14+
15+
from ase.io import read, write
16+
from pylab import *
17+
from calorine.nep import get_descriptors
18+
from sklearn.decomposition import PCA
19+
from tqdm import tqdm
20+
from scipy.spatial.distance import cdist
21+
22+
# Farthest Point Sampling
23+
def farthest_point_sampling(points, n_samples):
    """Pick a spread-out subset of ``points`` by greedy farthest point sampling.

    Starting from one randomly drawn point, repeatedly add the point whose
    distance to its nearest already-selected point is largest.

    Args:
        points: 2D array with one row per point.
        n_samples: Number of points to select. Capped at the number of
            points, because asking for more could only produce duplicates.

    Returns:
        List of selected row indices, in selection order. Empty if
        ``points`` has no rows or ``n_samples`` is not positive.
    """
    n_points = points.shape[0]
    n_samples = min(n_samples, n_points)
    if n_samples <= 0:
        return []

    selected_indices = [int(np.random.randint(n_points))]
    # Distance from every point to its nearest selected point. It is updated
    # incrementally, so each round needs the distances to the newest pick
    # only instead of to all previously selected points.
    min_distances = cdist(points, points[selected_indices])[:, 0]
    for _ in range(1, n_samples):
        next_index = int(np.argmax(min_distances))
        selected_indices.append(next_index)
        newest = cdist(points, points[[next_index]])[:, 0]
        np.minimum(min_distances, newest, out=min_distances)
    return selected_indices
32+
33+
# Global plot styling: aw = axes line width, fs = font size, lw = line width.
aw, fs, lw = 2, 16, 2
font = dict(size=fs)
matplotlib.rc('axes', linewidth=aw)
matplotlib.rc('font', **font)
39+
40+
def set_fig_properties(ax_list, tl=8, tw=2, tlm=4):
    """Apply a uniform tick style to every axes object in ``ax_list``.

    Args:
        ax_list: Iterable of axes objects exposing ``tick_params``.
        tl: Length of the major ticks.
        tw: Width of both major and minor ticks.
        tlm: Length of the minor ticks.
    """
    for ax in ax_list:
        ax.tick_params(which='major', length=tl, width=tw)
        ax.tick_params(which='minor', length=tlm, width=tw)
        # Ticks point outward; right and top ticks are switched off.
        ax.tick_params(which='both', axis='both', direction='out', right=False, top=False)
49+
50+
51+
52+
# Paths of the reference dataset and the pre-trained NEP model; adapt these
# (and n_samples below) to your own system.
dataset_path = "../../examples/11_NEP_potential_PbTe/test.xyz"
model_path = '../../examples/11_NEP_potential_PbTe/nep.txt'

tol = read(dataset_path, ":")  # read all frames of the original (larger) reference dataset

descriptors = []
# Wrap the list itself rather than enumerate(...) so tqdm knows the total
# number of frames and can show a percentage and ETA.
for t in tqdm(tol):
    d = get_descriptors(t, model_filename=model_path)  # get descriptors using the pre-trained nep model
    d_mean = np.mean(d, axis=0)  # average the atomic descriptors to get one structure descriptor
    descriptors.append(d_mean)

descriptors = np.array(descriptors)
print(f'Total frame of structures in dataset: {descriptors.shape[0]}')
print(f'Number of descriptor components: {descriptors.shape[1]}')

# Project the structure descriptors onto their first two principal components.
pca = PCA(n_components=2)
pc = pca.fit_transform(descriptors)
p0 = pca.explained_variance_ratio_[0]
p1 = pca.explained_variance_ratio_[1]
print(f'Explained variance for component 0: {p0:.2f}')
print(f'Explained variance for component 1: {p1:.2f}')

# Select 25 structures using FPS
n_samples = 25
selected_indices = farthest_point_sampling(pc, n_samples)
selected_structures = [tol[i] for i in selected_indices]
# Set lookup keeps the filter linear in the number of frames.
selected_set = set(selected_indices)
unselected_structures = [t for i, t in enumerate(tol) if i not in selected_set]

# Save the selected structures (unselected_structures is only kept in memory)
write('selected_structures.xyz', selected_structures)

# Scatter plot of all structures in PC space with the selected ones highlighted.
figure(figsize=(10, 8))
set_fig_properties([gca()])
scatter(pc[:, 0], pc[:, 1], alpha=0.5, c="C0", label='All structures')
scatter(pc[selected_indices, 0], pc[selected_indices, 1], s=8, color='C1', label='Selected structures')
xlabel('PC1')
ylabel('PC2')
legend()
savefig('FPS.png', bbox_inches='tight')

0 commit comments

Comments
 (0)