Skip to content

Commit 8a5406e

Browse files
committedFeb 22, 2023
New branch
Origin
1 parent fd24abf commit 8a5406e

7 files changed

+8677
-0
lines changed
 

‎README.md

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# `ocp` by Open Catalyst Project
2+
3+
[![CircleCI](https://circleci.com/gh/Open-Catalyst-Project/ocp.svg?style=shield)](https://circleci.com/gh/Open-Catalyst-Project/ocp)
4+
[![codecov](https://codecov.io/gh/Open-Catalyst-Project/ocp/branch/codecov/graph/badge.svg?token=M606LH5LK6)](https://codecov.io/gh/Open-Catalyst-Project/ocp)
5+
6+
`ocp` is the [Open Catalyst Project](https://opencatalystproject.org/)'s
7+
library of state-of-the-art machine learning algorithms for catalysis.
8+
9+
<div align="left">
10+
<img src="https://user-images.githubusercontent.com/1156489/170388229-642c6619-dece-4c88-85ef-b46f4d5f1031.gif">
11+
</div>
12+
13+
It provides training and evaluation code for tasks and models that take arbitrary
14+
chemical structures as input to predict energies / forces / positions, and can
15+
be used as a base scaffold for research projects. For an overview of tasks, data, and metrics, please read our papers:
16+
- [OC20](https://arxiv.org/abs/2010.09990)
17+
- [OC22](https://arxiv.org/abs/2206.08917)
18+
19+
20+
## Installation
21+
22+
See [installation instructions](https://github.com/Open-Catalyst-Project/ocp/blob/main/INSTALL.md).
23+
24+
25+
* Install specific versions of Pymatgen and ASE: `pip install pymatgen==2020.4.2 ase==3.19.1`
26+
* Install Catkit from Github: `pip install git+https://github.com/ulissigroup/CatKit.git catkit`
27+
* Clone this repo and install with: `pip install -e .`
28+
29+
30+
31+
32+
33+
## Acknowledgements
34+
35+
- This codebase was initially forked from [CGCNN](https://github.com/txie-93/cgcnn)
36+
by [Tian Xie](http://txie.me), but has undergone significant changes since.
37+
- A lot of engineering ideas have been borrowed from [github.com/facebookresearch/mmf](https://github.com/facebookresearch/mmf).
38+
- The DimeNet++ implementation is based on the [author's TensorFlow implementation](https://github.com/klicperajo/dimenet) and the [DimeNet implementation in PyTorch Geometric](https://github.com/rusty1s/pytorch_geometric/blob/master/torch_geometric/nn/models/dimenet.py).
39+
40+
## License
41+
42+
`ocp` is released under the [MIT](https://github.com/Open-Catalyst-Project/ocp/blob/main/LICENSE.md) license.
43+
44+
## Citing `ocp`
45+
46+
If you use this codebase in your work, please consider citing:
47+
48+
```bibtex
49+
@article{ocp_dataset,
50+
author = {Chanussot*, Lowik and Das*, Abhishek and Goyal*, Siddharth and Lavril*, Thibaut and Shuaibi*, Muhammed and Riviere, Morgane and Tran, Kevin and Heras-Domingo, Javier and Ho, Caleb and Hu, Weihua and Palizhati, Aini and Sriram, Anuroop and Wood, Brandon and Yoon, Junwoong and Parikh, Devi and Zitnick, C. Lawrence and Ulissi, Zachary},
51+
title = {Open Catalyst 2020 (OC20) Dataset and Community Challenges},
52+
journal = {ACS Catalysis},
53+
year = {2021},
54+
doi = {10.1021/acscatal.0c04525},
55+
}
56+
```

‎Shell_repo.code-workspace

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"folders": [
3+
{
4+
"path": "."
5+
}
6+
],
7+
"settings": {}
8+
}

‎ocp/slab_generation/all_sample_all_info

+7,122
Large diffs are not rendered by default.

‎ocp/slab_generation/output.gif

5.42 MB
Loading

‎ocp/slab_generation/read_lmdb_r.ipynb

+197
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"from ocpmodels.datasets import LmdbDataset\n",
10+
"import lmdb\n",
11+
"from ase import Atoms"
12+
]
13+
},
14+
{
15+
"cell_type": "code",
16+
"execution_count": 2,
17+
"metadata": {},
18+
"outputs": [],
19+
"source": [
20+
"slab_data_objs=LmdbDataset({\"src\": \"datasetss/slabs.lmdb\"})"
21+
]
22+
},
23+
{
24+
"cell_type": "code",
25+
"execution_count": 7,
26+
"metadata": {},
27+
"outputs": [
28+
{
29+
"data": {
30+
"text/plain": [
31+
"Data(pos=[104, 3], cell=[1, 3, 3], atomic_numbers=[104], natoms=104, tags=[104], edge_index=[2, 4927], cell_offsets=[4927, 3], y=-869.1991577148438, force=[104, 3], fixed=[104], slab_formula='O72Ti32', unrelax_energy=-864.7914428710938, bulk_energy=-226.61505634, miller=[3], bulk_formula='O16Ti8')"
32+
]
33+
},
34+
"execution_count": 7,
35+
"metadata": {},
36+
"output_type": "execute_result"
37+
}
38+
],
39+
"source": [
40+
"atom_idx=0 \n",
41+
"demo=slab_data_objs[atom_idx]\n",
42+
"demo"
43+
]
44+
},
45+
{
46+
"cell_type": "markdown",
47+
"metadata": {},
48+
"source": [
49+
"# Function for getting surface energy from a Data object"
50+
]
51+
},
52+
{
53+
"cell_type": "code",
54+
"execution_count": 19,
55+
"metadata": {},
56+
"outputs": [],
57+
"source": [
58+
"from pymatgen.analysis.surface_analysis import SlabEntry\n",
59+
"from pymatgen.entries.computed_entries import ComputedEntry\n",
60+
"from pymatgen.io.ase import AseAtomsAdaptor\n",
61+
"\n",
62+
"def get_surface_energy(dat):\n",
63+
" bulk_entry = ComputedEntry(dat.bulk_formula, dat.bulk_energy)\n",
64+
" gas_entry = ComputedEntry('O2', 2*-7.204) # the ref energy for O in OC20\n",
65+
" \n",
66+
" atoms=Atoms(dat.atomic_numbers,\n",
67+
" positions=dat.pos,\n",
68+
" tags=dat.tags,\n",
69+
" cell=dat.cell.squeeze(), pbc=True)\n",
70+
" \n",
71+
" slabentry = SlabEntry(AseAtomsAdaptor.get_structure(atoms), dat.y, dat.miller)\n",
72+
" return slabentry.surface_energy(bulk_entry, [gas_entry])"
73+
]
74+
},
75+
{
76+
"cell_type": "code",
77+
"execution_count": 18,
78+
"metadata": {
79+
"tags": []
80+
},
81+
"outputs": [
82+
{
83+
"name": "stdout",
84+
"output_type": "stream",
85+
"text": [
86+
"Ti4O9 TiO2 (1, 1, 1) 0.486508823034301 - 0.0410153310548295*delu_O\n",
87+
"Ti9O20 TiO2 (1, 1, 1) 0.450274798852927 - 0.0410153310548295*delu_O\n",
88+
"Ti8O17 TiO2 (1, 1, 1) 0.404413278395938 - 0.0205076655274147*delu_O\n",
89+
"Ti9O19 TiO2 (1, 1, 1) 0.416724743705242 - 0.0205076655274147*delu_O\n",
90+
"TiO2 TiO2 (1, 1, 1) 0.38411800535389684\n",
91+
"TiO2 TiO2 (1, 1, 1) 0.3677720348620239\n",
92+
"TiO2 TiO2 (1, 1, 1) 0.37070036027104436\n",
93+
"TiO2 TiO2 (1, 1, 1) 0.3745788867635387\n",
94+
"Ti10O19 TiO2 (1, 1, 1) 0.0205076655274147*delu_O + 0.445462215747588\n",
95+
"Ti9O17 TiO2 (1, 1, 1) 0.0205076655274147*delu_O + 0.400585568918097\n",
96+
"Ti5O9 TiO2 (1, 1, 1) 0.0410153310548295*delu_O + 0.472357140401416\n",
97+
"Ti9O16 TiO2 (1, 1, 1) 0.0410153310548295*delu_O + 0.444017802955972\n",
98+
"Ti4O9 TiO2 (1, 1, 0) 0.546207448328953 - 0.0489107017539134*delu_O\n",
99+
"Ti8O17 TiO2 (1, 1, 0) 0.449417328042306 - 0.0244553508769567*delu_O\n",
100+
"Ti8O17 TiO2 (1, 1, 0) 0.44118767856354 - 0.0244553508769567*delu_O\n",
101+
"Ti9O19 TiO2 (1, 1, 0) 0.492222603655402 - 0.0244553508769567*delu_O\n",
102+
"TiO2 TiO2 (1, 1, 0) 0.42297423266952894\n",
103+
"TiO2 TiO2 (1, 1, 0) 0.3821024961246957\n",
104+
"TiO2 TiO2 (1, 1, 0) 0.4096769123415225\n",
105+
"TiO2 TiO2 (1, 1, 0) 0.42916178479927325\n",
106+
"Ti10O19 TiO2 (1, 1, 0) 0.0244553508769567*delu_O + 0.472355188527271\n",
107+
"Ti10O19 TiO2 (1, 1, 0) 0.0244553508769567*delu_O + 0.466107013553458\n",
108+
"Ti9O17 TiO2 (1, 1, 0) 0.0244553508769567*delu_O + 0.454241846622795\n",
109+
"Ti5O9 TiO2 (1, 1, 0) 0.0489107017539134*delu_O + 0.528671103072817\n",
110+
"Ti9O19 TiO2 (1, 1, -1) 0.445676410149205 - 0.0221349356090043*delu_O\n",
111+
"Ti9O19 TiO2 (1, 1, -1) 0.451522902693637 - 0.0221349356090043*delu_O\n",
112+
"TiO2 TiO2 (1, 1, -1) 0.3594562344146164\n",
113+
"TiO2 TiO2 (1, 1, -1) 0.4066659017681955\n",
114+
"TiO2 TiO2 (1, 1, -1) 0.4265215299197349\n",
115+
"TiO2 TiO2 (1, 1, -1) 0.413633577146169\n",
116+
"Ti9O17 TiO2 (1, 1, -1) 0.0221349356090043*delu_O + 0.432247483548343\n",
117+
"Ti9O17 TiO2 (1, 1, -1) 0.0221349356090043*delu_O + 0.40899019700201\n",
118+
"Ti15O32 TiO2 (1, 0, 1) 0.586705295438452 - 0.0340802406084122*delu_O\n",
119+
"Ti15O32 TiO2 (1, 0, 1) 0.600576047663616 - 0.0340802406084122*delu_O\n",
120+
"TiO2 TiO2 (1, 0, 1) 0.5103686572006261\n",
121+
"TiO2 TiO2 (1, 0, 1) 0.4672545735158993\n",
122+
"TiO2 TiO2 (1, 0, 1) 0.4986365545317719\n",
123+
"TiO2 TiO2 (1, 0, 1) 0.5563709172177385\n",
124+
"Ti15O28 TiO2 (1, 0, 1) 0.0340802406084122*delu_O + 0.549230332269358\n",
125+
"Ti15O28 TiO2 (1, 0, 1) 0.0340802406084122*delu_O + 0.566247571552648\n",
126+
"Ti6O13 TiO2 (1, 0, 0) 0.506599989040615 - 0.0401238257066021*delu_O\n",
127+
"Ti6O13 TiO2 (1, 0, 0) 0.578858303657952 - 0.0401238257066021*delu_O\n",
128+
"Ti7O15 TiO2 (1, 0, 0) 0.680038398006931 - 0.0401238257066021*delu_O\n",
129+
"Ti7O15 TiO2 (1, 0, 0) 0.591349984105807 - 0.0401238257066021*delu_O\n",
130+
"TiO2 TiO2 (1, 0, 0) 0.5297788651195319\n",
131+
"TiO2 TiO2 (1, 0, 0) 0.5571264457875991\n",
132+
"Ti8O15 TiO2 (1, 0, 0) 0.0401238257066021*delu_O + 0.623025610762092\n",
133+
"Ti8O15 TiO2 (1, 0, 0) 0.0401238257066021*delu_O + 0.651810733281343\n",
134+
"Ti7O13 TiO2 (1, 0, 0) 0.0401238257066021*delu_O + 0.609176796113447\n",
135+
"Ti7O13 TiO2 (1, 0, 0) 0.0401238257066021*delu_O + 0.563493005870556\n",
136+
"Ti5O11 TiO2 (1, 0, -1) 0.57793941863426 - 0.0436757073661562*delu_O\n",
137+
"Ti10O21 TiO2 (1, 0, -1) 0.484324367917108 - 0.0218378536830781*delu_O\n",
138+
"Ti10O21 TiO2 (1, 0, -1) 0.486731987731329 - 0.0218378536830781*delu_O\n",
139+
"Ti11O23 TiO2 (1, 0, -1) 0.487101152912659 - 0.0218378536830781*delu_O\n",
140+
"TiO2 TiO2 (1, 0, -1) 0.45874548637045987\n",
141+
"TiO2 TiO2 (1, 0, -1) 0.49111795408103287\n",
142+
"Ti12O23 TiO2 (1, 0, -1) 0.0218378536830781*delu_O + 0.540261562866761\n",
143+
"Ti12O23 TiO2 (1, 0, -1) 0.0218378536830781*delu_O + 0.503619890728277\n",
144+
"Ti11O21 TiO2 (1, 0, -1) 0.0218378536830781*delu_O + 0.507712308528015\n",
145+
"Ti6O11 TiO2 (1, 0, -1) 0.0436757073661562*delu_O + 0.53241624639869\n",
146+
"Ti6O13 TiO2 (0, 1, 1) 0.46653771609867 - 0.0330964371616681*delu_O\n",
147+
"Ti18O37 TiO2 (0, 1, 1) 0.413222183895921 - 0.011032145720556*delu_O\n",
148+
"TiO2 TiO2 (0, 1, 1) 0.41527619029720686\n",
149+
"Ti20O39 TiO2 (0, 1, 1) 0.011032145720556*delu_O + 0.425592340064277\n",
150+
"Ti20O37 TiO2 (0, 1, 1) 0.0330964371616681*delu_O + 0.481269074658621\n",
151+
"TiO2 TiO2 (0, 1, 0) 0.35231647565033786\n",
152+
"Ti2O5 TiO2 (0, 0, 1) 0.503945021250956 - 0.0862807920604508*delu_O\n",
153+
"Ti4O9 TiO2 (0, 0, 1) 0.47568503556361 - 0.0431403960302254*delu_O\n",
154+
"Ti4O9 TiO2 (0, 0, 1) 0.481512043401433 - 0.0431403960302254*delu_O\n",
155+
"TiO2 TiO2 (0, 0, 1) 0.4098890016173229\n",
156+
"Ti6O11 TiO2 (0, 0, 1) 0.0431403960302254*delu_O + 0.522681399662592\n",
157+
"Ti6O11 TiO2 (0, 0, 1) 0.0431403960302254*delu_O + 0.543839081683764\n",
158+
"Ti3O5 TiO2 (0, 0, 1) 0.0862807920604508*delu_O + 0.695930649700783\n"
159+
]
160+
}
161+
],
162+
"source": [
163+
"from pymatgen.core.composition import Composition\n",
164+
"for dat in slab_data_objs:\n",
165+
" print(Composition(dat.slab_formula).reduced_formula, \n",
166+
" Composition(dat.bulk_formula).reduced_formula, \n",
167+
" dat.miller, get_surface_energy(dat))"
168+
]
169+
}
170+
],
171+
"metadata": {
172+
"kernelspec": {
173+
"display_name": "gpustudy",
174+
"language": "python",
175+
"name": "python3"
176+
},
177+
"language_info": {
178+
"codemirror_mode": {
179+
"name": "ipython",
180+
"version": 3
181+
},
182+
"file_extension": ".py",
183+
"mimetype": "text/x-python",
184+
"name": "python",
185+
"nbconvert_exporter": "python",
186+
"pygments_lexer": "ipython3",
187+
"version": "3.8.8"
188+
},
189+
"vscode": {
190+
"interpreter": {
191+
"hash": "07482723249349ed54e2c7e6c4dc9f336a0ae8a59ea3de954cec4c2faf28691e"
192+
}
193+
}
194+
},
195+
"nbformat": 4,
196+
"nbformat_minor": 4
197+
}

‎ocp/slab_generation/script_v3.py

+134
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
from pymatgen.core.surface import SlabGenerator, generate_all_slabs, \
2+
get_symmetrically_distinct_miller_indices
3+
from pymatgen.io.ase import AseAtomsAdaptor
4+
from pymatgen.analysis.local_env import VoronoiNN, CrystalNN
5+
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
6+
from collections import defaultdict
7+
from ase.constraints import FixAtoms
8+
import numpy as np
9+
10+
11+
def tag_surface_atoms(bulk, slab, height_tol=2):
    '''
    Attach a per-site `tag` property to `slab`, in place. Sites flagged by
    `find_surface_atoms_by_height` get a tag of 1 ("surface"); every other
    site gets a tag of 0 ("bulk"). Nothing is returned.

    Arg:
        bulk        Accepted for call compatibility; this function does not
                    read it.
        slab        The slab to tag. It must provide `add_site_property` and
                    whatever `find_surface_atoms_by_height` reads from it.
        height_tol  Forwarded unchanged to `find_surface_atoms_by_height`.
    '''
    slab.add_site_property(
        'tag',
        find_surface_atoms_by_height(slab, height_tol=height_tol),
    )
27+
28+
def calculate_center_of_mass(struct):
    '''
    Return the mass-weighted mean of the fractional coordinates of `struct`.

    Arg:
        struct  Iterable of sites, each exposing `species.weight`, that also
                provides `frac_coords` with one row per site.
    Returns:
        center_of_mass  The average of `struct.frac_coords` taken over the
                        site axis, weighted by each site's `species.weight`.
    '''
    masses = []
    for site in struct:
        masses.append(site.species.weight)

    # axis=0 averages over sites, leaving one value per fractional coordinate.
    return np.average(struct.frac_coords, axis=0, weights=masses)
36+
37+
def find_surface_atoms_by_height(slab, height_tol=2):
    '''
    Flag the sites that lie near the top of a slab.

    A site is considered a surface site when its fractional coordinate along
    the 3rd lattice vector is no more than `height_tol` below the highest
    site. `height_tol` is converted to fractional units by dividing it by the
    length of the 3rd lattice vector, so the comparison is done along that
    vector rather than along the Cartesian z-axis.

    Arg:
        slab        Object exposing `lattice.matrix` (rows are the lattice
                    vectors) and `frac_coords` (one fractional coordinate
                    triple per site).
        height_tol  Depth below the highest site that still counts as
                    surface, in the length units of `lattice.matrix`
                    (Angstroms for pymatgen structures).
    Returns:
        tags    A list with one int per site, in site order: 1 for a
                surface site and 0 otherwise. An empty list is returned
                for a slab without sites.
    '''
    scaled_positions = np.asarray(slab.frac_coords, dtype=float)
    if scaled_positions.size == 0:
        # No sites: there is no "highest atom" to measure from.
        return []

    unit_cell_height = np.linalg.norm(slab.lattice.matrix[2])
    scaled_heights = scaled_positions[:, 2]
    scaled_threshold_top = scaled_heights.max() - height_tol / unit_cell_height

    # Strictly below the threshold -> 0 (bulk); at or above it -> 1 (surface).
    return np.where(scaled_heights < scaled_threshold_top, 0, 1).tolist()
63+
64+
65+
def get_repeat_from_min_lw(slab, min_lw):
    """
    Modified version of the algorithm from adsorption.py for determining the
    supercell matrix of a slab given min_lw. For each in-plane direction it
    compares repeating the plain lattice vector against repeating the
    corresponding vector of the [[1,1,0], [1,-1,0], [0,0,1]] transformed cell,
    and keeps the plain repeat only when its total length is strictly smaller.

    Arg:
        slab    Object exposing `lattice.matrix`, `copy()` and
                `make_supercell()`. Only a copy is transformed; `slab`
                itself is left untouched.
        min_lw  Minimum in-plane length each repeated vector has to reach.
    Returns:
        A list of three rows `[xrep, yrep, zrep]` forming the supercell
        matrix; the third row is always `[0, 0, 1]`.
    """
    base_matrix = slab.lattice.matrix
    len_x = np.linalg.norm(base_matrix[0])
    len_y = np.linalg.norm(base_matrix[1])
    n_x = np.ceil(min_lw / len_x)
    n_y = np.ceil(min_lw / len_y)

    # Build the transformed cell on a copy so the caller's slab is unchanged.
    transformed = slab.copy()
    transformed.make_supercell([[1, 1, 0], [1, -1, 0], [0, 0, 1]])
    transformed_matrix = transformed.lattice.matrix
    len_x_rt = np.linalg.norm(transformed_matrix[0])
    len_y_rt = np.linalg.norm(transformed_matrix[1])
    n_x_rt = np.ceil(min_lw / len_x_rt)
    n_y_rt = np.ceil(min_lw / len_y_rt)

    if n_x * len_x < n_x_rt * len_x_rt:
        x_row = n_x * np.array([1, 0, 0])
    else:
        x_row = n_x_rt * np.array([1, 1, 0])

    if n_y * len_y < n_y_rt * len_y_rt:
        y_row = n_y * np.array([0, 1, 0])
    else:
        y_row = n_y_rt * np.array([1, -1, 0])

    return [x_row, y_row, [0, 0, 1]]
88+
89+
90+
def slab_generator(ase_bulk, mmi, slab_size, vacuum_size, tol=0.1, height_tol=2, min_lw=8):
    '''
    Generate tagged, constrained `ase.Atoms` slabs from a bulk structure.

    For every slab produced by `generate_all_slabs`, the atoms near the top
    are tagged by height, the tagging is extended to all symmetry-equivalent
    sites so that both surfaces are tagged, the slab is repeated in-plane
    according to `get_repeat_from_min_lw`, and the result is converted to
    `ase.Atoms` with the non-surface atoms fixed.

    Arg:
        ase_bulk     Bulk structure in `ase.Atoms` format.
        mmi          Forwarded as the 2nd positional argument of
                     `generate_all_slabs` (its `max_index` per the pymatgen
                     signature).
        slab_size    Forwarded as the 3rd positional argument of
                     `generate_all_slabs` (its `min_slab_size`).
        vacuum_size  Forwarded as the 4th positional argument of
                     `generate_all_slabs` (its `min_vacuum_size`).
        tol          Forwarded as `tol` to `generate_all_slabs`.
        height_tol   Forwarded to `tag_surface_atoms`.
        min_lw       Forwarded to `get_repeat_from_min_lw`.
    Returns:
        atoms_slabs  A list of `ase.Atoms`, one per generated slab. Each has
                     tags set (1 surface, 0 otherwise), a `FixAtoms`
                     constraint on the atoms tagged 0, and the slab's Miller
                     index stored in `atoms.info['miller_index']`.
    '''
    bulk = AseAtomsAdaptor.get_structure(ase_bulk)
    all_slabs = generate_all_slabs(bulk, mmi, slab_size, vacuum_size,
                                   center_slab=True, max_normal_search=1,
                                   symmetrize=True, tol=tol)

    atoms_slabs = []
    for slab in all_slabs:
        new_slab = slab.copy()
        tag_surface_atoms(bulk, new_slab, height_tol=height_tol)

        # Remember each site's position in `new_slab` so that sites of the
        # symmetrized structure can be mapped back to it.
        new_slab.add_site_property('original_index', list(range(len(new_slab))))
        sym_slab = SpacegroupAnalyzer(new_slab).get_symmetrized_structure()

        # Collect the height-tagged sites plus their symmetry equivalents,
        # which picks up the matching sites on the other surface. A set keeps
        # the membership tests O(1).
        surface_indices = set()
        for site in sym_slab:
            if site.tag == 1:
                surface_indices.add(site.original_index)
                surface_indices.update(
                    eq_site.original_index
                    for eq_site in sym_slab.find_equivalent_sites(site)
                )

        # Tag both surfaces
        tags = [1 if i in surface_indices else 0 for i in range(len(new_slab))]
        new_slab.add_site_property('tag', tags)

        msuper = get_repeat_from_min_lw(new_slab, min_lw)
        new_slab.make_supercell(msuper)

        # ASE Atoms object format
        atoms = AseAtomsAdaptor.get_atoms(new_slab)
        site_tags = [site.tag for site in new_slab]
        atoms.set_tags(site_tags)
        # Freeze everything that is not tagged as surface.
        atoms.set_constraint(FixAtoms([i for i, tag in enumerate(site_tags) if tag == 0]))
        atoms.info['miller_index'] = new_slab.miller_index
        atoms_slabs.append(atoms)

    return atoms_slabs

‎ocp/slab_generation/slabs.ipynb

+1,160
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)
Please sign in to comment.