-
Notifications
You must be signed in to change notification settings - Fork 40
TREXIO + MCSCF wavefunction #92
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
dc61e9a
c639fd4
00f97c3
c4ac993
3fe27fd
58a1332
21ce806
243da68
38b91b8
c48fb69
83c1a0d
04550af
19bec15
177e46c
1f74dd3
aa6b613
7e798f8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,16 +21,21 @@ | |
from pyscf import gto | ||
from pyscf import scf | ||
from pyscf import pbc | ||
from pyscf import mcscf | ||
from pyscf import fci | ||
|
||
import trexio | ||
|
||
def to_trexio(obj, filename, backend='h5'): | ||
def to_trexio(obj, filename, backend='h5', ci_threshold=None, chunk_size=None): | ||
with trexio.File(filename, 'u', back_end=_mode(backend)) as tf: | ||
if isinstance(obj, gto.Mole): | ||
_mol_to_trexio(obj, tf) | ||
elif isinstance(obj, scf.hf.SCF): | ||
_scf_to_trexio(obj, tf) | ||
elif isinstance(obj, mcscf.casci.CASCI) or isinstance(obj, mcscf.CASSCF): | ||
ci_threshold = ci_threshold if ci_threshold is not None else 0. | ||
chunk_size = chunk_size if chunk_size is not None else 100000 | ||
_mcscf_to_trexio(obj, tf, ci_threshold=ci_threshold, chunk_size=chunk_size) | ||
else: | ||
raise NotImplementedError(f'Conversion function for {obj.__class__}') | ||
|
||
|
@@ -248,8 +253,39 @@ def _scf_to_trexio(mf, trexio_file): | |
def _cc_to_trexio(cc_obj, trexio_file): | ||
raise NotImplementedError | ||
|
||
def _mcscf_to_trexio(cas_obj, trexio_file): | ||
raise NotImplementedError | ||
def _mcscf_to_trexio(cas_obj, trexio_file, ci_threshold=0., chunk_size=100000): | ||
mol = cas_obj.mol | ||
_mol_to_trexio(mol, trexio_file) | ||
mo_energy_cas = cas_obj.mo_energy | ||
mo_cas = cas_obj.mo_coeff | ||
num_mo = mo_energy_cas.size | ||
spin_cas = np.zeros(mo_energy_cas.size, dtype=int) | ||
mo_type_cas = 'CAS' | ||
trexio.write_mo_type(trexio_file, mo_type_cas) | ||
idx = _order_ao_index(mol) | ||
trexio.write_mo_num(trexio_file, num_mo) | ||
trexio.write_mo_coefficient(trexio_file, mo_cas[idx].T.ravel()) | ||
trexio.write_mo_energy(trexio_file, mo_energy_cas) | ||
trexio.write_mo_spin(trexio_file, spin_cas) | ||
|
||
ncore = cas_obj.ncore | ||
ncas = cas_obj.ncas | ||
mo_classes = np.array(["Virtual"] * num_mo, dtype=str) # Initialize all MOs as Virtual | ||
mo_classes[:ncore] = "Core" | ||
mo_classes[ncore:ncore + ncas] = "Active" | ||
trexio.write_mo_class(trexio_file, list(mo_classes)) | ||
|
||
occupation = np.zeros(num_mo) | ||
occupation[:ncore] = 2.0 | ||
rdm1 = cas_obj.fcisolver.make_rdm1(cas_obj.ci, ncas, cas_obj.nelecas) | ||
natural_occ = np.linalg.eigh(rdm1)[0] | ||
occupation[ncore:ncore + ncas] = natural_occ[::-1] | ||
occupation[ncore + ncas:] = 0.0 | ||
trexio.write_mo_occupation(trexio_file, occupation) | ||
|
||
total_elec_cas = sum(cas_obj.nelecas) | ||
|
||
det_to_trexio(cas_obj, ncas, total_elec_cas, trexio_file, ci_threshold, chunk_size) | ||
|
||
def mol_from_trexio(filename): | ||
mol = gto.Mole() | ||
|
@@ -329,19 +365,25 @@ def write_eri(eri, filename, backend='h5'): | |
idx[:,:,2:] = idx_pair[None,:,:] | ||
idx = idx[np.tril_indices(npair)] | ||
|
||
# Physicist notation | ||
idx=idx.reshape((num_integrals,4)) | ||
for i in range(num_integrals): | ||
idx[i,1],idx[i,2]=idx[i,2],idx[i,1] | ||
|
||
idx=idx.flatten() | ||
|
||
with trexio.File(filename, 'w', back_end=_mode(backend)) as tf: | ||
trexio.write_mo_2e_int_eri(tf, 0, num_integrals, idx, eri.ravel()) | ||
|
||
def read_eri(filename): | ||
'''Read ERIs in AO basis, 8-fold symmetry is assumed''' | ||
with trexio.File(filename, 'r', back_end=trexio.TREXIO_AUTO) as tf: | ||
nmo = trexio.read_mo_num(tf) | ||
nao_pair = nmo * (nmo+1) // 2 | ||
eri_size = nao_pair * (nao_pair+1) // 2 | ||
idx, data, n_read, eof_flag = trexio.read_mo_2e_int_eri(tf, 0, eri_size) | ||
eri = np.zeros(eri_size) | ||
x = idx[:,0]*(idx[:,0]+1)//2 + idx[:,1] | ||
y = idx[:,2]*(idx[:,2]+1)//2 + idx[:,3] | ||
x = idx[:,0]*(idx[:,0]+1)//2 + idx[:,2] | ||
y = idx[:,1]*(idx[:,1]+1)//2 + idx[:,3] | ||
eri[x*(x+1)//2+y] = data | ||
return eri | ||
|
||
|
@@ -410,17 +452,18 @@ def get_occsa_and_occsb(mcscf, norb, nelec, ci_threshold=0.): | |
|
||
return occsa_sorted, occsb_sorted, ci_values_sorted, num_determinants | ||
|
||
def det_to_trexio(mcscf, norb, nelec, filename, backend='h5', ci_threshold=0., chunk_size=100000): | ||
def det_to_trexio(mcscf, norb, nelec, trexio_file, ci_threshold=0., chunk_size=100000): | ||
from trexio_tools.group_tools import determinant as trexio_det | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi @NastaMauger, in my opinion, we should not import There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be possible to accept this PR as it is and open a new one with your idea? I agree that your suggestion would help reduce dependencies, but since this has already been validated by everyone and the only thing preventing the final merge is unrelated to the current modification, I would really appreciate if it could be merged To be honest, I’m not sure how much work this would require, especially since this PR has been open for a while simply because I had forgotten about it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
mo_num = norb | ||
int64_num = int((mo_num - 1) / 64) + 1 | ||
ncore = mcscf.ncore | ||
int64_num = trexio.get_int64_num(trexio_file) | ||
|
||
occsa, occsb, ci_values, num_determinants = get_occsa_and_occsb(mcscf, norb, nelec, ci_threshold) | ||
|
||
det_list = [] | ||
for a, b, coeff in zip(occsa, occsb, ci_values): | ||
occsa_upshifted = [orb + 1 for orb in a] | ||
occsb_upshifted = [orb + 1 for orb in b] | ||
occsa_upshifted = [orb for orb in range(ncore)] + [orb+ncore for orb in a] | ||
occsb_upshifted = [orb for orb in range(ncore)] + [orb+ncore for orb in b] | ||
det_tmp = [] | ||
det_tmp += trexio_det.to_determinant_list(occsa_upshifted, int64_num) | ||
det_tmp += trexio_det.to_determinant_list(occsb_upshifted, int64_num) | ||
|
@@ -431,24 +474,19 @@ def det_to_trexio(mcscf, norb, nelec, filename, backend='h5', ci_threshold=0., c | |
else: | ||
n_chunks = 1 | ||
|
||
with trexio.File(filename, 'u', back_end=_mode(backend)) as tf: | ||
if trexio.has_determinant(tf): | ||
trexio.delete_determinant(tf) | ||
trexio.write_mo_num(tf, mo_num) | ||
trexio.write_electron_up_num(tf, len(a)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this removed? We need the number of up and down electrons to perform the CI determinants "correctness" check before writing them in the file. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is now handled by the |
||
trexio.write_electron_dn_num(tf, len(b)) | ||
trexio.write_electron_num(tf, len(a) + len(b)) | ||
if trexio.has_determinant(trexio_file): | ||
trexio.delete_determinant(trexio_file) | ||
|
||
offset_file = 0 | ||
for i in range(n_chunks): | ||
start = i * chunk_size | ||
end = min((i + 1) * chunk_size, num_determinants) | ||
current_chunk_size = end - start | ||
|
||
if current_chunk_size > 0: | ||
trexio.write_determinant_list(tf, offset_file, current_chunk_size, det_list[start:end]) | ||
trexio.write_determinant_coefficient(tf, offset_file, current_chunk_size, ci_values[start:end]) | ||
offset_file += current_chunk_size | ||
offset_file = 0 | ||
for i in range(n_chunks): | ||
start = i * chunk_size | ||
end = min((i + 1) * chunk_size, num_determinants) | ||
current_chunk_size = end - start | ||
|
||
if current_chunk_size > 0: | ||
trexio.write_determinant_list(trexio_file, offset_file, current_chunk_size, det_list[start:end]) | ||
trexio.write_determinant_coefficient(trexio_file, offset_file, current_chunk_size, ci_values[start:end]) | ||
offset_file += current_chunk_size | ||
|
||
def read_det_trexio(filename): | ||
with trexio.File(filename, 'r', back_end=trexio.TREXIO_AUTO) as tf: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do you need to reshape the integrals here?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is due to the difference between chemist and physicist notation.
QP2 uses the physicist's notation, whereas PySCF uses the chemist's.
There is a PR where Kevin is modifying this.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In fact, I think there should be a possibility with TREXIO to register data in one format or another, and to have an HDF5 field indicating which format the data is stored in.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How did you test this part?
I am asking because
eri
array accordingly (which remains in the original chemist notation of PySCF).read_eri
part is untouched. So when reading the ERI's in physicist notation, one may encounter issues as PySCF may expect chemist notation. So the the index swap might be needed inread_eri
too.I guess one of these points leads to tests failing in the CI. But maybe I missed something?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@q-posev This is something I worked on with Anthony during a Zoom call a while ago because we noticed some weird behavior. I'm not sure if it's still necessary, so I left it here as a comment just in case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Regarding your point 2: exactly. That's why I think something should be done in the TREXIO format to ensure that the formats are properly registered and can be used seamlessly across different software.
I never really had the opportunity to finish this, and I know Kevin is working on something similar on the QP2 side.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added a small comment in case someone wants to follow this. If it turns out to be useless, I guess it can easily be removed by either you or Anthony in the next PR
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Even if we enforce one format or another in TREXIO, the user can still confuse them and accidentally export the wrong info. But we have
trexio-tools
package which can be used to validate some TREXIO fields and people can contribute more robust tests to the repo (e.g. computing the HF energy from the one- and two-electron integrals)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The whole point of TREXIO is to have a single and consistent way of doing things so the data is easy to interpret. We chose physicist's notation because chemist's notation can't generalize to more than 2-electron integrals. Reordering indices from 1,2,3,4 to 1,3,2,4 is not really a big issue...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this zoom call we realized we had to reorder the indices because we checked the enrgy with quantum package. So we fixed the ordering for the export. I don't recall we checked importing integrals into pyscf. The reordering should also be done in the reading part.