Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parallel optimisations #47

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
SharedArrays = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
TensorCast = "02d47bb6-7ce6-556a-be16-bb1710789e2b"

[compat]
ACE = "= 0.12.22"
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
59 changes: 59 additions & 0 deletions examples/H2O/Codes/graph_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from sparseutils import from_coo_tuple
from ase.db import connect
import h5py
import math
import numpy as np
import os

def write_dataset_h5(db_path:str='/home/c/chenqian3/ACEhamiltonians/H2O_PASHA/H2O_Pasha/Data/dbs/dyn-wd-300K_3.db', outp_direc:str='/home/c/chenqian3/ACEhamiltonians/H2O_PASHA/H2O_Pasha/Data/graph_data'):
    """Convert an ASE database of sparse H/S/dm matrices into an HDF5 dataset.

    Each database row must provide ``H``, ``S`` and ``dm`` entries in COO-tuple
    form (decoded via ``from_coo_tuple``) plus the geometry itself.  One HDF5
    group per geometry is written, named with a zero-padded index.

    Parameters
    ----------
    db_path : str
        Path to the input ASE ``.db`` file.
    outp_direc : str
        Directory into which ``<db basename>.h5`` is written.
    """
    db = connect(db_path)

    atoms_list, h_list, s_list, dm_list = [], [], [], []
    for row in db.select():
        atoms_list.append(row.toatoms())
        h_list.append(from_coo_tuple(row.data['H']))
        s_list.append(from_coo_tuple(row.data['S']))
        dm_list.append(from_coo_tuple(row.data['dm']))

    n_geometries = len(atoms_list)
    # Zero-pad group names so they sort lexicographically.  len(str(n)) equals
    # int(log10(n)) + 1 for n >= 1 and, unlike log10, does not raise on an
    # empty database.
    length_number = len(str(n_geometries)) if n_geometries else 1

    # BUG FIX: str.rstrip('.db') strips any trailing characters from the SET
    # {'.', 'd', 'b'}, mangling stems that end in 'b' or 'd'; splitext removes
    # exactly one extension.
    file_name = os.path.splitext(os.path.basename(db_path))[0]
    output_path = os.path.join(outp_direc, file_name + '.h5')

    with h5py.File(output_path, "w") as f:
        for i, (atoms, H, S, dm) in enumerate(zip(atoms_list, h_list, s_list, dm_list)):
            gs = f.create_group(f'{i:0{length_number}}')
            gsd = gs.create_group('Data')
            # Datasets created with only a dtype are empty placeholders to be
            # filled in later (or left unused by downstream readers).
            gsdd = gsd.create_group('DoS')
            gsdd.create_dataset('broadening', dtype=np.float64)
            gsdd.create_dataset('energies', dtype=np.float64)
            gsdd.create_dataset('values', dtype=np.float64)
            # Matrices are stored transposed; the '*_gama' variants are the bare
            # gamma-point matrices while the plain names carry a trailing
            # translation/cell axis of size 1.
            gsd.create_dataset('H', data=np.expand_dims(H, axis=-1).T, dtype=np.float64)
            gsd.create_dataset('H_gama', data=H.T)
            gsd.create_dataset('S', data=np.expand_dims(S, axis=-1).T, dtype=np.float64)
            gsd.create_dataset('S_gama', data=S.T)
            gsd.create_dataset('dm', data=np.expand_dims(dm, axis=-1).T, dtype=np.float64)
            gsd.create_dataset('dm_gama', data=dm.T)
            gsd.create_dataset('fermi_level', dtype=np.float64)
            gsd.create_dataset('forces', dtype=np.float64)
            gsd.create_dataset('total_energy', dtype=np.float64)
            gsi = gs.create_group('Info')
            gsib = gsi.create_group('Basis')
            gsib.create_dataset('14', dtype=np.float64)  # NOTE(review): key looks like an atomic number — confirm
            gsi.create_dataset('Translations', data=np.array([[0, 0, 0]], dtype=np.int64))
            gsi.create_dataset('k-points', data=np.array([[0., 0., 0.]]), dtype=np.float64)
            gss = gs.create_group('Structure')
            gss.create_dataset('atomic_numbers', data=atoms.numbers, dtype=np.int64)
            # Build a large padded orthorhombic cell from the coordinate extents
            # instead of atoms.cell (commented out in the original).
            margins = atoms.positions.max(axis=0) - atoms.positions.min(axis=0)
            cell = np.zeros((3, 3))
            np.fill_diagonal(cell, margins + 100)
            gss.create_dataset('lattice', data=cell, dtype=np.float64)
            gss.create_dataset('pbc', data=atoms.pbc, dtype=np.bool_)
            gss.create_dataset('positions', data=atoms.positions, dtype=np.float64)
7 changes: 7 additions & 0 deletions examples/H2O/Codes/inspect_h5.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
using HDF5

# NOTE(review): the original assigned `db_path_list` twice in a row; the first
# ("../Data/Datasets/...") was dead code, immediately overwritten by the second.
# Only the assignment that actually took effect is kept.
db_path_list = ["./Data/Datasets/smpl10_md_w$(i).h5" for i in [6, 12, 21]]

# Open the first dataset for interactive inspection.
file = h5open(db_path_list[1])
119 changes: 119 additions & 0 deletions examples/H2O/Codes/read_data_ORCA.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Standard library
import math
import os

# Third-party
# (duplicated `from ase.db import connect` and `import numpy as np` lines removed)
import h5py
import numpy as np
from ase.db import connect


# Shell composition per element: e.g. "2s" means two s-type shells; the leading
# digit is the shell count, the trailing letter the angular-momentum label
# (see split_orbitals, which expands these strings).
basis_definition = dict(H=("2s","1p"), C=("3s", "2p", "1d"), O=("3s", "2p", "1d"))

def split_orbitals(symbols: list, basis_definition: dict) -> list:
    """Expand per-element shell strings into a flat list of orbital labels.

    Each entry like ``"2s"`` contributes its angular-momentum letter repeated
    by its leading count, e.g. ``["H"]`` with ``H=("2s", "1p")`` yields
    ``['s', 's', 'p']``.
    """
    labels = []
    for symbol in symbols:
        for shell in basis_definition[symbol]:
            count, letter = int(shell[:-1]), shell[-1]
            labels.extend([letter] * count)
    return labels

def reorder_idx(orbitals: list) -> list:
    """Build the index permutation mapping ORCA orbital order to FHI-aims order.

    The active per-shell permutation follows the (0, +1, -1) m-ordering
    convention; several alternative orderings were tried during development and
    rejected.
    """
    # Per-shell permutation, 1-based within the shell; (0,+1,-1) convention.
    shell_perm = {
        's': [1],
        'p': [3, 1, 2],
        'd': [5, 3, 1, 2, 4],
        'f': [7, 5, 3, 1, 2, 4, 6],
    }
    indices = []
    offset = -1
    for label in orbitals:
        perm = shell_perm[label]
        indices += [offset + k for k in perm]
        offset += len(perm)
    return indices

def orca_to_fhiaims_ordering(symbols: list, basis_definition: dict, matrix: np.ndarray) -> np.ndarray:
    """Permute *matrix* from ORCA's orbital ordering into FHI-aims ordering.

    The permutation is applied symmetrically to rows and columns; the input
    matrix is left untouched and a reordered copy is returned.
    """
    perm = reorder_idx(split_orbitals(symbols, basis_definition))
    return matrix[np.ix_(perm, perm)]

def prefactor_corection(symbols: list, basis_definition: dict) -> np.ndarray:
    """Build the elementwise sign-correction matrix between the two conventions.

    Certain m-components of p/d/f shells carry opposite phase in ORCA versus
    FHI-aims; the returned matrix is the outer product of the per-orbital sign
    vector with itself and is multiplied elementwise onto H/S.
    """
    # Per-shell sign pattern; labels outside this table contribute nothing,
    # matching the original if/elif chain that silently skipped them.
    signs = {
        's': (1,),
        'p': (1, 1, -1),
        'd': (1, 1, 1, -1, 1),
        'f': (1, 1, 1, 1, -1, 1, -1),
    }
    vec = np.array([s for orb in split_orbitals(symbols, basis_definition)
                    for s in signs.get(orb, ())])
    return np.outer(vec, vec)


def write_dataset_h5(db_path:str="./Data/dbs/schnorb_hamiltonian_water.db", outp_direc:str='./Data/Datasets'):
    """Convert a SchNOrb ASE database (ORCA-ordered H/S) into an HDF5 dataset.

    Hamiltonian and overlap matrices are permuted into FHI-aims orbital
    ordering and sign-corrected before being written, alongside energies,
    forces and the geometry, one zero-padded HDF5 group per database row.

    Parameters
    ----------
    db_path : str
        Path to the input ASE ``.db`` file with ``hamiltonian``, ``overlap``,
        ``energy`` and ``forces`` row data.
    outp_direc : str
        Directory into which ``<db basename>.h5`` is written.
    """
    db = connect(db_path)

    atoms_list, h_list, s_list, energy_list, force_list = [], [], [], [], []
    for row in db.select():
        # Decode the geometry once per row (the original called row.toatoms()
        # twice, doing the conversion work redundantly).
        atoms = row.toatoms()
        symbols = atoms.get_chemical_symbols()
        map_mat = prefactor_corection(symbols, basis_definition)
        atoms_list.append(atoms)
        h_list.append(orca_to_fhiaims_ordering(symbols, basis_definition, row.data['hamiltonian']) * map_mat)
        s_list.append(orca_to_fhiaims_ordering(symbols, basis_definition, row.data['overlap']) * map_mat)
        energy_list.append(row.data['energy'])
        force_list.append(row.data['forces'])

    n_geometries = len(atoms_list)
    # Zero-pad group names; len(str(n)) equals int(log10(n)) + 1 for n >= 1 and
    # does not raise on an empty database.
    length_number = len(str(n_geometries)) if n_geometries else 1

    # BUG FIX: str.rstrip('.db') strips any trailing characters from the SET
    # {'.', 'd', 'b'}, mangling stems ending in 'b' or 'd'; splitext removes
    # exactly one extension.
    file_name = os.path.splitext(os.path.basename(db_path))[0]
    output_path = os.path.join(outp_direc, file_name + '.h5')

    with h5py.File(output_path, "w") as f:
        for i, (atoms, H, S, energy, force) in enumerate(zip(atoms_list, h_list, s_list, energy_list, force_list)):
            gs = f.create_group(f'{i:0{length_number}}')
            gsd = gs.create_group('Data')
            # Datasets created with only a dtype are empty placeholders.
            gsdd = gsd.create_group('DoS')
            gsdd.create_dataset('broadening', dtype=np.float64)
            gsdd.create_dataset('energies', dtype=np.float64)
            gsdd.create_dataset('values', dtype=np.float64)
            # Matrices are stored transposed; the '*_gama' variants are the bare
            # gamma-point matrices while the plain names carry a trailing
            # translation/cell axis of size 1.
            gsd.create_dataset('H', data=np.expand_dims(H, axis=-1).T, dtype=np.float64)
            gsd.create_dataset('H_gama', data=H.T)
            gsd.create_dataset('S', data=np.expand_dims(S, axis=-1).T, dtype=np.float64)
            gsd.create_dataset('S_gama', data=S.T)
            gsd.create_dataset('fermi_level', dtype=np.float64)
            gsd.create_dataset('forces', data=force.T, dtype=np.float64)
            gsd.create_dataset('total_energy', data=energy, dtype=np.float64)
            gsi = gs.create_group('Info')
            gsib = gsi.create_group('Basis')
            gsib.create_dataset('14', dtype=np.float64)  # NOTE(review): key looks like an atomic number — confirm
            gsi.create_dataset('Translations', data=np.array([[0, 0, 0]], dtype=np.int64))
            gsi.create_dataset('k-points', data=np.array([[0., 0., 0.]]), dtype=np.float64)
            gss = gs.create_group('Structure')
            gss.create_dataset('atomic_numbers', data=atoms.numbers, dtype=np.int64)
            # Build a large padded orthorhombic cell from the coordinate extents
            # instead of atoms.cell (commented out in the original).
            margins = atoms.positions.max(axis=0) - atoms.positions.min(axis=0)
            cell = np.zeros((3, 3))
            np.fill_diagonal(cell, margins + 100)
            gss.create_dataset('lattice', data=cell, dtype=np.float64)
            gss.create_dataset('pbc', data=atoms.pbc, dtype=np.bool_)
            gss.create_dataset('positions', data=atoms.positions, dtype=np.float64)


# Convert both SchNOrb databases (water and ethanol) in turn.
db_path_list = [f'./Data/dbs/schnorb_hamiltonian_{name}.db'
                for name in ("water", "ethanol_dft")]

for db_path in db_path_list:
    write_dataset_h5(db_path)
65 changes: 65 additions & 0 deletions examples/H2O/Codes/read_data_dimer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from sparseutils import from_coo_tuple
from ase.db import connect
import h5py
import math
import numpy as np
import os

def write_dataset_h5(db_path:str='../Data/dbs/dyn-wd-300K_3.db', outp_direc:str='../Data/Datasets'):
    """Convert an ASE database of sparse H/S/dm matrices into an HDF5 dataset.

    Each database row must provide ``H``, ``S`` and ``dm`` entries in COO-tuple
    form (decoded via ``from_coo_tuple``) plus the geometry itself.  One HDF5
    group per geometry is written, named with a zero-padded index.

    Parameters
    ----------
    db_path : str
        Path to the input ASE ``.db`` file.
    outp_direc : str
        Directory into which ``<db basename>.h5`` is written.
    """
    db = connect(db_path)

    atoms_list, h_list, s_list, dm_list = [], [], [], []
    for row in db.select():
        atoms_list.append(row.toatoms())
        h_list.append(from_coo_tuple(row.data['H']))
        s_list.append(from_coo_tuple(row.data['S']))
        dm_list.append(from_coo_tuple(row.data['dm']))

    n_geometries = len(atoms_list)
    # Zero-pad group names; len(str(n)) equals int(log10(n)) + 1 for n >= 1 and
    # does not raise on an empty database.
    length_number = len(str(n_geometries)) if n_geometries else 1

    # BUG FIX: str.rstrip('.db') strips any trailing characters from the SET
    # {'.', 'd', 'b'}, mangling stems ending in 'b' or 'd'; splitext removes
    # exactly one extension.
    file_name = os.path.splitext(os.path.basename(db_path))[0]
    output_path = os.path.join(outp_direc, file_name + '.h5')

    with h5py.File(output_path, "w") as f:
        for i, (atoms, H, S, dm) in enumerate(zip(atoms_list, h_list, s_list, dm_list)):
            gs = f.create_group(f'{i:0{length_number}}')
            gsd = gs.create_group('Data')
            # Datasets created with only a dtype are empty placeholders.
            gsdd = gsd.create_group('DoS')
            gsdd.create_dataset('broadening', dtype=np.float64)
            gsdd.create_dataset('energies', dtype=np.float64)
            gsdd.create_dataset('values', dtype=np.float64)
            # Matrices are stored transposed; the '*_gama' variants are the bare
            # gamma-point matrices while the plain names carry a trailing
            # translation/cell axis of size 1.
            gsd.create_dataset('H', data=np.expand_dims(H, axis=-1).T, dtype=np.float64)
            gsd.create_dataset('H_gama', data=H.T)
            gsd.create_dataset('S', data=np.expand_dims(S, axis=-1).T, dtype=np.float64)
            gsd.create_dataset('S_gama', data=S.T)
            gsd.create_dataset('dm', data=np.expand_dims(dm, axis=-1).T, dtype=np.float64)
            gsd.create_dataset('dm_gama', data=dm.T)
            gsd.create_dataset('fermi_level', dtype=np.float64)
            gsd.create_dataset('forces', dtype=np.float64)
            gsd.create_dataset('total_energy', dtype=np.float64)
            gsi = gs.create_group('Info')
            gsib = gsi.create_group('Basis')
            gsib.create_dataset('14', dtype=np.float64)  # NOTE(review): key looks like an atomic number — confirm
            gsi.create_dataset('Translations', data=np.array([[0, 0, 0]], dtype=np.int64))
            gsi.create_dataset('k-points', data=np.array([[0., 0., 0.]]), dtype=np.float64)
            gss = gs.create_group('Structure')
            gss.create_dataset('atomic_numbers', data=atoms.numbers, dtype=np.int64)
            # Build a large padded orthorhombic cell from the coordinate extents
            # instead of atoms.cell (commented out in the original).
            margins = atoms.positions.max(axis=0) - atoms.positions.min(axis=0)
            cell = np.zeros((3, 3))
            np.fill_diagonal(cell, margins + 100)
            gss.create_dataset('lattice', data=cell, dtype=np.float64)
            gss.create_dataset('pbc', data=atoms.pbc, dtype=np.bool_)
            gss.create_dataset('positions', data=atoms.positions, dtype=np.float64)


# Process the 300 K and 500 K water-dimer trajectories.
db_path_list = [f"../Data/dbs/dyn-wd-{t}K_3.db" for t in (300, 500)]

for db_path in db_path_list:
    write_dataset_h5(db_path)
61 changes: 61 additions & 0 deletions examples/H2O/Codes/read_data_single.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from sparseutils import from_coo_tuple
from ase.db import connect
import h5py
import math
import numpy as np
import os

def write_dataset_h5(db_path:str='../Data/dbs/dyn-wd-300K_3.db', outp_direc:str='../Data/Datasets'):
    """Convert an ASE database of sparse H/S matrices into an HDF5 dataset.

    Like the dimer variant but without density matrices: rows must provide
    ``hamiltonian`` and ``overlap`` entries in COO-tuple form.  One HDF5 group
    per geometry is written, named with a zero-padded index.

    Parameters
    ----------
    db_path : str
        Path to the input ASE ``.db`` file.
    outp_direc : str
        Directory into which ``<db basename>.h5`` is written.
    """
    db = connect(db_path)

    atoms_list, h_list, s_list = [], [], []
    for row in db.select():
        atoms_list.append(row.toatoms())
        h_list.append(from_coo_tuple(row.data['hamiltonian']))
        s_list.append(from_coo_tuple(row.data['overlap']))

    n_geometries = len(atoms_list)
    # Zero-pad group names; len(str(n)) equals int(log10(n)) + 1 for n >= 1 and
    # does not raise on an empty database.
    length_number = len(str(n_geometries)) if n_geometries else 1

    # BUG FIX: str.rstrip('.db') strips any trailing characters from the SET
    # {'.', 'd', 'b'}, mangling stems ending in 'b' or 'd'; splitext removes
    # exactly one extension.
    file_name = os.path.splitext(os.path.basename(db_path))[0]
    output_path = os.path.join(outp_direc, file_name + '.h5')

    with h5py.File(output_path, "w") as f:
        for i, (atoms, H, S) in enumerate(zip(atoms_list, h_list, s_list)):
            gs = f.create_group(f'{i:0{length_number}}')
            gsd = gs.create_group('Data')
            # Datasets created with only a dtype are empty placeholders.
            gsdd = gsd.create_group('DoS')
            gsdd.create_dataset('broadening', dtype=np.float64)
            gsdd.create_dataset('energies', dtype=np.float64)
            gsdd.create_dataset('values', dtype=np.float64)
            # Matrices are stored transposed; the '*_gama' variants are the bare
            # gamma-point matrices while the plain names carry a trailing
            # translation/cell axis of size 1.
            gsd.create_dataset('H', data=np.expand_dims(H, axis=-1).T, dtype=np.float64)
            gsd.create_dataset('H_gama', data=H.T)
            gsd.create_dataset('S', data=np.expand_dims(S, axis=-1).T, dtype=np.float64)
            gsd.create_dataset('S_gama', data=S.T)
            gsd.create_dataset('fermi_level', dtype=np.float64)
            gsd.create_dataset('forces', dtype=np.float64)
            gsd.create_dataset('total_energy', dtype=np.float64)
            gsi = gs.create_group('Info')
            gsib = gsi.create_group('Basis')
            gsib.create_dataset('14', dtype=np.float64)  # NOTE(review): key looks like an atomic number — confirm
            gsi.create_dataset('Translations', data=np.array([[0, 0, 0]], dtype=np.int64))
            gsi.create_dataset('k-points', data=np.array([[0., 0., 0.]]), dtype=np.float64)
            gss = gs.create_group('Structure')
            gss.create_dataset('atomic_numbers', data=atoms.numbers, dtype=np.int64)
            # Build a large padded orthorhombic cell from the coordinate extents
            # instead of atoms.cell (commented out in the original).
            margins = atoms.positions.max(axis=0) - atoms.positions.min(axis=0)
            cell = np.zeros((3, 3))
            np.fill_diagonal(cell, margins + 100)
            gss.create_dataset('lattice', data=cell, dtype=np.float64)
            gss.create_dataset('pbc', data=atoms.pbc, dtype=np.bool_)
            gss.create_dataset('positions', data=atoms.positions, dtype=np.float64)


# Single input database for this script; kept as a list so more paths can be
# appended without touching the loop below.
db_path_list = ["/home/c/chenqian3/ACEhamiltonians/H2O_PASHA/H2O_Pasha/Data/dbs/H2O_H_aims.db"]

for db_path in db_path_list:
    write_dataset_h5(db_path)
Loading
Loading