Skip to content

Commit 4fb52ec

Browse files
committed
Merge branch 'devel' of https://github.com/Quantum-Dynamics-Hub/libra-code into devel
2 parents a79a1b2 + dc34f16 commit 4fb52ec

7 files changed

Lines changed: 138 additions & 13 deletions

File tree

src/libra_py/packages/cp2k/methods.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import re
2424
import numpy as np
2525
import scipy.sparse as sp
26+
import scipy.linalg
2627
import time
2728
import glob
2829
from libra_py.workflows.nbra import step2_many_body
@@ -2126,6 +2127,36 @@ def compute_energies_coeffs(ks_mat, overlap):
21262127
eigenvectors = eigenvectors[:,sorted_indices].T
21272128

21282129

2130+
return eigenvalues[sorted_indices], eigenvectors
2131+
2132+
2133+
def compute_energies_coeffs_scipy(ks_mat, overlap):
2134+
"""
2135+
This function solves the general eigenvalue problem described above using a Cholesky decomposition
2136+
of the overlap matrix. The eigenvalues are sorted.
2137+
More information: https://doi.org/10.1016/j.cpc.2004.12.014
2138+
Args:
2139+
ks_mat (numpy array): The Kohn-Sham matrix
2140+
overlap (numpy array): The atomic orbital overlap matrix
2141+
Returns:
2142+
eigenvalues (numpy array): The energies (eigenvalues)
2143+
eigenvectors (numpy array): The MO coefficients
2144+
"""
2145+
# Cholesky decomposition of the overlap matrix
2146+
U = scipy.linalg.cholesky( overlap ).T
2147+
# One ca also use the following as well but it is computationally more demanding
2148+
# U = scipy.linalg.fractional_matrix_power(S, 0.5)
2149+
U_inv = scipy.linalg.inv( U )
2150+
UT_inv = scipy.linalg.inv( U.T )
2151+
#K_prime = scipy.linalg.multi_dot( [UT_inv, ks_mat, U_inv] )
2152+
K_prime = UT_inv @ ks_mat @ U_inv
2153+
eigenvalues, eigenvectors = scipy.linalg.eig( K_prime )
2154+
# Transform back the coefficients
2155+
eigenvectors = U_inv @ eigenvectors
2156+
sorted_indices = np.argsort(eigenvalues)
2157+
eigenvectors = eigenvectors[:,sorted_indices].T
2158+
2159+
21292160
return eigenvalues[sorted_indices], eigenvectors
21302161

21312162
def compute_density_matrix(eigenvectors, homo_index):

src/libra_py/packages/dftbplus/methods.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -685,7 +685,7 @@ def dftb_distribute( istep, fstep, nsteps_this_job, trajectory_xyz_file, dftb_in
685685
for step in range( nsteps_this_job ):
686686

687687
# extract the curr_step xyz coordianates from the trajectory file and write it to another xyz file
688-
CP2K_methods.read_trajectory_xyz_file( trajectory_xyz_file, curr_step )
688+
_, _ = CP2K_methods.read_trajectory_xyz_file( trajectory_xyz_file, curr_step )
689689
curr_step += 1
690690

691691
# Go back to the main directory

src/libra_py/packages/gaussian/methods.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ def gaussian_distribute( istep, fstep, nsteps_this_job, trajectory_xyz_file, gau
289289
for step in range( nsteps_this_job ):
290290

291291
# Extract the coordinates and write them to a xyz file
292-
CP2K_methods.read_trajectory_xyz_file( trajectory_xyz_file, curr_step )
292+
_, _ = CP2K_methods.read_trajectory_xyz_file( trajectory_xyz_file, curr_step )
293293

294294
# Now, we need to edit the gaussian_input file by adding the
295295
# coordinates to the input file

src/libra_py/workflows/nbra/generate_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def make_input(prefix, input_template, software, trajectory_xyz_file, step):
4646
lines_input = f.readlines()
4747
f.close()
4848

49-
CP2K_methods.read_trajectory_xyz_file(trajectory_xyz_file, step)
49+
_, _ = CP2K_methods.read_trajectory_xyz_file(trajectory_xyz_file, step)
5050

5151
if software.lower()=="cp2k":
5252
f = open(F"input_{prefix}_{step}.inp", "w")

src/libra_py/workflows/nbra/ml_map.py

Lines changed: 95 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@
3434
import matplotlib.pyplot as plt
3535
from sklearn.preprocessing import StandardScaler, MinMaxScaler
3636
from sklearn.kernel_ridge import KernelRidge
37-
from sklearn.metrics import mean_squared_error, accuracy_score, mean_absolute_error, r2_score
37+
from sklearn.cluster import KMeans
38+
from sklearn.metrics import mean_squared_error, accuracy_score, mean_absolute_error, r2_score, pairwise_distances
3839
from liblibra_core import *
3940
import libra_py.packages.cp2k.methods as CP2K_methods
4041
import libra_py.packages.dftbplus.methods as DFTB_methods
@@ -517,6 +518,76 @@ def find_indices_inputs(params):
517518
return list(indices)
518519

519520

521+
def read_trajectory_xyz_file(file_name: str, istep: int, fstep: int):
522+
"""
523+
"""
524+
f = open(file_name,'r')
525+
lines = f.readlines()
526+
f.close()
527+
# The number of atoms for each time step in the .xyz file of the trajectory.
528+
number_of_atoms = int(lines[0].split()[0])
529+
530+
# This is used to skip the first two lines for each time step.
531+
n = number_of_atoms+2
532+
533+
# Write the coordinates of the 'step'th time step into the file
534+
coords = []
535+
for step in range(istep, fstep+1):
536+
coord = []
537+
for i in range( n * step + 2, n * ( step + 1 ) ):
538+
tmp = lines[ i ].split()
539+
# print(tmp)
540+
x = float( tmp[1])
541+
y = float( tmp[2])
542+
z = float( tmp[3])
543+
coord.append([x,y,z])
544+
coords.append(coord)
545+
546+
coords = np.array(coords)
547+
labels = []
548+
for i in range(2, number_of_atoms+2):
549+
tmp = lines[i].split()
550+
labels.append( tmp[0])
551+
552+
return labels, coords
553+
554+
def rmsd(p1, p2):
555+
"""
556+
Calculate RMSD between two geometries
557+
"""
558+
return np.sqrt(np.mean((p1 - p2)**2))
559+
560+
561+
def find_kmeans_indices(trajectory_file, istep, fstep, ncluster=10, random_state=0):
562+
"""
563+
"""
564+
# Read the XYZ trajectory file
565+
t1 = time.time()
566+
labels, coords = read_trajectory_xyz_file(trajectory_file, istep, fstep)
567+
print('Finished reading trajectory file: ', time.time()-t1)
568+
# Vectorize the coordinates nparray
569+
flattened_coords = coords.reshape(coords.shape[0], -1)
570+
t1 = time.time()
571+
rmsd_matrix = pairwise_distances(flattened_coords, metric=rmsd)
572+
print('Finished computing the distance matrix with RMSD metric: ', time.time()-t1)
573+
# Do the K-means clustering
574+
t1 = time.time()
575+
kmeans = KMeans(n_clusters=ncluster, random_state=random_state).fit(rmsd_matrix)
576+
print(f'Finished clustering for ncluster={ncluster}: ', time.time()-t1)
577+
clusters = kmeans.labels_
578+
indices = []
579+
for cluster_id in range(ncluster):
580+
cluster_members = np.where(clusters == cluster_id)[0]
581+
# Select the first member of the cluster as representative
582+
indices.append(np.sort(cluster_members)[0])
583+
# Sort the indices
584+
indices = list(np.sort(indices))
585+
# Print the geometries indices
586+
print("Selected geometries indices are:", indices)
587+
588+
return indices
589+
590+
520591
def rebuild_matrix_from_partitions(params, partitions, output_shape):
521592
"""
522593
This function is one of the most important here. It will
@@ -598,24 +669,43 @@ def compute_properties(params, models, input_scalers, output_scalers):
598669
for i, step in enumerate(indices):
599670
print("======================== \n Performing calculations for step ", step)
600671
print("*** Generating guess Hamiltonian for step ", step)
672+
tt = time.time()
601673
generate_data.gen_data(data_gen_params, step)
674+
print('data generation time:', time.time()-tt, ' seconds')
602675
input_mat = np.load(f'{params["path_to_input_mats"]}/{params["prefix"]}_{params["input_property"]}_{step}.npy')
603676
if i==0:
604677
ref_mat_files = glob.glob(f'{params["path_to_output_mats"]}/{params["prefix"]}_ref_{params["output_property"]}_*.npy')
605678
#output_mat = np.load(f'{params["path_to_output_mats"]}/{params["prefix"]}_ref_{params["output_property"]}_{step}.npy')
606679
output_mat = np.load(ref_mat_files[0])
607680
params["input_partition"] = True
681+
tt = time.time()
608682
partitioned_input = partition_matrix(params, input_mat)
683+
print('input partitioning time:', time.time()-tt, ' seconds')
609684
# Now apply the models to each partition
685+
tt = time.time()
610686
outputs = []
611687
for j in range(len(input_scalers)):
612688
input_scaled = input_scalers[j].transform(np.array(partitioned_input[j]).reshape(1,-1))
613689
output_scaled = models[j].predict(input_scaled)#.reshape(1,-1))
614690
output = output_scalers[j].inverse_transform(output_scaled)
615691
outputs.append(output.reshape(output.shape[1]))
692+
print('scaling data time:', time.time()-tt, ' seconds')
693+
tt = time.time()
616694
ks_ham_mat = rebuild_matrix_from_partitions(params, outputs, output_mat.shape)
695+
print('rebuilding matrix from partitions time:', time.time()-tt, ' seconds')
696+
tt = time.time()
617697
atomic_overlap = compute_atomic_orbital_overlap_matrix(params, step)
698+
print('atomic orbital overlap calculation time:', time.time()-tt, ' seconds')
699+
tt = time.time()
700+
#os.environ['OMP_NUM_THREADS'] = '%d'%params['nprocs']
701+
#print(type(ks_ham_mat))
702+
#print(type(atomic_overlap))
703+
#np.save('k.npy', ks_ham_mat)
704+
#np.save('s.npy', atomic_overlap)
618705
eigenvalues, eigenvectors = CP2K_methods.compute_energies_coeffs(ks_ham_mat, atomic_overlap)
706+
#eigenvalues, eigenvectors = CP2K_methods.compute_energies_coeffs_scipy(ks_ham_mat, atomic_overlap)
707+
#os.environ['OMP_NUM_THREADS'] = '1'
708+
print('diagonalizing the KS Hamiltonian matrix time:', time.time()-tt, ' seconds')
619709
if params["do_error_analysis"]:
620710
if not os.path.exists("../error_data"):
621711
os.system(f"mkdir ../error_data")
@@ -625,6 +715,7 @@ def compute_properties(params, models, input_scalers, output_scalers):
625715
try:
626716
ks_ham_mat_ref = np.load(f'{params["path_to_output_mats"]}/{params["prefix"]}_ref_{params["output_property"]}_{step}.npy')
627717
eigenvalues_ref, eigenvectors_ref = CP2K_methods.compute_energies_coeffs(ks_ham_mat_ref, atomic_overlap)
718+
#eigenvalues_ref, eigenvectors_ref = CP2K_methods.compute_energies_coeffs_scipy(ks_ham_mat_ref, atomic_overlap)
628719
# We only save the eigenvalues but not the eigenvectors of the reference calculations
629720
# The first reason is because we want to plot them and then we'll do the error analysis of all
630721
# molecular orbitals. The second reason is that we compute the \epsilon_i=<\psi_{i_{ref}}|\psi_{i_{ml}}> for
@@ -635,6 +726,7 @@ def compute_properties(params, models, input_scalers, output_scalers):
635726
os.system(f"mkdir {params['path_to_save_ref_mos']}")
636727
if params["save_ref_eigenvalues"]:
637728
np.save(f"{params['path_to_save_ref_mos']}/E_ref_{step}.npy", eigenvalues_ref) # [lowest_orbital-1:highest_orbital])
729+
np.save(f"{params['path_to_save_ref_mos']}/E_ml_{step}.npy", eigenvalues) # [lowest_orbital-1:highest_orbital])
638730
if params["save_ref_eigenvectors"]:
639731
np.save(f"{params['path_to_save_ref_mos']}/mos_ref_{step}.npy", eigenvectors_ref) #[lowest_orbital-1:highest_orbital,:][:,lowest_orbital-1:highest_orbital])
640732
ml_ref_overlap = compute_mo_overlaps(params, eigenvectors_ref, eigenvectors, step, step) #[lowest_orbital-1:highest_orbital,:][:,lowest_orbital-1:highest_orbital]
@@ -713,10 +805,10 @@ def compute_properties(params, models, input_scalers, output_scalers):
713805
#if params["compute_total_energy"]:
714806
# we have to make a cp2k input file based on the reference input
715807
# and then run it. Then since the files are large we need to remove them
716-
os.system("rm *.log *.npy *.wfn* *.inp *.xyz")
808+
#os.system("rm *.log *.npy *.wfn* *.inp *.xyz")
717809
os.system("mkdir ../ml_total_energy")
718810
os.system("mv output*.out ../ml_total_energy/.")
719-
os.system("rm *.out")
811+
#os.system("rm *.out")
720812
#os.chdir("../")
721813
#os.system(f"rm -rf tmp_guess_ham_{params['job']}")
722814

src/libra_py/workflows/nbra/step2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ def run_cp2k_libint_step2(params):
395395

396396
# Now try to get parameters from the input
397397
critical_params = [ "cp2k_ot_input_template", "cp2k_diag_input_template", "trajectory_xyz_filename" ]
398-
default_params = {"res_dir": os.getcwd() + "/res", "all_logfiles": os.getcwd() + "/all_logfiles", "all_pdosfiles": os.getcwd() + "/all_pdosfiles", "all_images": os.getcwd() + "/all_images", "image_format": 'bmp', "istep": 0, "fstep": 2, "lowest_orbital": 1, "highest_orbital": 2, "is_spherical": True, "isXTB": False, "isUKS": False, "remove_molden": True, "nprocs": 2, "cp2k_exe": "cp2k.psmp", "mpi_executable": "mpirun", "cube_visualization": False, "vmd_input_template": "vmd.tcl", "states_to_plot": [1], "plot_phase_corrected": True, "vmd_exe": "vmd", "tachyon_exe": "tachyon_LINIXAMD64", "x_pixels": 1024, "y_pixels": 1024, "remove_cube": True, 'together_mode': False}
398+
default_params = {"res_dir": os.getcwd() + "/res", "all_logfiles": os.getcwd() + "/all_logfiles", "all_pdosfiles": os.getcwd() + "/all_pdosfiles", "all_images": os.getcwd() + "/all_images", "image_format": 'bmp', "istep": 0, "fstep": 2, "lowest_orbital": 1, "highest_orbital": 2, "is_spherical": True, "isXTB": False, "isUKS": False, "remove_molden": True, "nprocs": 2, "cp2k_exe": "cp2k.psmp", "mpi_executable": "mpirun", "cube_visualization": False, "vmd_input_template": "vmd.tcl", "states_to_plot": [1], "plot_phase_corrected": True, "vmd_exe": "vmd", "tachyon_exe": "tachyon_LINIXAMD64", "x_pixels": 1024, "y_pixels": 1024, "remove_cube": True, 'together_mode': False, 'restart_file': True}
399399
comn.check_input(params, default_params, critical_params)
400400

401401

src/libra_py/workflows/nbra/step3.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2545,11 +2545,12 @@ def run_step3_sd_nacs_libint(params):
25452545
# Since we have performed state-reordering we need to
25462546
# convert to scipy npz format now
25472547
t2 = time.time()
2548-
for i in range(len(St_sds_cmatrix)):
2548+
for i in range(len(St_sds_cmatrix)-1):
25492549
St_sds[i] = data_conv.MATRIX2scipynpz( St_sds_cmatrix[i].real() )
2550-
sd2ci = SD2CI[i]
2550+
sd2ci_prev = SD2CI[i]
2551+
sd2ci_curr = SD2CI[i+1]
25512552
# Compute the St_ci
2552-
St_ci = np.linalg.multi_dot([sd2ci.T, St_sds[i].todense().real, sd2ci])
2553+
St_ci = np.linalg.multi_dot([sd2ci_prev.T, St_sds[i].todense().real, sd2ci_curr])
25532554
St_cis.append(sp.csc_matrix(St_ci))
25542555

25552556
# Now we need to apply state-reordering to St_cis
@@ -2580,10 +2581,11 @@ def run_step3_sd_nacs_libint(params):
25802581
St_cis[i] = data_conv.MATRIX2scipynpz( St_cis_cmatrix[i].real() )
25812582

25822583
else:
2583-
for i in range(len(St_sds)):
2584-
sd2ci = SD2CI[i]
2584+
for i in range(len(St_sds)-1):
2585+
sd2ci_prev = SD2CI[i]
2586+
sd2ci_curr = SD2CI[i+1]
25852587
# Compute the St_ci
2586-
St_ci = np.linalg.multi_dot([sd2ci.T, St_sds[i].todense().real, sd2ci])
2588+
St_ci = np.linalg.multi_dot([sd2ci_prev.T, St_sds[i].todense().real, sd2ci_curr])
25872589
St_cis.append(sp.csc_matrix(St_ci))
25882590
sp.save_npz(F'{params["path_to_save_sd_Hvibs"]}/St_ci_{step+start_time}_re.npz', sp.csc_matrix(St_ci))
25892591

0 commit comments

Comments
 (0)