Skip to content

Commit 04ad28d

Browse files
committed
Merge remote-tracking branch 'public/0.2.X' into 0.2.X_merge_public
Should check python/vasp/vasp/io/incar.py merge Conflicts: python/vasp/vasp/io/incar.py src/casm/app/enum.cc src/casm/app/format.cc
2 parents ff4a3c7 + 8676429 commit 04ad28d

7 files changed

Lines changed: 126 additions & 46 deletions

File tree

INSTALL.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
**C++11**
77

8-
CASM must be compiled with a compiler that supports the C++11 standard.
8+
CASM must be compiled with a compiler that supports the C++11 standard. Testing is done with gcc-4.8.5 and clang-800.0.42.1.
99

1010
**If using Mac OS X - Xcode command-line tools**
1111

python/casm/casm/learn/evolve.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import deap.tools
99
import deap.algorithms
1010
from operator import attrgetter
11+
from deap.tools import HallOfFame
1112

1213

1314
def initNRandomOn(container, n_features, n_features_init):
@@ -204,6 +205,7 @@ def initialize_population(n_population, toolbox, filename=None, verbose=True):
204205
"""
205206
if filename is not None and os.path.exists(filename):
206207
load initial population
208+
# may be List[Individual] or HallOfFame, which is converted to List[Individual]
207209
else:
208210
create random initial population of size n_population via toolbox.population
209211
"""
@@ -214,6 +216,9 @@ def initialize_population(n_population, toolbox, filename=None, verbose=True):
214216
print "Loading initial population:", filename
215217
with open(filename, 'rb') as f:
216218
pop = pickle.load(f)
219+
if isinstance(pop, HallOfFame):
220+
# convert to List
221+
pop = [indiv for indiv in pop]
217222
else:
218223
if verbose:
219224
print "Constructing initial population"

python/casm/casm/learn/fit.py

Lines changed: 76 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -475,12 +475,30 @@ def print_input_help():
475475
# The "problem_specs"/"weight" options specify the method to use for weighting
476476
# training data.
477477
#
478-
# If weights are included, then the linear model is changed from
479-
# X*b = y -> L*X*b = L*y,
478+
# Ordinary least squares minimizes
479+
# (y-X*b).transpose() * (y-X*b)
480+
#
481+
# where 'X' is the correlation matrix of shape (Nvalue, Nbfunc), and 'y'
482+
# is a vector of Nvalue calculated properties, and 'b' are the fitting
483+
# coefficients (ECI).
480484
#
481-
# where 'X' is the correlation matrix of shape (Nvalue, Nbfunc),
482-
# and 'property' is a vector of Nvalue calculated properties, and
483-
# W = L*L.transpose() is the weight matrix.
485+
# Weighted least squares minimizes
486+
# (y-X*b).transpose() * W * (y-X*b)
487+
#
488+
# Using the SVD, and given that W is Hermitian and positive semi-definite:
489+
# U * S * U.transpose() == W
490+
#
491+
# Define L such that:
492+
# L.transpose() = U * sqrt(S)
493+
#
494+
# Then we can write the weighted least squares problem using:
495+
# (y-X*b).transpose() * L.transpose() * L * (y-X*b)
496+
#
497+
# Or:
498+
# (L*y-L*X*b).transpose() * (L*y-L*X*b)
499+
#
500+
# So, if weights are included, then the linear model is changed from
501+
# X*b = y -> L*X*b = L*y
484502
#
485503
# By default, W = np.matlib.eye(Nvalue) (unweighted).
486504
#
@@ -776,8 +794,8 @@ def print_input_help():
776794
# Options for "evolve_params_kwargs":
777795
#
778796
# "n_population": int, optional, default=100
779-
# Population size. This many random initial starting individuals are
780-
# created.
797+
# Initial population size. This many random initial starting individuals
798+
# are created if no "pop_begin_filename" file exists.
781799
#
782800
# "n_halloffame": int, optional, default=25
783801
# Maxsize of the hall of fame which holds the best individuals
@@ -798,19 +816,32 @@ def print_input_help():
798816
# with.
799817
#
800818
# "pop_begin_filename": string, optional, default="population_begin.pkl"
801-
# Filename where the initial population is read from, if it exists.
802-
#
819+
# Filename suffix where the initial population is read from, if it
820+
# exists. For example, if "filename_prefix" is "Ef_kfold10" and
821+
# "pop_begin_filename" is "population_begin.pkl", then the initial
822+
# population is read from the file "Ef_kfold10_population_begin.pkl".
823+
#
824+
# The population file may contain either a list of individuals,
825+
# as written to the "population_end.pkl" file, or a HallOfFame
826+
# instance, as written to either an "evolve_halloffame.pkl" file or
827+
# overall casm-learn "halloffame.pkl" file.
828+
#
803829
# "pop_end_filename": string, optional, default="population_end.pkl"
804-
# Filename where the final population is saved.
830+
# Filename where the final population is saved. For example, if
831+
# "filename_prefix" is "Ef_kfold10" and "pop_end_filename" is
832+
# "population_end.pkl", then the final population is saved to the
833+
# file "Ef_kfold10_population_end.pkl".
805834
#
806835
# "halloffame_filename": string, optional, default="evolve_halloffame.pkl"
807836
# Filename where a hall of fame is saved holding the best individuals
808-
# encountered in any generation.
837+
# encountered in any generation. For example, if "filename_prefix" is
838+
# "Ef_kfold10" and "halloffame_filename" is "evolve_halloffame.pkl",
839+
# then it is saved to the file "Ef_kfold10_evolve_halloffame.pkl".
809840
#
810841
# "filename_prefix": string, optional
811842
# Prefix for filenames, default uses input file filename excluding
812843
# extension. For example, if input file is named "Ef_kfold10.json", then
813-
# "Ef_kfold10_population_begin.pkl", "specs2_population_end.pkl", and
844+
# "Ef_kfold10_population_begin.pkl", "Ef_kfold10_population_end.pkl", and
814845
# "Ef_kfold10_evolve_halloffame.pkl" are used.
815846
816847
"feature_selection" : {
@@ -919,6 +950,10 @@ def print_input_help():
919950
# "uncalculated" : Predicted ground states and near ground states that have not been calculated
920951
# "below_hull" : All configurations predicted below the prediction of the DFT hull
921952
#
953+
# primitive_only: bool, optional, default=True
954+
# If True, only use primitive configurations to construct the convex hull,
955+
# else use all selected configurations.
956+
#
922957
# uncalculated_range: number, optional, default=0.0
923958
# Include all configurations with clex_hull_dist less than this value (+hull_tol)
924959
# in the "uncalculated" configurations results. Default only includes predicted
@@ -945,6 +980,7 @@ def print_input_help():
945980
"checkhull" : {
946981
"selection": "ALL",
947982
"write_results": true,
983+
"primitive_only": true,
948984
"uncalculated_range": 1e-8,
949985
"ranged_rms": [0.001, 0.005, 0.01, 0.05, 0.1, 0.5],
950986
"composition": "atom_frac",
@@ -1410,11 +1446,11 @@ def read_sample_weight(input, tdata, verbose=True):
14101446
# use method to get weights
14111447
if specs["weight"]["method"] == "wCustom":
14121448
if verbose:
1413-
print "Reading custom weights"
1449+
print "# Reading custom weights"
14141450
sample_weight = tdata.data["weight"].values
14151451
elif specs["weight"]["method"] == "wCustom2d":
14161452
if verbose:
1417-
print "Reading custom2d weights"
1453+
print "# Reading custom2d weights"
14181454
cols = ["weight(" + str(i) + ")" for i in xrange(tdata.n_samples)]
14191455
sample_weight = tdata.data.loc[:,cols].values
14201456
elif specs["weight"]["method"] == "wHullDist":
@@ -1539,7 +1575,9 @@ def check_input(name):
15391575
pickle.dump(fdata, open(fit_data_filename, 'wb'))
15401576

15411577
if verbose:
1542-
print "# Writing problem specs to:", fit_data_filename, "\n"
1578+
print "# Writing problem specs to:", fit_data_filename
1579+
print "# To inspect or customize the problem specs further, use the '--checkspecs' method\n"
1580+
15431581

15441582
# during runtime only, if LinearRegression and LeaveOneOut, update fdata.cv and fdata.scoring
15451583
# to use optimized LOOCV score method
@@ -1813,12 +1851,16 @@ def checkhull(input, hall, indices=None, verbose=True):
18131851
"gs_spurious" : Predicted ground states that are not DFT ground states
18141852
"uncalculated" : Predicted ground states and near ground states that have not been calculated
18151853
"below_hull" : All configurations predicted below the prediction of the DFT hull
1816-
1854+
1855+
primitive_only: bool, optional, default=True
1856+
If True, only use primitive configurations to construct the convex hull,
1857+
else use all selected configurations.
1858+
18171859
uncalculated_range: number, optional, default=0.0
18181860
Include all configurations with clex_hull_dist less than this value (+hull_tol)
18191861
in the "uncalculated" configurations. Default only includes predicted
18201862
ground states.
1821-
1863+
18221864
ranged_rms: List[number], optional, default=[0.001, 0.005, 0.01, 0.05, 0.1, 0.5]
18231865
Calculates the root-mean-square error for DFT calculated configurations
18241866
within a particular range (in eV/unitcell) of the DFT hull. The list
@@ -1843,6 +1885,7 @@ def checkhull(input, hall, indices=None, verbose=True):
18431885
"checkhull" : {
18441886
"selection": "ALL",
18451887
"write_results": true,
1888+
"primitive_only": true,
18461889
"uncalculated_range": 0.0,
18471890
"ranged_rms": [0.001, 0.005, 0.01, 0.05, 0.1, 0.5],
18481891
"composition": "atom_frac",
@@ -1878,6 +1921,7 @@ def checkhull(input, hall, indices=None, verbose=True):
18781921
opt = {
18791922
"selection":"ALL",
18801923
"write_results":False,
1924+
"primitive_only":True,
18811925
"uncalculated_range":0.0,
18821926
"hull_tol":1e-8,
18831927
"composition":"atom_frac",
@@ -1905,6 +1949,7 @@ def checkhull(input, hall, indices=None, verbose=True):
19051949
uncalculated_range = d["uncalculated_range"]
19061950
dim_tol = d["dim_tol"]
19071951
bottom_tol = d["bottom_tol"]
1952+
primitive_only = d["primitive_only"]
19081953

19091954
# save current default clex
19101955
orig_clex = proj.settings.default_clex
@@ -1946,8 +1991,11 @@ def checkhull(input, hall, indices=None, verbose=True):
19461991
comp = "comp"
19471992
dft_Eform = "formation_energy"
19481993
clex_Eform = "clex(formation_energy)"
1949-
1950-
sel.query([comp, is_primitive, is_calculated, configname, dft_hull_dist_long, dft_Eform])
1994+
1995+
query_cols = [comp, is_calculated, configname, dft_hull_dist_long, dft_Eform]
1996+
if primitive_only:
1997+
query_cols.append(is_primitive)
1998+
sel.query(query_cols)
19511999

19522000

19532001
compcol = []
@@ -1979,7 +2027,10 @@ def checkhull(input, hall, indices=None, verbose=True):
19792027
indiv = hall[indiv_i]
19802028
write_eci(proj, indiv.eci, fit_details=casm.learn.to_json(indiv_i, indiv), clex=clex, verbose=verbose)
19812029

1982-
df = sel.data[sel.data.loc[:,is_primitive] == 1].sort_values(compcol)
2030+
if primitive_only:
2031+
df = sel.data[sel.data.loc[:,is_primitive] == 1].sort_values(compcol)
2032+
else:
2033+
df = sel.data.sort_values(compcol)
19832034
df_calc = df[df.loc[:,is_calculated] == 1].apply(pandas.to_numeric, errors='ignore')
19842035
dft_gs = df_calc[df_calc.loc[:,dft_hull_dist_long] < hull_tol]
19852036

@@ -1990,7 +2041,10 @@ def checkhull(input, hall, indices=None, verbose=True):
19902041
# query:
19912042
sel.query([clex_hull_dist_long, clex_Eform, clex_dft_hull_dist_long], force=True)
19922043

1993-
df = sel.data[sel.data.loc[:,is_primitive] == 1].sort_values(compcol)
2044+
if primitive_only:
2045+
df = sel.data[sel.data.loc[:,is_primitive] == 1].sort_values(compcol)
2046+
else:
2047+
df = sel.data.sort_values(compcol)
19942048
df.rename(
19952049
inplace=True,
19962050
columns={
@@ -2029,7 +2083,7 @@ def printer(attr, title):
20292083
if df.shape[0]:
20302084
if verbose:
20312085
print title + ":"
2032-
print df.drop(to_drop, axis=1).to_string(**kwargs)
2086+
print df.drop(to_drop, axis=1, errors='ignore').to_string(**kwargs)
20332087
if write_results:
20342088
print "write:", output_name, "\n"
20352089
else:

python/casm/casm/learn/tools.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,32 @@ def set_sample_weight(sample_weight, y=None, X=None):
7676
"""
7777
Calculate weighted data and weighted target values.
7878
79-
Uses sample weights to calculate
79+
Ordinary least squares minimizes
80+
(y-X*b).transpose() * (y-X*b)
8081
81-
L*X * b = L*y
82+
where 'X' is the correlation matrix of shape (Nvalue, Nbfunc), and 'y'
83+
is a vector of Nvalue calculated properties, and 'b' are the fitting
84+
coefficients (ECI).
85+
86+
Weighted least squares minimizes
87+
(y-X*b).transpose() * W * (y-X*b)
88+
89+
Using the SVD, and given that W is Hermitian and positive semi-definite:
90+
U * S * U.transpose() == W
91+
92+
Define L such that:
93+
L.transpose() = U * sqrt(S)
8294
83-
a weighted linear model where the weights are given by W = L * L.transpose().
95+
Then we can write the weighted least squares problem using:
96+
(y-X*b).transpose() * L.transpose() * L * (y-X*b)
8497
98+
Or:
99+
(L*y-L*X*b).transpose() * (L*y-L*X*b)
100+
101+
So, if weights are included, then the linear model is changed from
102+
X*b = y -> L*X*b = L*y
103+
104+
85105
Arguments
86106
---------
87107
@@ -142,7 +162,8 @@ def set_sample_weight(sample_weight, y=None, X=None):
142162
raise Exception("Error in set_sample_weight: sample_weight dimension > 2")
143163

144164
# weighted data
145-
L = np.linalg.cholesky(W)
165+
U, S, V = np.linalg.svd(W)
166+
L = U.dot(np.diag(np.sqrt(S))).transpose()
146167

147168
if X is not None:
148169
weighted_X = np.dot(L, X)

src/casm/app/format.cc

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -734,27 +734,27 @@ LCHARG = .FALSE.\n";
734734
args.log << "This JSON file contains specifications for generating the cluster\n" <<
735735
"basis functions. \n\n";
736736

737-
args.log << "'site_basis_functions' may specify a string, which can be either 'occupation' or \n"
738-
<< "'chebychev'. Otherwise, specifies a JSON object containing a composition vector or\n"
739-
<< "a JSON array containing multiple composition vectors. A single composition vector\n"
740-
<< "is formatted as, e.g.\n"
741-
<< " \"composition\" : [\"Au\" : 0.25, \"Cu\" : 0.75] \n"
742-
<< "The site basis functions will then be constructed as to be optimized for that composition.\n\n"
737+
std::cout << "'site_basis_functions' may specify a string, which can be either 'occupation' or \n"
738+
<< "'chebychev'. Otherwise, specifies a JSON object containing a composition vector or\n"
739+
<< "a JSON array containing multiple composition vectors. A single composition vector\n"
740+
<< "is formatted as, e.g.\n"
741+
<< " \"composition\" : {\"Au\" : 0.25, \"Cu\" : 0.75} \n"
742+
<< "The site basis functions will then be constructed as to be optimized for that composition.\n\n"
743743

744744
<< "To specify different compositions on multiple sublattices, an array can be used. \n"
745745
<< "As an example, the following specifies a different composition on sublattice 0 than\n"
746746
<< "on sublattices 1 and 3: \n\n"
747747

748-
<< " \"site_basis_functions\" : [\n"
749-
<< " {\n"
750-
<< " \"composition\" : [\"Ga\" : 0.3, \"In\" : 0.7],\n"
751-
<< " \"sublat_indices\" : [0]\n"
752-
<< " },\n"
753-
<< " {\n"
754-
<< " \"composition\" : [\"Ga\" : 1.0, \"In\" : 0.0],\n"
755-
<< " \"sublat_indices\" : [1,2]\n"
756-
<< " }\n"
757-
<< " ]\n\n"
748+
<< " \"site_basis_functions\" : [\n"
749+
<< " {\n"
750+
<< " \"composition\" : {\"Ga\" : 0.3, \"In\" : 0.7},\n"
751+
<< " \"sublat_indices\" : [0]\n"
752+
<< " },\n"
753+
<< " {\n"
754+
<< " \"composition\" : {\"Ga\" : 1.0, \"In\" : 0.0},\n"
755+
<< " \"sublat_indices\" : [1,2]\n"
756+
<< " }\n"
757+
<< " ]\n\n"
758758

759759
<< "Sublattices are specified in the same order as in prim.json. Sublattice compositions\n"
760760
<< "are not allowed to break the symmetry of the crystal. If equivalent sublattices are\n"

src/casm/monte_carlo/grand_canonical/GrandCanonical.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -423,8 +423,7 @@ namespace CASM {
423423
auto corr = correlations(config, _clexulator());
424424
double formation_energy = _eci() * corr.data();
425425
auto comp_x = primclex().composition_axes().param_composition(CASM::comp_n(config));
426-
double Ep = formation_energy - comp_x.dot(m_condition.param_chem_pot());
427-
return Ep / supercell().volume();
426+
return formation_energy - comp_x.dot(m_condition.param_chem_pot());
428427
}
429428

430429
/// \brief Calculate delta correlations for an event

src/casm/monte_carlo/grand_canonical/GrandCanonicalIO.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,8 @@ namespace CASM {
286286

287287
DataFormatter<ConstMonteCarloPtr> formatter;
288288

289-
formatter.push_back(ConstantValueFormatter<std::string, ConstMonteCarloPtr>("configname", configname));
289+
bool print_json = true;
290+
formatter.push_back(ConstantValueFormatter<std::string, ConstMonteCarloPtr>("configname", configname, print_json));
290291
formatter.push_back(MonteCarloTFormatter<GrandCanonical>());
291292
formatter.push_back(GrandCanonicalLTEFormatter(phi_LTE1));
292293
std::set<std::string> exclude;

0 commit comments

Comments
 (0)