Skip to content

Commit 8676429

Browse files
authored
Merge pull request #47 from bpuchala/0.2.X-backlog
0.2.X backlog
2 parents 6c75978 + 9faa627 commit 8676429

8 files changed

Lines changed: 91 additions & 32 deletions

File tree

INSTALL.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
**C++11**
77

8-
CASM must be compiled with a compiler that supports the C++11 standard.
8+
CASM must be compiled with a compiler that supports the C++11 standard. Testing is done with gcc-4.8.5 and clang-800.0.42.1.
99

1010
**If using Mac OS X - Xcode command-line tools**
1111

python/casm/casm/learn/evolve.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import deap.tools
99
import deap.algorithms
1010
from operator import attrgetter
11+
from deap.tools import HallOfFame
1112

1213

1314
def initNRandomOn(container, n_features, n_features_init):
@@ -204,6 +205,7 @@ def initialize_population(n_population, toolbox, filename=None, verbose=True):
204205
"""
205206
if filename is not None and os.path.exists(filename):
206207
load initial population
208+
# may be List[Individual] or HallOfFame, which is converted to List[Individual]
207209
else:
208210
create random initial population of size n_population via toolbox.population
209211
"""
@@ -214,6 +216,9 @@ def initialize_population(n_population, toolbox, filename=None, verbose=True):
214216
print "Loading initial population:", filename
215217
with open(filename, 'rb') as f:
216218
pop = pickle.load(f)
219+
if isinstance(pop, HallOfFame):
220+
# convert to List
221+
pop = [indiv for indiv in pop]
217222
else:
218223
if verbose:
219224
print "Constructing initial population"

python/casm/casm/learn/fit.py

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -474,12 +474,30 @@ def print_input_help():
474474
# The "problem_specs"/"weight" options specify the method to use for weighting
475475
# training data.
476476
#
477-
# If weights are included, then the linear model is changed from
478-
# X*b = y -> L*X*b = L*y,
477+
# Ordinary least squares minimizes
478+
# (y-X*b).transpose() * (y-X*b)
479+
#
480+
# where 'X' is the correlation matrix of shape (Nvalue, Nbfunc), and 'y'
481+
# is a vector of Nvalue calculated properties, and 'b' are the fitting
482+
# coefficients (ECI).
479483
#
480-
# where 'X' is the correlation matrix of shape (Nvalue, Nbfunc),
481-
# and 'property' is a vector of Nvalue calculated properties, and
482-
# W = L*L.transpose() is the weight matrix.
484+
# Weighted least squares minimizes
485+
# (y-X*b).transpose() * W * (y-X*b)
486+
#
487+
# Using the SVD, and given that W is Hermitian and positive semi-definite:
488+
# U * S * U.transpose() == W
489+
#
490+
# Define L such that:
491+
# L.transpose() = U * sqrt(S)
492+
#
493+
# Then we can write the weighted least squares problem using:
494+
# (y-X*b).transpose() * L.transpose() * L * (y-X*b)
495+
#
496+
# Or:
497+
# (L*y-L*X*b).transpose() * (L*y-L*X*b)
498+
#
499+
# So, if weights are included, then the linear model is changed from
500+
# X*b = y -> L*X*b = L*y
483501
#
484502
# By default, W = np.matlib.eye(Nvalue) (unweighted).
485503
#
@@ -775,8 +793,8 @@ def print_input_help():
775793
# Options for "evolve_params_kwargs":
776794
#
777795
# "n_population": int, optional, default=100
778-
# Population size. This many random initial starting individuals are
779-
# created.
796+
# Initial population size. This many random initial starting individuals
797+
# are created if no "pop_begin_filename" file exists.
780798
#
781799
# "n_halloffame": int, optional, default=25
782800
# Maxsize of the hall of fame which holds the best individuals
@@ -797,19 +815,32 @@ def print_input_help():
797815
# with.
798816
#
799817
# "pop_begin_filename": string, optional, default="population_begin.pkl"
800-
# Filename where the initial population is read from, if it exists.
801-
#
818+
# Filename suffix where the initial population is read from, if it
819+
# exists. For example, if "filename_prefix" is "Ef_kfold10" and
820+
# "pop_begin_filename" is "population_begin.pkl", then the initial
821+
# population is read from the file "Ef_kfold10_population_begin.pkl".
822+
#
823+
# The population file may contain either a list of individuals,
824+
# as written to the "population_end.pkl" file, or a HallOfFame
825+
# instance, as written to either an "evolve_halloffame.pkl" file or
826+
# overall casm-learn "halloffame.pkl" file.
827+
#
802828
# "pop_end_filename": string, optional, default="population_end.pkl"
803-
# Filename where the final population is saved.
829+
# Filename suffix where the final population is saved. For example, if
830+
# "filename_prefix" is "Ef_kfold10" and "pop_end_filename" is
831+
# "population_end.pkl", then the final population is saved to the
832+
# file "Ef_kfold10_population_end.pkl".
804833
#
805834
# "halloffame_filename": string, optional, default="evolve_halloffame.pkl"
806835
# Filename where a hall of fame is saved holding the best individuals
807-
# encountered in any generation.
836+
# encountered in any generation. For example, if "filename_prefix" is
837+
# "Ef_kfold10" and "halloffame_filename" is "evolve_halloffame.pkl",
838+
# then it is saved to the file "Ef_kfold10_evolve_halloffame.pkl".
808839
#
809840
# "filename_prefix": string, optional
810841
# Prefix for filenames, default uses input file filename excluding
811842
# extension. For example, if input file is named "Ef_kfold10.json", then
812-
# "Ef_kfold10_population_begin.pkl", "specs2_population_end.pkl", and
843+
# "Ef_kfold10_population_begin.pkl", "Ef_kfold10_population_end.pkl", and
813844
# "Ef_kfold10_evolve_halloffame.pkl" are used.
814845
815846
"feature_selection" : {
@@ -1414,11 +1445,11 @@ def read_sample_weight(input, tdata, verbose=True):
14141445
# use method to get weights
14151446
if specs["weight"]["method"] == "wCustom":
14161447
if verbose:
1417-
print "Reading custom weights"
1448+
print "# Reading custom weights"
14181449
sample_weight = tdata.data["weight"].values
14191450
elif specs["weight"]["method"] == "wCustom2d":
14201451
if verbose:
1421-
print "Reading custom2d weights"
1452+
print "# Reading custom2d weights"
14221453
cols = ["weight(" + str(i) + ")" for i in xrange(tdata.n_samples)]
14231454
sample_weight = tdata.data.loc[:,cols].values
14241455
elif specs["weight"]["method"] == "wHullDist":
@@ -1543,7 +1574,9 @@ def check_input(name):
15431574
pickle.dump(fdata, open(fit_data_filename, 'wb'))
15441575

15451576
if verbose:
1546-
print "# Writing problem specs to:", fit_data_filename, "\n"
1577+
print "# Writing problem specs to:", fit_data_filename
1578+
print "# To inspect or customize the problem specs further, use the '--checkspecs' method\n"
1579+
15471580

15481581
# during runtime only, if LinearRegression and LeaveOneOut, update fdata.cv and fdata.scoring
15491582
# to use optimized LOOCV score method

python/casm/casm/learn/tools.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,32 @@ def set_sample_weight(sample_weight, y=None, X=None):
7676
"""
7777
Calculate weighted data and weighted target values.
7878
79-
Uses sample weights to calculate
79+
Ordinary least squares minimizes
80+
(y-X*b).transpose() * (y-X*b)
8081
81-
L*X * b = L*y
82+
where 'X' is the correlation matrix of shape (Nvalue, Nbfunc), and 'y'
83+
is a vector of Nvalue calculated properties, and 'b' are the fitting
84+
coefficients (ECI).
85+
86+
Weighted least squares minimizes
87+
(y-X*b).transpose() * W * (y-X*b)
88+
89+
Using the SVD, and given that W is Hermitian and positive semi-definite:
90+
U * S * U.transpose() == W
91+
92+
Define L such that:
93+
L.transpose() = U * sqrt(S)
8294
83-
a weighted linear model where the weights are given by W = L * L.transpose().
95+
Then we can write the weighted least squares problem using:
96+
(y-X*b).transpose() * L.transpose() * L * (y-X*b)
8497
98+
Or:
99+
(L*y-L*X*b).transpose() * (L*y-L*X*b)
100+
101+
So, if weights are included, then the linear model is changed from
102+
X*b = y -> L*X*b = L*y
103+
104+
85105
Arguments
86106
---------
87107
@@ -142,7 +162,8 @@ def set_sample_weight(sample_weight, y=None, X=None):
142162
raise Exception("Error in set_sample_weight: sample_weight dimension > 2")
143163

144164
# weighted data
145-
L = np.linalg.cholesky(W)
165+
U, S, V = np.linalg.svd(W)
166+
L = U.dot(np.diag(np.sqrt(S))).transpose()
146167

147168
if X is not None:
148169
weighted_X = np.dot(L, X)

src/casm/app/enum.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,9 @@ namespace CASM {
144144
" volume from MINV to MAXV (units: number of primitive \n"
145145
" cells). \n\n"
146146

147-
" casm enum --configs --scellname NAME \n"
148-
" - To enumerate configurations for a particular \n"
149-
" supercell. \n\n"
147+
" casm enum --configs --scelnames NAME1 NAME2 \n"
148+
" - To enumerate configurations for one or more particular\n"
149+
" supercells. \n\n"
150150

151151
" casm enum --configs [...] --filter '... casm query commands...' \n"
152152
" - To perform restricted enumeration of configurations \n"
@@ -199,9 +199,9 @@ namespace CASM {
199199
std::cerr << "Error in 'casm enum'. If --supercells is given, --max must be given." << std::endl;
200200
return ERR_INVALID_ARG;
201201
}
202-
if(vm.count("configs") && (vm.count("max") + vm.count("all") != 1)) {
202+
if(vm.count("configs") && (vm.count("max") + vm.count("scelnames") + vm.count("all") != 1)) {
203203
std::cerr << "\n" << enum_opt.desc() << "\n" << std::endl;
204-
std::cerr << "Error in 'casm enum'. If --configs is given, exactly one of either --max or --all must be given." << std::endl;
204+
std::cerr << "Error in 'casm enum'. If --configs is given, exactly one of either --max, --scelnames, or --all must be given." << std::endl;
205205
return ERR_INVALID_ARG;
206206
}
207207
}
@@ -309,7 +309,7 @@ namespace CASM {
309309
}
310310
}
311311
}
312-
if(vm.count("scellname")) {
312+
if(vm.count("scelnames")) {
313313
Index j;
314314
std::cout << "Enumerate configurations for named supercells" << std::endl << std::endl;
315315
for(int i = 0; i < scellname_list.size(); i++) {

src/casm/app/format.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -732,7 +732,7 @@ LCHARG = .FALSE.\n";
732732
<< "'chebychev'. Otherwise, specifies a JSON object containing a composition vector or\n"
733733
<< "a JSON array containing multiple composition vectors. A single composition vector\n"
734734
<< "is formatted as, e.g.\n"
735-
<< " \"composition\" : [\"Au\" : 0.25, \"Cu\" : 0.75] \n"
735+
<< " \"composition\" : {\"Au\" : 0.25, \"Cu\" : 0.75} \n"
736736
<< "The site basis functions will then be constructed as to be optimized for that composition.\n\n"
737737

738738
<< "To specify different compositions on multiple sublattices, an array can be used. \n"
@@ -741,11 +741,11 @@ LCHARG = .FALSE.\n";
741741

742742
<< " \"site_basis_functions\" : [\n"
743743
<< " {\n"
744-
<< " \"composition\" : [\"Ga\" : 0.3, \"In\" : 0.7],\n"
744+
<< " \"composition\" : {\"Ga\" : 0.3, \"In\" : 0.7},\n"
745745
<< " \"sublat_indices\" : [0]\n"
746746
<< " },\n"
747747
<< " {\n"
748-
<< " \"composition\" : [\"Ga\" : 1.0, \"In\" : 0.0],\n"
748+
<< " \"composition\" : {\"Ga\" : 1.0, \"In\" : 0.0},\n"
749749
<< " \"sublat_indices\" : [1,2]\n"
750750
<< " }\n"
751751
<< " ]\n\n"

src/casm/monte_carlo/grand_canonical/GrandCanonical.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -423,8 +423,7 @@ namespace CASM {
423423
auto corr = correlations(config, _clexulator());
424424
double formation_energy = _eci() * corr.data();
425425
auto comp_x = primclex().composition_axes().param_composition(CASM::comp_n(config));
426-
double Ep = formation_energy - comp_x.dot(m_condition.param_chem_pot());
427-
return Ep / supercell().volume();
426+
return formation_energy - comp_x.dot(m_condition.param_chem_pot());
428427
}
429428

430429
/// \brief Calculate delta correlations for an event

src/casm/monte_carlo/grand_canonical/GrandCanonicalIO.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,8 @@ namespace CASM {
286286

287287
DataFormatter<ConstMonteCarloPtr> formatter;
288288

289-
formatter.push_back(ConstantValueFormatter<std::string, ConstMonteCarloPtr>("configname", configname));
289+
bool print_json = true;
290+
formatter.push_back(ConstantValueFormatter<std::string, ConstMonteCarloPtr>("configname", configname, print_json));
290291
formatter.push_back(MonteCarloTFormatter<GrandCanonical>());
291292
formatter.push_back(GrandCanonicalLTEFormatter(phi_LTE1));
292293
std::set<std::string> exclude;

0 commit comments

Comments
 (0)