@@ -475,12 +475,30 @@ def print_input_help():
475475 # The "problem_specs"/"weight" options specify the method to use for weighting
476476 # training data.
477477 #
478- # If weights are included, then the linear model is changed from
479- # X*b = y -> L*X*b = L*y,
478+ # Ordinary least squares minimizes
479+ # (y-X*b).transpose() * (y-X*b)
480+ #
481+ # where 'X' is the correlation matrix of shape (Nvalue, Nbfunc), and 'y'
482+ # is a vector of Nvalue calculated properties, and 'b' are the fitting
483+ # coefficients (ECI).
480484 #
481- # where 'X' is the correlation matrix of shape (Nvalue, Nbfunc),
482- # and 'property' is a vector of Nvalue calculated properties, and
483- # W = L*L.transpose() is the weight matrix.
485+ # Weighted least squares minimizes
486+ # (y-X*b).transpose() * W * (y-X*b)
487+ #
488+ # Using the SVD, and given that W is Hermitian:
489+ # U * S * U.transpose() == W
490+ #
491+ # Define L such that:
492+ # L.transpose() = U * sqrt(S)
493+ #
494+ # Then we can write the weighted least squares problem using:
495+ # (y-X*b).transpose() * L.transpose() * L * (y-X*b)
496+ #
497+ # Or:
498+ # (L*y-L*X*b).transpose() * (L*y-L*X*b)
499+ #
500+ # So, if weights are included, then the linear model is changed from
501+ # X*b = y -> L*X*b = L*y
484502 #
485503 # By default, W = np.matlib.eye(Nvalue) (unweighted).
486504 #
@@ -776,8 +794,8 @@ def print_input_help():
776794 # Options for "evolve_params_kwargs":
777795 #
778796 # "n_population": int, optional, default=100
779- # Population size. This many random initial starting individuals are
780- # created.
797+ # Initial population size. This many random initial starting individuals
798+ # are created if no "pop_begin_filename" file exists.
781799 #
782800 # "n_halloffame": int, optional, default=25
783801 # Maxsize of the hall of fame which holds the best individuals
@@ -798,19 +816,32 @@ def print_input_help():
798816 # with.
799817 #
800818 # "pop_begin_filename": string, optional, default="population_begin.pkl"
801- # Filename where the initial population is read from, if it exists.
802- #
819+ # Filename suffix where the initial population is read from, if it
820+ # exists. For example, if "filename_prefix" is "Ef_kfold10" and
821+ # "pop_begin_filename" is "population_begin.pkl", then the initial
822+ # population is read from the file "Ef_kfold10_population_begin.pkl".
823+ #
824+ # The population file may contain either a list of individuals,
825+ # as written to the "population_end.pkl" file, or a HallOfFame
826+ # instance, as written to either an "evolve_halloffame.pkl" file or
827+ # the overall casm-learn "halloffame.pkl" file.
828+ #
803829 # "pop_end_filename": string, optional, default="population_end.pkl"
804- # Filename where the final population is saved.
830+ # Filename where the final population is saved. For example, if
831+ # "filename_prefix" is "Ef_kfold10" and "pop_end_filename" is
832+ # "population_end.pkl", then the final population is saved to the
833+ # file "Ef_kfold10_population_end.pkl".
805834 #
806835 # "halloffame_filename": string, optional, default="evolve_halloffame.pkl"
807836 # Filename where a hall of fame is saved holding the best individuals
808- # encountered in any generation.
837+ # encountered in any generation. For example, if "filename_prefix" is
838+ # "Ef_kfold10" and "halloffame_filename" is "evolve_halloffame.pkl",
839+ # then it is saved to the file "Ef_kfold10_evolve_halloffame.pkl".
809840 #
810841 # "filename_prefix": string, optional
811842 # Prefix for filenames, default uses input file filename excluding
812843 # extension. For example, if input file is named "Ef_kfold10.json", then
813- # "Ef_kfold10_population_begin.pkl", "specs2_population_end .pkl", and
844+ # "Ef_kfold10_population_begin.pkl", "Ef_kfold10_population_end.pkl", and
814845 # "Ef_kfold10_evolve_halloffame.pkl" are used.
815846
816847 "feature_selection" : {
@@ -919,6 +950,10 @@ def print_input_help():
919950 # "uncalculated" : Predicted ground states and near ground states that have not been calculated
920951 # "below_hull" : All configurations predicted below the prediction of the DFT hull
921952 #
953+ # primitive_only: bool, optional, default=True
954+ # If True, only use primitive configurations to construct the convex hull,
955+ # else use all selected configurations.
956+ #
922957 # uncalculated_range: number, optional, default=0.0
923958 # Include all configurations with clex_hull_dist less than this value (+hull_tol)
924959 # in the "uncalculated" configurations results. Default only includes predicted
@@ -945,6 +980,7 @@ def print_input_help():
945980 "checkhull" : {
946981 "selection": "ALL",
947982 "write_results": true,
983+ "primitive_only": true,
948984 "uncalculated_range": 1e-8,
949985 "ranged_rms": [0.001, 0.005, 0.01, 0.05, 0.1, 0.5],
950986 "composition": "atom_frac",
@@ -1410,11 +1446,11 @@ def read_sample_weight(input, tdata, verbose=True):
14101446 # use method to get weights
14111447 if specs ["weight" ]["method" ] == "wCustom" :
14121448 if verbose :
1413- print "Reading custom weights"
1449+ print "# Reading custom weights"
14141450 sample_weight = tdata .data ["weight" ].values
14151451 elif specs ["weight" ]["method" ] == "wCustom2d" :
14161452 if verbose :
1417- print "Reading custom2d weights"
1453+ print "# Reading custom2d weights"
14181454 cols = ["weight(" + str (i ) + ")" for i in xrange (tdata .n_samples )]
14191455 sample_weight = tdata .data .loc [:,cols ].values
14201456 elif specs ["weight" ]["method" ] == "wHullDist" :
@@ -1539,7 +1575,9 @@ def check_input(name):
15391575 pickle .dump (fdata , open (fit_data_filename , 'wb' ))
15401576
15411577 if verbose :
1542- print "# Writing problem specs to:" , fit_data_filename , "\n "
1578+ print "# Writing problem specs to:" , fit_data_filename
1579+ print "# To inspect or customize the problem specs further, use the '--checkspecs' method\n "
1580+
15431581
15441582 # during runtime only, if LinearRegression and LeaveOneOut, update fdata.cv and fdata.scoring
15451583 # to use optimized LOOCV score method
@@ -1813,12 +1851,16 @@ def checkhull(input, hall, indices=None, verbose=True):
18131851 "gs_spurious" : Predicted ground states that are not DFT ground states
18141852 "uncalculated" : Predicted ground states and near ground states that have not been calculated
18151853 "below_hull" : All configurations predicted below the prediction of the DFT hull
1816-
1854+
1855+ primitive_only: bool, optional, default=True
1856+ If True, only use primitive configurations to construct the convex hull,
1857+ else use all selected configurations.
1858+
18171859 uncalculated_range: number, optional, default=0.0
18181860 Include all configurations with clex_hull_dist less than this value (+hull_tol)
18191861 in the "uncalculated" configurations. Default only includes predicted
18201862 ground states.
1821-
1863+
18221864 ranged_rms: List[number], optional, default=[0.001, 0.005, 0.01, 0.05, 0.1, 0.5]
18231865 Calculates the root-mean-square error for DFT calculated configurations
18241866 within a particular range (in eV/unitcell) of the DFT hull. The list
@@ -1843,6 +1885,7 @@ def checkhull(input, hall, indices=None, verbose=True):
18431885 "checkhull" : {
18441886 "selection": "ALL",
18451887 "write_results": true,
1888+ "primitive_only": true,
18461889 "uncalculated_range": 0.0,
18471890 "ranged_rms": [0.001, 0.005, 0.01, 0.05, 0.1, 0.5],
18481891 "composition": "atom_frac",
@@ -1878,6 +1921,7 @@ def checkhull(input, hall, indices=None, verbose=True):
18781921 opt = {
18791922 "selection" :"ALL" ,
18801923 "write_results" :False ,
1924+ "primitive_only" :True ,
18811925 "uncalculated_range" :0.0 ,
18821926 "hull_tol" :1e-8 ,
18831927 "composition" :"atom_frac" ,
@@ -1905,6 +1949,7 @@ def checkhull(input, hall, indices=None, verbose=True):
19051949 uncalculated_range = d ["uncalculated_range" ]
19061950 dim_tol = d ["dim_tol" ]
19071951 bottom_tol = d ["bottom_tol" ]
1952+ primitive_only = d ["primitive_only" ]
19081953
19091954 # save current default clex
19101955 orig_clex = proj .settings .default_clex
@@ -1946,8 +1991,11 @@ def checkhull(input, hall, indices=None, verbose=True):
19461991 comp = "comp"
19471992 dft_Eform = "formation_energy"
19481993 clex_Eform = "clex(formation_energy)"
1949-
1950- sel .query ([comp , is_primitive , is_calculated , configname , dft_hull_dist_long , dft_Eform ])
1994+
1995+ query_cols = [comp , is_calculated , configname , dft_hull_dist_long , dft_Eform ]
1996+ if primitive_only :
1997+ query_cols .append (is_primitive )
1998+ sel .query (query_cols )
19511999
19522000
19532001 compcol = []
@@ -1979,7 +2027,10 @@ def checkhull(input, hall, indices=None, verbose=True):
19792027 indiv = hall [indiv_i ]
19802028 write_eci (proj , indiv .eci , fit_details = casm .learn .to_json (indiv_i , indiv ), clex = clex , verbose = verbose )
19812029
1982- df = sel .data [sel .data .loc [:,is_primitive ] == 1 ].sort_values (compcol )
2030+ if primitive_only :
2031+ df = sel .data [sel .data .loc [:,is_primitive ] == 1 ].sort_values (compcol )
2032+ else :
2033+ df = sel .data .sort_values (compcol )
19832034 df_calc = df [df .loc [:,is_calculated ] == 1 ].apply (pandas .to_numeric , errors = 'ignore' )
19842035 dft_gs = df_calc [df_calc .loc [:,dft_hull_dist_long ] < hull_tol ]
19852036
@@ -1990,7 +2041,10 @@ def checkhull(input, hall, indices=None, verbose=True):
19902041 # query:
19912042 sel .query ([clex_hull_dist_long , clex_Eform , clex_dft_hull_dist_long ], force = True )
19922043
1993- df = sel .data [sel .data .loc [:,is_primitive ] == 1 ].sort_values (compcol )
2044+ if primitive_only :
2045+ df = sel .data [sel .data .loc [:,is_primitive ] == 1 ].sort_values (compcol )
2046+ else :
2047+ df = sel .data .sort_values (compcol )
19942048 df .rename (
19952049 inplace = True ,
19962050 columns = {
@@ -2029,7 +2083,7 @@ def printer(attr, title):
20292083 if df .shape [0 ]:
20302084 if verbose :
20312085 print title + ":"
2032- print df .drop (to_drop , axis = 1 ).to_string (** kwargs )
2086+ print df .drop (to_drop , axis = 1 , errors = 'ignore' ).to_string (** kwargs )
20332087 if write_results :
20342088 print "write:" , output_name , "\n "
20352089 else :
0 commit comments