@@ -474,12 +474,30 @@ def print_input_help():
474474 # The "problem_specs"/"weight" options specify the method to use for weighting
475475 # training data.
476476 #
477- # If weights are included, then the linear model is changed from
478- # X*b = y -> L*X*b = L*y,
477+ # Ordinary least squares minimizes
478+ # (y-X*b).transpose() * (y-X*b)
479+ #
480+ # where 'X' is the correlation matrix of shape (Nvalue, Nbfunc), 'y' is
481+ # a vector of Nvalue calculated properties, and 'b' is the vector of
482+ # fitting coefficients (ECI).
479483 #
480- # where 'X' is the correlation matrix of shape (Nvalue, Nbfunc),
481- # and 'property' is a vector of Nvalue calculated properties, and
482- # W = L*L.transpose() is the weight matrix.
484+ # Weighted least squares minimizes
485+ # (y-X*b).transpose() * W * (y-X*b)
486+ #
487+ # Using the SVD, and given that W is Hermitian and positive semi-definite:
488+ # U * S * U.transpose() == W
489+ #
490+ # Define L such that:
491+ # L.transpose() = U * sqrt(S)
492+ #
493+ # Then we can write the weighted least squares problem using:
494+ # (y-X*b).transpose() * L.transpose() * L * (y-X*b)
495+ #
496+ # Or:
497+ # (L*y-L*X*b).transpose() * (L*y-L*X*b)
498+ #
499+ # So, if weights are included, then the linear model is changed from
500+ # X*b = y -> L*X*b = L*y
483501 #
484502 # By default, W = np.matlib.eye(Nvalue) (unweighted).
485503 #
@@ -775,8 +793,8 @@ def print_input_help():
775793 # Options for "evolve_params_kwargs":
776794 #
777795 # "n_population": int, optional, default=100
778- # Population size. This many random initial starting individuals are
779- # created.
796+ # Initial population size. This many random initial starting individuals
797+ # are created if no "pop_begin_filename" file exists.
780798 #
781799 # "n_halloffame": int, optional, default=25
782800 # Maxsize of the hall of fame which holds the best individuals
@@ -797,19 +815,32 @@ def print_input_help():
797815 # with.
798816 #
799817 # "pop_begin_filename": string, optional, default="population_begin.pkl"
800- # Filename where the initial population is read from, if it exists.
801- #
818+ # Filename suffix where the initial population is read from, if it
819+ # exists. For example, if "filename_prefix" is "Ef_kfold10" and
820+ # "pop_begin_filename" is "population_begin.pkl", then the initial
821+ # population is read from the file "Ef_kfold10_population_begin.pkl".
822+ #
823+ # The population file may contain either a list of individuals,
824+ # as written to the "population_end.pkl" file, or a HallOfFame
825+ # instance, as written to either an "evolve_halloffame.pkl" file or
826+ # overall casm-learn "halloffame.pkl" file.
827+ #
802828 # "pop_end_filename": string, optional, default="population_end.pkl"
803- # Filename where the final population is saved.
829+ # Filename where the final population is saved. For example, if
830+ # "filename_prefix" is "Ef_kfold10" and "pop_end_filename" is
831+ # "population_end.pkl", then the final population is saved to the
832+ # file "Ef_kfold10_population_end.pkl".
804833 #
805834 # "halloffame_filename": string, optional, default="evolve_halloffame.pkl"
806835 # Filename where a hall of fame is saved holding the best individuals
807- # encountered in any generation.
836+ # encountered in any generation. For example, if "filename_prefix" is
837+ # "Ef_kfold10" and "halloffame_filename" is "evolve_halloffame.pkl",
838+ # then it is saved to the file "Ef_kfold10_evolve_halloffame.pkl".
808839 #
809840 # "filename_prefix": string, optional
810841 # Prefix for filenames, default uses input file filename excluding
811842 # extension. For example, if input file is named "Ef_kfold10.json", then
812- # "Ef_kfold10_population_begin.pkl", "specs2_population_end .pkl", and
843+ # "Ef_kfold10_population_begin.pkl", "Ef_kfold10_population_end.pkl", and
813844 # "Ef_kfold10_evolve_halloffame.pkl" are used.
814845
815846 "feature_selection" : {
@@ -1414,11 +1445,11 @@ def read_sample_weight(input, tdata, verbose=True):
14141445 # use method to get weights
14151446 if specs ["weight" ]["method" ] == "wCustom" :
14161447 if verbose :
1417- print "Reading custom weights"
1448+ print "# Reading custom weights"
14181449 sample_weight = tdata .data ["weight" ].values
14191450 elif specs ["weight" ]["method" ] == "wCustom2d" :
14201451 if verbose :
1421- print "Reading custom2d weights"
1452+ print "# Reading custom2d weights"
14221453 cols = ["weight(" + str (i ) + ")" for i in xrange (tdata .n_samples )]
14231454 sample_weight = tdata .data .loc [:,cols ].values
14241455 elif specs ["weight" ]["method" ] == "wHullDist" :
@@ -1543,7 +1574,9 @@ def check_input(name):
15431574 pickle .dump (fdata , open (fit_data_filename , 'wb' ))
15441575
15451576 if verbose :
1546- print "# Writing problem specs to:" , fit_data_filename , "\n "
1577+ print "# Writing problem specs to:" , fit_data_filename
1578+ print "# To inspect or customize the problem specs further, use the '--checkspecs' method\n "
1579+
15471580
15481581 # during runtime only, if LinearRegression and LeaveOneOut, update fdata.cv and fdata.scoring
15491582 # to use optimized LOOCV score method
0 commit comments