@@ -474,12 +474,30 @@ def print_input_help():
474474 # The "problem_specs"/"weight" options specify the method to use for weighting
475475 # training data.
476476 #
477- # If weights are included, then the linear model is changed from
478- # X*b = y -> L*X*b = L*y,
477+ # Ordinary least squares minimizes
478+ # (y-X*b).transpose() * (y-X*b)
479+ #
480+ # where 'X' is the correlation matrix of shape (Nvalue, Nbfunc), and 'y'
481+ # is a vector of Nvalue calculated properties, and 'b' are the fitting
482+ # coefficients (ECI).
479483 #
480- # where 'X' is the correlation matrix of shape (Nvalue, Nbfunc),
481- # and 'property' is a vector of Nvalue calculated properties, and
482- # W = L*L.transpose() is the weight matrix.
484+ # Weighted least squares minimizes
485+ # (y-X*b).transpose() * W * (y-X*b)
486+ #
487+ # Using the SVD, and given that W is Hermitian:
488+ # U * S * U.transpose() == W
489+ #
490+ # Define L such that:
491+ # L.transpose() = U * sqrt(S)
492+ #
493+ # Then we can write the weighted least squares problem using:
494+ # (y-X*b).transpose() * L.transpose() * L * (y-X*b)
495+ #
496+ # Or:
497+ # (L*y-L*X*b).transpose() * (L*y-L*X*b)
498+ #
499+ # So, if weights are included, then the linear model is changed from
500+ # X*b = y -> L*X*b = L*y
483501 #
484502 # By default, W = np.matlib.eye(Nvalue) (unweighted).
485503 #
@@ -797,19 +815,27 @@ def print_input_help():
797815 # with.
798816 #
799817 # "pop_begin_filename": string, optional, default="population_begin.pkl"
800- # Filename where the initial population is read from, if it exists.
801- #
818+ # Filename suffix where the initial population is read from, if it
819+ # exists. For example, if "filename_prefix" is "Ef_kfold10" and
820+ # "pop_begin_filename" is "population_begin.pkl", then the initial
821+ # population is read from the file "Ef_kfold10_population_begin.pkl".
822+ #
802823 # "pop_end_filename": string, optional, default="population_end.pkl"
803- # Filename where the final population is saved.
824+ # Filename where the final population is saved. For example, if
825+ # "filename_prefix" is "Ef_kfold10" and "pop_end_filename" is
826+ # "population_end.pkl", then the final population is saved to the
827+ # file "Ef_kfold10_population_end.pkl".
804828 #
805829 # "halloffame_filename": string, optional, default="evolve_halloffame.pkl"
806830 # Filename where a hall of fame is saved holding the best individuals
807- # encountered in any generation.
831+ # encountered in any generation. For example, if "filename_prefix" is
832+ # "Ef_kfold10" and "halloffame_filename" is "evolve_halloffame.pkl",
833+ # then it is saved to the file "Ef_kfold10_evolve_halloffame.pkl".
808834 #
809835 # "filename_prefix": string, optional
810836 # Prefix for filenames, default uses input file filename excluding
811837 # extension. For example, if input file is named "Ef_kfold10.json", then
812- # "Ef_kfold10_population_begin.pkl", "specs2_population_end .pkl", and
838+ # "Ef_kfold10_population_begin.pkl", "Ef_kfold10_population_end .pkl", and
813839 # "Ef_kfold10_evolve_halloffame.pkl" are used.
814840
815841 "feature_selection" : {
@@ -1414,11 +1440,11 @@ def read_sample_weight(input, tdata, verbose=True):
14141440 # use method to get weights
14151441 if specs ["weight" ]["method" ] == "wCustom" :
14161442 if verbose :
1417- print "Reading custom weights"
1443+ print "# Reading custom weights"
14181444 sample_weight = tdata .data ["weight" ].values
14191445 elif specs ["weight" ]["method" ] == "wCustom2d" :
14201446 if verbose :
1421- print "Reading custom2d weights"
1447+ print "# Reading custom2d weights"
14221448 cols = ["weight(" + str (i ) + ")" for i in xrange (tdata .n_samples )]
14231449 sample_weight = tdata .data .loc [:,cols ].values
14241450 elif specs ["weight" ]["method" ] == "wHullDist" :
@@ -1543,7 +1569,9 @@ def check_input(name):
15431569 pickle .dump (fdata , open (fit_data_filename , 'wb' ))
15441570
15451571 if verbose :
1546- print "# Writing problem specs to:" , fit_data_filename , "\n "
1572+ print "# Writing problem specs to:" , fit_data_filename
1573+ print "# To inspect or customize the problem specs further, use the '--checkspecs' method\n "
1574+
15471575
15481576 # during runtime only, if LinearRegression and LeaveOneOut, update fdata.cv and fdata.scoring
15491577 # to use optimized LOOCV score method
0 commit comments