Merge pull request #10 from BhallaLab/develop

dilawar · web-flow · commit 3d11d6f6e6df · 2018-10-18T14:36:04.000+05:30
Added option to multi_param_minimization to tweak model file in accordance with optimization results
diff --git a/Brent_minimization.py b/Brent_minimization.py
@@ -85,20 +85,30 @@ def enumerateFindSimFiles( location ):
         quit()
 
 class EvalFunc:
-    def __init__( self, objField, expts, weights, pool, modelFile ):
+    def __init__( self, objField, expts, weights, pool, modelFile, presettle = [] ):
         self.objField = objField
         self.expts = expts
         self.weights = weights
         self.pool = pool # pool of available CPUs
         self.modelFile = modelFile
+        self.presettle = presettle
 
     def doEval( self, x ):
         ret = []
         spl = self.objField.split( '.' )
         assert( len(spl) == 2 )
         obj, field = spl
+
+        settleDict = {}
+        if len( self.presettle ) == 3:
+            presettleTime = float( self.presettle[2] )
+            if presettleTime > 0:
+            #print("{}".format( self.presettle ) )
+                settleDict = findSim.innerMain( self.presettle[0], modelFile = self.presettle[1], hidePlot=True, silent=True, scaleParam=[obj,field,str(x)], settleTime = presettleTime )
+        #print( "Doing presettle, len = {}".format( len(settleDict) ) )
+
         for k in self.expts:
-            ret.append( self.pool.apply_async( findSim.innerMain, (k,), dict(modelFile = self.modelFile, hidePlot=True, silent=True, scaleParam=[obj,field,str(x)]), callback = reportReturn ) )
+            ret.append( self.pool.apply_async( findSim.innerMain, (k,), dict(modelFile = self.modelFile, hidePlot=True, silent=True, scaleParam=[obj,field,str(x)], settleDict=settleDict ), callback = reportReturn ) )
         score = [ i.get() for i in ret ]
         sumScore = sum([ s*w for s,w in zip(score, self.weights) if s>=0.0])
         sumWts = sum( [ w for s,w in zip(score, self.weights) if s>=0.0 ] )
@@ -111,8 +121,10 @@ def main():
     parser.add_argument( 'location', type = str, help='Required: Directory in which the scripts (in tsv format) are all located. OR: File in which each line is the filename of a scripts.tsv file, followed by weight to assign for that file.')
     parser.add_argument( '-n', '--numProcesses', type = int, help='Optional: Number of processes to spawn', default = 2 )
     parser.add_argument( '-m', '--model', type = str, help='Optional: Composite model definition file. First searched in directory "location", then in current directory.', default = "FindSim_compositeModel_1.g" )
-    parser.add_argument( '-p', '--parameter_sweep', nargs='*', default=[],  help='Does a parameter sweep in range 0.5-2x of each object.field pair.' )
-    parser.add_argument( '-f', '--file', type = str, help='Optional: File name for output of parameter sweep', default = "" )
+    parser.add_argument( '-p', '--parameter_optimize', nargs='*', default=[],  help='Does a parameter optimization for each specified object.field pair.' )
+    parser.add_argument( '-ps', '--presettle', nargs=3, default=[],  help='Arguments: tsv_file, model_file, settle_time. Obtains values of all concentrations after a specified settle-time, so that all calculations for the optimization runs can be initialized to this presettled value. The tsv_file is to specify which subset of the model_file to use. This option is most useful in costly multiscale models.' )
+    parser.add_argument( '-f', '--file', type = str, help='Optional: File name for output of parameter optimization', default = ""
+    )
     args = parser.parse_args()
     location = args.location
     if location[-1] != '/':
@@ -130,13 +142,14 @@ def main():
     pool = Pool( processes = args.numProcesses )
 
     results = {}
-    for i in args.parameter_sweep:
+    for i in args.parameter_optimize:
         print( "{}".format( i ) )
         spl = i.split( '.' )
         assert( len(spl) == 2 )
         obj, field = spl
-        ev = EvalFunc( i, fnames, weights, pool, modelFile )
-        results[i] = optimize.minimize_scalar( ev.doEval )
+        ev = EvalFunc( i, fnames, weights, pool, modelFile, args.presettle )
+        # method='bounded' selects the bounded Brent minimizer, so the search stays within 'bounds'.
+        results[i] = optimize.minimize_scalar( ev.doEval, method = 'bounded', bounds = (0.0, 100.0) )
         print( "\n Finished optimizing for " + i)
     print( "\n---------------- Completed ----------------- " )
     dumpData = False
diff --git a/findSim.py b/findSim.py
@@ -33,7 +33,7 @@
 **********************************************************************/
 
 '''
-
+from __future__ import print_function
 import heapq
 import pylab
 import numpy as np
@@ -545,7 +545,10 @@ def _scaleOneParam( self, params ):
 
         obj = self.findObj( '/model', params[0] )
         scale = float( params[2] )
-        assert( scale >= 0.0 and scale <= 100.0 )
+        if not ( scale >= 0.0 and scale <= 100.0 ):
+            print( "Error: Scale {} out of range".format( scale ) )
+            assert( False )
+        #assert( scale >= 0.0 and scale <= 100.0 )
         if params[1] == 'Kd':
             if not obj.isA[ "ReacBase" ]:
                 raise SimError( "scaleParam: can only assign Kd to a Reac, was: '{}'".format( obj.className ) )
@@ -1435,11 +1438,19 @@ def main():
     parser.add_argument( '-hs', '--hide_subplots', action="store_true", help='Hide subplot output of simulation. By default the graphs include dotted lines to indicate individual quantities (e.g., states of a molecule) that are being summed to give a total response. This flag turns off just those dotted lines, while leaving the main plot intact.' )
     parser.add_argument( '-o', '--optimize_elec', action="store_true", help='Optimize electrical computation. By default the electrical computation runs for the entire duration of the simulation. With this flag the system turns off the electrical engine except during the times when electrical stimuli are being given. This can be *much* faster.' )
     parser.add_argument( '-s', '--scale_param', nargs=3, default=[],  help='Scale specified object.field by ratio.' )
+    parser.add_argument( '-settle_time', '--settle_time', type=float, default=0,  help='Run model for specified settle time and return dict of {path,conc}.' )
     args = parser.parse_args()
-    innerMain( args.script, modelFile = args.model, dumpFname = args.dump_subset, paramFname = args.param_file, hidePlot = args.hide_plot, hideSubplots = args.hide_subplots, optimizeElec = args.optimize_elec, scaleParam = args.scale_param )
+    innerMain( args.script, modelFile = args.model, dumpFname = args.dump_subset, paramFname = args.param_file, hidePlot = args.hide_plot, hideSubplots = args.hide_subplots, optimizeElec = args.optimize_elec, scaleParam = args.scale_param, settleTime = args.settle_time )
+
 
+def innerMain( script, modelFile = "model/synSynth7.g", dumpFname = "", paramFname = "", hidePlot = True, hideSubplots = False, optimizeElec=True, silent = False, scaleParam=[], settleTime = 0, settleDict = {} ):
+    ''' If *settleTime* > 0, then after loading the model and applying all
+    modifications, the model is run for the specified settle time and a
+    dict of {pool path: n} for every non-buffered pool is returned.
+    If *settleDict* is not empty, each key is treated as a pool path and
+    the nInit of that pool is set to the stored value before the run.
+    '''
 
-def innerMain( script, modelFile = "model/synSynth7.g", dumpFname = "", paramFname = "", hidePlot = True, hideSubplots = False, optimizeElec=True, silent = False, scaleParam=[] ):
     global pause
     solver = "gsl"  # Pick any of gsl, gssa, ee..
     modelWarning = ""
@@ -1511,6 +1522,33 @@ def innerMain( script, modelFile = "model/synSynth7.g", dumpFname = "", paramFna
             for i in range( 10, 20 ):
                 moose.setClock( i, 0.1 )
 
+        ##############################################################
+        # Presettling. If settleTime > 0, generate and return the dict of
+        # settled values; otherwise apply any settleDict that was passed in.
+        if settleTime > 0:
+            t0 = time.time()
+            moose.reinit()
+            #print settleTime
+            moose.start( settleTime )
+            w = moose.wildcardFind( modelId.path + "/##[ISA=PoolBase]" )
+            ret = {}
+            for i in w:
+                if not i.isBuffered:
+                    ret[i.path] = i.n
+                    #print( "{}.nInit =   {:.3f}".format( i.path, i.n ))
+            #print "-------------------- settle done -------------------"
+            moose.delete( modelId )
+            if moose.exists( '/library' ):
+                moose.delete( '/library' )
+            #print( "Done settling in {:.2f} seconds".format( time.time()-t0))
+            print( "s", end = '' )
+            sys.stdout.flush()
+            return ret
+
+        for key, value in settleDict.items():
+            moose.element( key ).nInit = value
+        ##############################################################
+
         t0 = time.time()
         score = runit( expt, model,stims, readouts, modelId )
         elapsedTime = time.time() - t0
@@ -1522,13 +1560,17 @@ def innerMain( script, modelFile = "model/synSynth7.g", dumpFname = "", paramFna
                 
             pylab.show()
         moose.delete( modelId )
+        if moose.exists( '/library' ):
+            moose.delete( '/library' )
         return score
         
     except SimError as msg:
         if not silent:
             print( "Error: findSim failed for script {}: {}".format(script, msg ))
         if modelId:
             moose.delete( modelId )
+            if moose.exists( '/library' ):
+                moose.delete( '/library' )
         return -1.0
 # Run the 'main' if this script is executed standalone.
 if __name__ == '__main__':
diff --git a/multi_param_minimization.py b/multi_param_minimization.py
@@ -49,12 +49,12 @@
 import time
 import findSim
 from multiprocessing import Pool
+import moose
 
 scaleFactors = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 1, 1.05, 1.1, 1.2, 1.4, 1.6, 1.8, 2.0]
 
 resultCount = 0
 numIterations = 0
-TOLERANCE = 0.001 # Don't want to go too tight on tolarance.
 
 def reportReturn( result ):
     global resultCount
@@ -94,6 +94,7 @@ def __init__( self, params, expts, weights, pool, modelFile ):
         self.weights = weights
         self.pool = pool # pool of available CPUs
         self.modelFile = modelFile
+        self.score = []
 
     def doEval( self, x ):
         ret = []
@@ -109,9 +110,9 @@ def doEval( self, x ):
 
         for k in self.expts:
             ret.append( self.pool.apply_async( findSim.innerMain, (k,), dict(modelFile = self.modelFile, hidePlot=True, silent=True, scaleParam=paramList), callback = reportReturn ) )
-        score = [ i.get() for i in ret ]
-        sumScore = sum([ s*w for s,w in zip(score, self.weights) if s>=0.0])
-        sumWts = sum( [ w for s,w in zip(score, self.weights) if s>=0.0 ] )
+        self.score = [ i.get() for i in ret ]
+        sumScore = sum([ s*w for s,w in zip(self.score, self.weights) if s>=0.0])
+        sumWts = sum( [ w for s,w in zip(self.score, self.weights) if s>=0.0 ] )
         return sumScore/sumWts
 
 def optCallback( x ):
@@ -122,14 +123,15 @@ def optCallback( x ):
 
 def main():
     t0 = time.time()
-    parser = argparse.ArgumentParser( description = 'Wrapper script to run a lot of FindSim evaluations in parallel.' )
-
+    parser = argparse.ArgumentParser( description = 'Script to run a multi-parameter optimization in which each function evaluation is the weighted mean of a set of FindSim evaluations. These evaluations may be run in parallel. The optimiser uses the BGFR method with bounds. Since we are doing relative scaling the bounds are between 0.03 and 30 for Kd, tau and Km, and between 0 and 30 for other parameters' )
     parser.add_argument( 'location', type = str, help='Required: Directory in which the scripts (in tsv format) are all located. OR: File in which each line is the filename of a scripts.tsv file, followed by weight to assign for that file.')
     parser.add_argument( '-n', '--numProcesses', type = int, help='Optional: Number of processes to spawn', default = 2 )
+    parser.add_argument( '-t', '--tolerance', type = float, help='Optional: Tolerance criterion for completion of minimization', default = 1e-4 )
     parser.add_argument( '-m', '--model', type = str, help='Optional: Composite model definition file. First searched in directory "location", then in current directory.', default = "FindSim_compositeModel_1.g" )
-    parser.add_argument( '-p', '--parameters', nargs='*', default=[],  help='Parameter to vary. Each is defined as an object.field pair. The object is defined as a unique MOOSE name, typically name or parent/name. The field is separated from the object by a period. The field may be concInit for molecules, Kf, Kb, Kd or tau for reactions, and Km or Kcat for enzymes. One can specify more than one parameter for a given reaction or enzyme. It is advisable to use Kd and tau for reactions unless you have a unidirectional reaction.' )
+    parser.add_argument( '-p', '--parameters', nargs='*', default=[],  help='Parameter to vary. Each is defined as an object.field pair. The object is defined as a unique MOOSE name, typically name or parent/name. The field is separated from the object by a period. The field may be concInit for molecules, Kf, Kb, Kd or tau for reactions, and Km or kcat for enzymes. One can specify more than one parameter for a given reaction or enzyme. It is advisable to use Kd and tau for reactions unless you have a unidirectional reaction.' )
     parser.add_argument( '-f', '--file', type = str, help='Optional: File name for output of parameter minimization', default = "" )
     args = parser.parse_args()
+
     location = args.location
     if location[-1] != '/':
         location += '/'
@@ -146,32 +148,92 @@ def main():
     pool = Pool( processes = args.numProcesses )
 
     params = []
+    bounds = []
     for i in args.parameters:
         print( "{}".format( i ) )
         spl = i.split( '.' )
         assert( len(spl) == 2 )
         params.append( i )
+        if spl[1] == 'Kd' or spl[1] == 'tau' or spl[1] == 'Km':
+            bounds.append( (0.03,30) )
+        else:
+            bounds.append( (0.0, 30 ) ) # Concs, Kfs and Kbs can be zero.
     ev = EvalFunc( params, fnames, weights, pool, modelFile )
-    results = optimize.minimize( ev.doEval, np.ones( len(params) ), tol = TOLERANCE, callback = optCallback )
+    # Generate the score for each expt for the initial condition
+    ev.doEval( [1.0]* len( params ) )
+    initScore = ev.score
+    # Do the minimization
+    results = optimize.minimize( ev.doEval, np.ones( len(params) ), method='L-BFGS-B', tol = args.tolerance, callback = optCallback, bounds = bounds )
     print( "\n----------- Completed in {:.3f} sec ---------- ".format(time.time() - t0 ) )
     print( "\n----- Score= {:.4f} ------ ".format(results.fun ) )
     dumpData = False
     fp = ""
     if len( args.file ) > 0:
         fp = open( args.file, "w" )
         dumpData = True
-    analyzeResults( fp, dumpData, results, params )
+    analyzeResults( fp, dumpData, results, params, ev, initScore )
     if dumpData:
         fp.close()
-
-def analyzeResults( fp, dumpData, results, params ):
+        dumpTweakedModelFile( args, params, results )
+
+def dumpTweakedModelFile( args, params, results ):
+    filename, file_extension = os.path.splitext( args.model )
+    resfname, res_ext = os.path.splitext( args.file )
+    if file_extension == ".xml":
+        modelId, errormsg = moose.mooseReadSBML( args.model, 'model', 'ee' )
+        tweakParams( params, results.x )
+        moose.mooseWriteSBML( modelId.path, resfname + "_tweaked.xml" )
+        moose.delete( modelId )
+    elif file_extension == ".g":
+        modelId = moose.loadModel( args.model, 'model', 'ee' )
+        tweakParams( params, results.x )
+        moose.mooseWriteKkit( modelId.path, resfname + "_tweaked.g" )
+        moose.delete( modelId )
+    else:
+        print( "Warning: dumpTweakedModelFile: Don't know file type for {}".format( args.model ) )
+
+def tweakParams( params, scaleFactors ):
+    for i, x in zip( params, scaleFactors ):
+        objname, field = i.split( '.' )
+        w = moose.wildcardFind( "/model/##/{0},/model/{0}".format(objname) )
+        if len(w) != 1:
+            print( "Error: tweakParams: Need precisely one object to match name '{}', got {}".format( objname, len(w) ) )
+            continue
+        obj = w[0]
+        if field == 'Kd':
+            obj.Kf /= np.sqrt( x )
+            obj.Kb *= np.sqrt( x )
+        elif field == 'tau':
+            obj.Kf /= x
+            obj.Kb /= x
+        else:
+            obj.setField( field, obj.getField( field ) * x )
+        #print( "Tweaked {}.{} by {}".format( obj.path, field, x ) )
+
+
+
+def analyzeResults( fp, dumpData, results, params, evalObj, initScore ):
     #assert( len(results.x) == len( results.fun ) )
     assert( len(results.x) == len( params ) )
+    out = []
     for p,x, in zip(params, results.x):
-        outputStr = "Parameter = {},\toptimized scale={:.3f}".format(p, x)
-        print( outputStr )
+        out.append( "Parameter = {:40s}scale = {:.3f}".format(p, x) )
+    out.append( "\n{:40s}{:>12s}{:>12s}{:>12s}".format( "File", "initScore", "finalScore", "weight" ) )
+    initSum = 0.0
+    finalSum = 0.0
+    numSum = 0.0
+    assert( len( evalObj.expts ) == len( initScore ) )
+    for e, i, f, w in zip( evalObj.expts, initScore, evalObj.score, evalObj.weights ):
+        out.append( "{:40s}{:12.3f}{:12.3f}{:12.3f}".format( e, i, f, w ) )
+        if i >= 0:
+            initSum += i * w
+            finalSum += f * w
+            numSum += w
+    out.append( "\nInit score = {:.4f}, final = {:.4f}".format(initSum/numSum, finalSum / numSum) )
+    for i in out:
+        print( i )
         if dumpData:
-            fp.write( outputStr + '\n' )
+            fp.write( i + '\n' )
         
 # Run the 'main' if this script is executed standalone.
 if __name__ == '__main__':