@@ -120,7 +120,7 @@ def truncateInputs(inputs: dict):
120120 return clone
121121
122122
123- last_xformers_memory_efficient_attention = {}
123+ # last_xformers_memory_efficient_attention = {}
124124last_attn_procs = None
125125last_lora_weights = None
126126
@@ -132,7 +132,7 @@ async def inference(all_inputs: dict, response) -> dict:
132132 global pipelines
133133 global last_model_id
134134 global schedulers
135- global last_xformers_memory_efficient_attention
135+ # global last_xformers_memory_efficient_attention
136136 global always_normalize_model_id
137137 global last_attn_procs
138138 global last_lora_weights
@@ -456,25 +456,25 @@ def sendStatus():
456456 model_inputs ["mask_image" ] = PIL .Image .fromarray (mask )
457457
458458 # Turning on takes 3ms and turning off 1ms... don't worry, I've got your back :)
459- x_m_e_a = call_inputs .get ("xformers_memory_efficient_attention" , True )
460- last_x_m_e_a = last_xformers_memory_efficient_attention .get (pipeline , None )
461- if x_m_e_a != last_x_m_e_a :
462- if x_m_e_a == True :
463- print ("pipeline.enable_xformers_memory_efficient_attention()" )
464- pipeline .enable_xformers_memory_efficient_attention () # default on
465- elif x_m_e_a == False :
466- print ("pipeline.disable_xformers_memory_efficient_attention()" )
467- pipeline .disable_xformers_memory_efficient_attention ()
468- else :
469- return {
470- "$error" : {
471- "code" : "INVALID_XFORMERS_MEMORY_EFFICIENT_ATTENTION_VALUE" ,
472- "message" : f"x_m_e_a expects True or False, not: { x_m_e_a } " ,
473- "requested" : x_m_e_a ,
474- "available" : [True , False ],
475- }
476- }
477- last_xformers_memory_efficient_attention .update ({pipeline : x_m_e_a })
459+ # x_m_e_a = call_inputs.get("xformers_memory_efficient_attention", True)
460+ # last_x_m_e_a = last_xformers_memory_efficient_attention.get(pipeline, None)
461+ # if x_m_e_a != last_x_m_e_a:
462+ # if x_m_e_a == True:
463+ # print("pipeline.enable_xformers_memory_efficient_attention()")
464+ # pipeline.enable_xformers_memory_efficient_attention() # default on
465+ # elif x_m_e_a == False:
466+ # print("pipeline.disable_xformers_memory_efficient_attention()")
467+ # pipeline.disable_xformers_memory_efficient_attention()
468+ # else:
469+ # return {
470+ # "$error": {
471+ # "code": "INVALID_XFORMERS_MEMORY_EFFICIENT_ATTENTION_VALUE",
472+ # "message": f"x_m_e_a expects True or False, not: {x_m_e_a}",
473+ # "requested": x_m_e_a,
474+ # "available": [True, False],
475+ # }
476+ # }
477+ # last_xformers_memory_efficient_attention.update({pipeline: x_m_e_a})
478478
479479 # Run the model
480480 # with autocast(device_id):
0 commit comments