feat(pytorch2): bump deps, drop conda/xformers

gadicc · gadicc · commit a3d807896e2b · 2023-07-19T20:13:13.000+01:00
diff --git a/Dockerfile b/Dockerfile
@@ -1,4 +1,5 @@
-ARG FROM_IMAGE="gadicc/diffusers-api-base:python3.9-pytorch1.12.1-cuda11.6-xformers"
+ARG FROM_IMAGE="pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime"
+# ARG FROM_IMAGE="gadicc/diffusers-api-base:python3.9-pytorch1.12.1-cuda11.6-xformers"
 # You only need the -banana variant if you need banana's optimization
 # i.e. not relevant if you're using RUNTIME_DOWNLOADS
 # ARG FROM_IMAGE="gadicc/python3.9-pytorch1.12.1-cuda11.6-xformers-banana"
@@ -20,6 +21,7 @@ RUN if [ -n "$http_proxy" ] ; then \
 ARG REQUESTS_CA_BUNDLE=${http_proxy:+/usr/local/share/ca-certificates/squid-self-signed.crt}
 
 ARG DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && apt-get install -yqq git
 
 FROM base AS patchmatch
 ARG USE_PATCHMATCH=0
@@ -67,15 +69,15 @@ RUN if [ "$USE_DREAMBOOTH" = "1" ] ; then \
     # By specifying the same torch version as conda, it won't download again.
     # Without this, it will upgrade torch, break xformers, make bigger image.
     # bitsandbytes==0.40.0.post4 had failed cuda detection on dreambooth test.
-    pip install -r diffusers/examples/dreambooth/requirements.txt bitsandbytes==0.39.1 torch==1.12.1 ; \
+    pip install -r diffusers/examples/dreambooth/requirements.txt ; \
   fi
 RUN if [ "$USE_DREAMBOOTH" = "1" ] ; then apt-get install git-lfs ; fi
 
 ARG USE_REALESRGAN=1
-RUN if [ "$USE_REALESRGAN" = "1" ] ; then conda install -c pytorch torchvision ; fi
 RUN if [ "$USE_REALESRGAN" = "1" ] ; then apt-get install -y libgl1-mesa-glx libglib2.0-0 ; fi
 RUN if [ "$USE_REALESRGAN" = "1" ] ; then git clone https://github.com/xinntao/Real-ESRGAN.git ; fi
-RUN if [ "$USE_REALESRGAN" = "1" ] ; then pip install numba==0.57.1 chardet ; fi
+# RUN if [ "$USE_REALESRGAN" = "1" ] ; then pip install numba==0.57.1 chardet ; fi
+RUN if [ "$USE_REALESRGAN" = "1" ] ; then pip install basicsr==1.4.2 facexlib==0.2.5 gfpgan==1.3.8 ; fi
 RUN if [ "$USE_REALESRGAN" = "1" ] ; then cd Real-ESRGAN && python3 setup.py develop ; fi
 
 COPY api/ .
diff --git a/api/app.py b/api/app.py
@@ -120,7 +120,7 @@ def truncateInputs(inputs: dict):
     return clone
 
 
-last_xformers_memory_efficient_attention = {}
+# last_xformers_memory_efficient_attention = {}
 last_attn_procs = None
 last_lora_weights = None
 
@@ -132,7 +132,7 @@ async def inference(all_inputs: dict, response) -> dict:
     global pipelines
     global last_model_id
     global schedulers
-    global last_xformers_memory_efficient_attention
+    # global last_xformers_memory_efficient_attention
     global always_normalize_model_id
     global last_attn_procs
     global last_lora_weights
@@ -456,25 +456,25 @@ def sendStatus():
         model_inputs["mask_image"] = PIL.Image.fromarray(mask)
 
     # Turning on takes 3ms and turning off 1ms... don't worry, I've got your back :)
-    x_m_e_a = call_inputs.get("xformers_memory_efficient_attention", True)
-    last_x_m_e_a = last_xformers_memory_efficient_attention.get(pipeline, None)
-    if x_m_e_a != last_x_m_e_a:
-        if x_m_e_a == True:
-            print("pipeline.enable_xformers_memory_efficient_attention()")
-            pipeline.enable_xformers_memory_efficient_attention()  # default on
-        elif x_m_e_a == False:
-            print("pipeline.disable_xformers_memory_efficient_attention()")
-            pipeline.disable_xformers_memory_efficient_attention()
-        else:
-            return {
-                "$error": {
-                    "code": "INVALID_XFORMERS_MEMORY_EFFICIENT_ATTENTION_VALUE",
-                    "message": f"x_m_e_a expects True or False, not: {x_m_e_a}",
-                    "requested": x_m_e_a,
-                    "available": [True, False],
-                }
-            }
-        last_xformers_memory_efficient_attention.update({pipeline: x_m_e_a})
+    # x_m_e_a = call_inputs.get("xformers_memory_efficient_attention", True)
+    # last_x_m_e_a = last_xformers_memory_efficient_attention.get(pipeline, None)
+    # if x_m_e_a != last_x_m_e_a:
+    #     if x_m_e_a == True:
+    #         print("pipeline.enable_xformers_memory_efficient_attention()")
+    #         pipeline.enable_xformers_memory_efficient_attention()  # default on
+    #     elif x_m_e_a == False:
+    #         print("pipeline.disable_xformers_memory_efficient_attention()")
+    #         pipeline.disable_xformers_memory_efficient_attention()
+    #     else:
+    #         return {
+    #             "$error": {
+    #                 "code": "INVALID_XFORMERS_MEMORY_EFFICIENT_ATTENTION_VALUE",
+    #                 "message": f"x_m_e_a expects True or False, not: {x_m_e_a}",
+    #                 "requested": x_m_e_a,
+    #                 "available": [True, False],
+    #             }
+    #         }
+    #     last_xformers_memory_efficient_attention.update({pipeline: x_m_e_a})
 
     # Run the model
     # with autocast(device_id):
diff --git a/api/train_dreambooth.py b/api/train_dreambooth.py
@@ -159,7 +159,7 @@ def TrainDreamBooth(model_id: str, pipeline, model_inputs, call_inputs, send_opt
         # 1.10.and an Nvidia Ampere GPU.  Default to  fp16 if a GPU is available else fp32.
         "prior_generation_precision": None,  # "no", "fp32", "fp16", "bf16"
         "local_rank": -1,
-        "enable_xformers_memory_efficient_attention": True,  # DDA, was: None
+        "enable_xformers_memory_efficient_attention": None,
         # Save more memory by using setting grads to None instead of zero. Be aware, that this changes certain
         # behaviors, so disable this argument if it causes any problems. More info:
         # https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html
diff --git a/requirements.txt b/requirements.txt
@@ -25,22 +25,23 @@ scikit-image==0.19.3
 accelerate==0.20.3
 
 # triton==2.0.0.dev20221105
-triton==2.0.0.dev20221202
+# triton==2.0.0.dev20221202 until 2023-07-19
+triton==2.0.0.post1 # released 2023-03-17
 
 ftfy==6.1.1
 
 # spacy==3.4.3
 spacy==3.5.0
 
-k-diffusion==0.0.12
+# k-diffusion==0.0.12 until 2023-07-19
+k-diffusion==0.0.15
 
 # safetensors==0.2.8 until 2023-06-14
 safetensors==0.3.1
 
-# must match conda installed version in Dockerfile
-torch==1.12.1
-# xformers 0.0.15.dev344+git.8910bb5 requires pyre-extensions==0.0.23
-pyre-extensions==0.0.23
+# torch==1.12.1 until 2023-07-19
+torch==2.0.1
+torchvision==0.15.2
 
 # boto3==1.26.12
 boto3==1.26.57
@@ -58,10 +59,4 @@ tensorboard==2.12.0
 
 xtarfile[zstd]==0.1.0
 
-# runtime_downloads upsampling; TODO, make it a build option
-basicsr==1.4.2
-facexlib==0.2.5
-gfpgan==1.3.8
-# torch and torchvision must match
-# https://github.com/pytorch/vision#installation
-#torchvision==0.13.1 # do it from conda
+bitsandbytes==0.40.2 # released 2023-07-17
diff --git a/tests/integration/test_build_download.py b/tests/integration/test_build_download.py
@@ -16,7 +16,8 @@ def test_cloudcache_build_download():
         "MODEL_REVISION": "fp16",
         "MODEL_URL": "s3://",  # <--
     }
-    conda = "conda run --no-capture-output -n xformers"
+    # conda = "conda run --no-capture-output -n xformers"
+    conda = ""
     dda = getDDA(
         minio=minio,
         stream_logs=True,
@@ -59,7 +60,8 @@ def test_huggingface_build_download():
         "MODEL_PRECISION": "fp16",
         "MODEL_REVISION": "fp16",
     }
-    conda = "conda run --no-capture-output -n xformers"
+    # conda = "conda run --no-capture-output -n xformers"
+    conda = ""
     dda = getDDA(
         stream_logs=True,
         environment=environment,
@@ -103,7 +105,8 @@ def test_checkpoint_url_build_download():
         "MODEL_REVISION": "fp16",
         "CHECKPOINT_URL": "https://huggingface.co/hakurei/waifu-diffusion-v1-3/resolve/main/wd-v1-3-float16.ckpt",
     }
-    conda = "conda run --no-capture-output -n xformers"
+    # conda = "conda run --no-capture-output -n xformers"
+    conda = ""
     dda = getDDA(
         stream_logs=True,
         environment=environment,