Bump images for llama2 and update compute (#436)

margaretqian · web-flow · commit 7003793b15ad · 2023-09-18T19:39:17.000-07:00
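Bump the inference image from 0.1.37 to 0.1.40 and remove the pinned `cluster: r7z15` in both llama2 deployment configs; move llama2-13b from 1 GPU (oci.vm.gpu.a10.1) to 2 GPUs (oci.vm.gpu.a10.2).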
diff --git a/examples/inference-deployments/llama2/llama2_13b.yaml b/examples/inference-deployments/llama2/llama2_13b.yaml
@@ -5,9 +5,8 @@ command: |- # Note this command is a workaround until we build vllm into the inf
   pip uninstall torch -y
   pip install torch==2.0.1
 compute:
-  gpus: 1
-  instance: oci.vm.gpu.a10.1
-image: mosaicml/inference:0.1.37
-cluster: r7z15
+  gpus: 2
+  instance: oci.vm.gpu.a10.2
+image: mosaicml/inference:0.1.40
 default_model:
   model_type: llama2-13b
diff --git a/examples/inference-deployments/llama2/llama2_7b_chat.yaml b/examples/inference-deployments/llama2/llama2_7b_chat.yaml
@@ -7,7 +7,6 @@ command: |- # Note this command is a workaround until we build vllm into the inf
 compute:
   gpus: 1
   instance: oci.vm.gpu.a10.1
-image: mosaicml/inference:0.1.37
-cluster: r7z15
+image: mosaicml/inference:0.1.40
 default_model:
   model_type: llama2-7b-chat