Remove --eagle3 from the optimum-cli export command and switch the demo draft model to AngelSlim/Qwen3-8B_eagle3

mzegla · mzegla · commit 83509a923ce8 · 2026-05-11T11:47:11.000+02:00
diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py
@@ -463,7 +463,7 @@ def export_text_generation_model(model_repository_path, source_model, model_name
                 additional_options = ""
                 if args["draft_eagle3_mode"]:
                     print("Using eagle3 option for the draft model export")
-                    additional_options += " --eagle3  --task text-generation-with-past"
+                    additional_options += " --task text-generation-with-past"
                 optimum_command = "optimum-cli export openvino --model {} --weight-format {} --trust-remote-code {} {}".format(draft_source_model, precision, additional_options, draft_llm_model_path)
                 if os.system(optimum_command):
                     raise ValueError("Failed to export llm model", source_model)
diff --git a/demos/continuous_batching/speculative_decoding/README.md b/demos/continuous_batching/speculative_decoding/README.md
@@ -40,7 +40,7 @@ mkdir models
 Run `export_model.py` script to download and quantize the model:
 
 ```console
-python export_model.py text_generation --source_model Qwen/Qwen3-8B --draft_source_model Tengyunw/qwen3_8b_eagle3 --draft_eagle3_mode --weight-format int4 --config_file_path models/config.json --model_repository_path models
+python export_model.py text_generation --source_model Qwen/Qwen3-8B --draft_source_model AngelSlim/Qwen3-8B_eagle3 --draft_eagle3_mode --weight-format int4 --config_file_path models/config.json --model_repository_path models
 ```
 
 Draft model inherits all scheduler properties from the main model.
@@ -52,6 +52,12 @@ models
 └── Qwen
     └── Qwen3-8B
         ├── added_tokens.json
+        ├── AngelSlim-Qwen3-8B_eagle3
+        │   ├── config.json
+        │   ├── generation_config.json
+        │   ├── openvino_config.json
+        │   ├── openvino_model.bin
+        │   └── openvino_model.xml
         ├── chat_template.jinja
         ├── config.json
         ├── generation_config.json
@@ -65,14 +71,10 @@ models
         ├── openvino_tokenizer.bin
         ├── openvino_tokenizer.xml
         ├── special_tokens_map.json
-        ├── Tengyunw-qwen3_8b_eagle3
-        │   ├── config.json
-        │   ├── generation_config.json
-        │   ├── openvino_model.bin
-        │   └── openvino_model.xml
         ├── tokenizer_config.json
         ├── tokenizer.json
         └── vocab.json
+
 ```
 
 ## Server Deployment
@@ -313,6 +315,8 @@ for chunk in stream:
 ```
 
 Output:
+
+```
 if len(numbers) <= 1:
   return numbers
 else: