Skip to content

Commit 957fbe1

Browse files
gHashTag and ona-agent
committed
fix: disable auto_stop, upgrade to performance-16x with 32GB RAM
- auto_stop_machines = off (model takes 3.5min to load)
- min_machines_running = 1
- NUM_THREADS = 16

Co-authored-by: Ona <no-reply@ona.com>
1 parent 18ebbfb commit 957fbe1

1 file changed

Lines changed: 6 additions & 6 deletions

File tree

fly.toml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,14 @@ primary_region = "iad"
1414
MODEL_PATH = "/app/models/smollm2-1.7b-instruct-q8_0.gguf"
1515
TEMPERATURE = "0.7"
1616
TOP_P = "0.9"
17-
NUM_THREADS = "4"
17+
NUM_THREADS = "16"
1818

1919
# SmolLM2-1.7B requires more RAM (~4GB for model + buffers)
2020
# performance-16x: 16 dedicated CPU cores, 32GB RAM (previously performance-4x: 4 cores, 8GB RAM)
2121
[[vm]]
22-
size = "performance-4x"
23-
memory = "8gb"
24-
cpus = 4
22+
size = "performance-16x"
23+
memory = "32gb"
24+
cpus = 16
2525

2626
# Alternative sizes:
2727
# performance-8x: 8 CPU, 16GB RAM (faster, more expensive)
@@ -37,9 +37,9 @@ primary_region = "iad"
3737
[http_service]
3838
internal_port = 8080
3939
force_https = true
40-
auto_stop_machines = true
40+
auto_stop_machines = "off"
4141
auto_start_machines = true
42-
min_machines_running = 0
42+
min_machines_running = 1
4343

4444
[[http_service.checks]]
4545
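grace_period = "180s" # SmolLM2-1.7B takes ~3.5min to load per the commit message; NOTE(review): 180s is shorter than that — confirm the grace period is sufficient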

0 commit comments

Comments
 (0)