-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
81 lines (77 loc) · 2.11 KB
/
docker-compose.yml
File metadata and controls
81 lines (77 loc) · 2.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
---
# Compose stack: Ollama model server + llama-stack API + two front-ends
# (Streamlit playground and a Chainlit demo app). Startup order is enforced
# via healthchecks and depends_on conditions: ollama -> llama-stack -> UIs.
services:
  # Local Ollama inference server; pulls the configured model on startup.
  ollama:
    image: docker.io/ollama/ollama:latest
    ports:
      # Quoted so YAML never misreads "host:container" as a number/time.
      - "${OLLAMA_PORT:-7869}:${OLLAMA_INTERNAL_PORT:-11434}"
    volumes:
      - .:/code
      - ./.ollama:/root/.ollama  # persist downloaded models across restarts
    pull_policy: always
    tty: true
    restart: always
    environment:
      - OLLAMA_KEEP_ALIVE=${OLLAMA_KEEP_ALIVE:-24h}
      - OLLAMA_HOST=0.0.0.0  # listen on all interfaces inside the container
    # Start the server in the background, give it time to bind, pull the
    # inference model, then `wait` on the server so the container stays up.
    entrypoint: ["sh", "-c"]
    command: ["ollama serve & sleep 10 && ollama pull ${INFERENCE_MODEL:-llama3.2:1b} && wait"]
    healthcheck:
      # Healthy only once the model shows up in `ollama list`, so dependents
      # don't start before the pull completes.
      test: ["CMD", "sh", "-c", "ollama list | grep -q '${INFERENCE_MODEL:-llama3.2:1b}'"]
      interval: 30s
      timeout: 15s
      retries: 40
      start_period: 300s  # first model pull can take several minutes

  # Llama Stack API server, backed by the ollama service above.
  llama-stack:
    image: llamastack/distribution-ollama:latest
    ports:
      - "${LLAMA_STACK_PORT:-5001}:5000"
    volumes:
      - ./.llama:/root/.llama
    pull_policy: always
    restart: always
    environment:
      - INFERENCE_MODEL=${INFERENCE_MODEL:-llama3.2:1b}
      - OLLAMA_URL=http://ollama:${OLLAMA_INTERNAL_PORT:-11434}
      - LLAMA_STACK_PORT=5000
    command: --port 5000 --template ollama
    depends_on:
      ollama:
        condition: service_healthy  # wait until the model is pulled
    healthcheck:
      # NOTE(review): requires curl in the image — confirm it is present.
      test: ["CMD", "curl", "-f", "http://localhost:5000/v1/models"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s

  # Streamlit-based playground UI for the llama-stack API.
  llama-stack-playground:
    build:
      context: .
      dockerfile: Dockerfile.playground
    ports:
      - "${PLAYGROUND_PORT:-8501}:${PLAYGROUND_PORT:-8501}"
    volumes:
      - ./.llama:/root/.llama
    restart: always
    environment:
      - LLAMA_STACK_ENDPOINT=http://llama-stack:5000
    depends_on:
      llama-stack:
        condition: service_healthy

  # Chainlit demo application talking to the llama-stack API.
  chainlit-app:
    build:
      context: .
      dockerfile: Dockerfile.demo_01
    ports:
      - "${CHAINLIT_PORT:-9090}:8000"
    restart: always
    environment:
      - LLAMA_STACK_API_URL=http://llama-stack:5000
      - VECTOR_DB_ID=my_demo_vector_db
    depends_on:
      llama-stack:
        condition: service_healthy
    healthcheck:
      # NOTE(review): requires curl in the built image — confirm it is present.
      test: ["CMD", "curl", "-f", "http://localhost:8000"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s