docling-serve/docs/deploy-examples/docling-serve-simple.yaml at main · WiktorSobanski/docling-serve · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# This example deployment configures Docling Serve with a Service and cuda image
---
apiVersion: v1
kind: Service
metadata:
  name: docling-serve
  labels:
    app: docling-serve
    component: docling-serve-api
spec:
  ports:
  - name: http
    port: 5001
    targetPort: http
  selector:
    app: docling-serve
    component: docling-serve-api
---
kind: Deployment
apiVersion: apps/v1
metadata:
  name: docling-serve
  labels:
    app: docling-serve
    component: docling-serve-api
spec:
  replicas: 1
  selector:
    matchLabels:
      app: docling-serve
      component: docling-serve-api
  template:
    metadata:
      labels:
        app: docling-serve
        component: docling-serve-api
    spec:
      restartPolicy: Always
      containers:
        - name: api
          resources:
            limits:
              cpu: 500m
              memory: 2Gi
              nvidia.com/gpu: 1  # Limit to one GPU
            requests:
              cpu: 250m
              memory: 1Gi
              nvidia.com/gpu: 1  # Limit to one GPU
          env:
            - name: DOCLING_SERVE_ENABLE_UI
              value: 'true'
          ports:
            - name: http
              containerPort: 5001
              protocol: TCP
          imagePullPolicy: Always
          image: 'ghcr.io/docling-project/docling-serve-cu124'