forked from urchade/GLiNER
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdocker-entrypoint.sh
More file actions
38 lines (30 loc) · 1021 Bytes
/
docker-entrypoint.sh
File metadata and controls
38 lines (30 loc) · 1021 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/bin/bash
#
# Container entrypoint: assembles the `python -m gliner.serve` command line
# from GLINER_* environment variables, logs it, and replaces this shell with
# the resulting process via `exec`.
#
# Environment variables (value used when unset or empty in parentheses):
#   GLINER_MODEL               (urchade/gliner_small-v2.1) -> --model
#   GLINER_DEVICE              (cuda)                      -> --device
#   GLINER_DTYPE               (bfloat16)                  -> --dtype
#   GLINER_MAX_BATCH_SIZE      (32)                        -> --max-batch-size
#   GLINER_BATCH_WAIT_MS       (5)                         -> --batch-wait-timeout-ms
#   GLINER_MEMORY_FRACTION     (0.8)                       -> --target-memory-fraction
#   GLINER_NUM_REPLICAS        passed as --num-replicas only when not "1"
#   GLINER_DISABLE_COMPILE     "true" adds --no-compile
#   GLINER_ENABLE_FLASHDEBERTA "true" adds --enable-flashdeberta
#   GLINER_ENABLE_PACKING      "true" adds --enable-sequence-packing
#   GLINER_QUANTIZATION        non-empty adds --quantization <value>
#   GLINER_ROUTE_PREFIX        non-empty adds --route-prefix <value>
set -euo pipefail

# Build argv as an array rather than a flat string: every value is passed to
# Python as exactly one argument, so values containing spaces or glob
# characters (e.g. a route prefix like "/*") are neither split nor expanded.
cmd=(python -m gliner.serve)
cmd+=(--model "${GLINER_MODEL:-urchade/gliner_small-v2.1}")
cmd+=(--device "${GLINER_DEVICE:-cuda}")
cmd+=(--dtype "${GLINER_DTYPE:-bfloat16}")
cmd+=(--max-batch-size "${GLINER_MAX_BATCH_SIZE:-32}")
cmd+=(--batch-wait-timeout-ms "${GLINER_BATCH_WAIT_MS:-5}")
cmd+=(--target-memory-fraction "${GLINER_MEMORY_FRACTION:-0.8}")

# The replica flag is only forwarded when a value other than "1" is requested.
if [[ "${GLINER_NUM_REPLICAS:-1}" != "1" ]]; then
  cmd+=(--num-replicas "${GLINER_NUM_REPLICAS}")
fi

# Boolean switches: enabled only by the exact string "true".
if [[ "${GLINER_DISABLE_COMPILE:-}" == "true" ]]; then
  cmd+=(--no-compile)
fi

if [[ "${GLINER_ENABLE_FLASHDEBERTA:-}" == "true" ]]; then
  cmd+=(--enable-flashdeberta)
fi

if [[ "${GLINER_ENABLE_PACKING:-}" == "true" ]]; then
  cmd+=(--enable-sequence-packing)
fi

# Optional valued flags: forwarded only when the variable is non-empty.
if [[ -n "${GLINER_QUANTIZATION:-}" ]]; then
  cmd+=(--quantization "${GLINER_QUANTIZATION}")
fi

if [[ -n "${GLINER_ROUTE_PREFIX:-}" ]]; then
  cmd+=(--route-prefix "${GLINER_ROUTE_PREFIX}")
fi

echo "Starting GLiNER Serve..."
# "${cmd[*]}" joins the arguments with single spaces for a readable log line.
echo "Command: ${cmd[*]}"

# exec replaces the shell so the server process receives signals directly.
exec "${cmd[@]}"