-
Notifications
You must be signed in to change notification settings - Fork 134
Expand file tree
/
Copy pathdocker-compose-gpu.yml
More file actions
68 lines (65 loc) · 1.62 KB
/
docker-compose-gpu.yml
File metadata and controls
68 lines (65 loc) · 1.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# GPU deployment: FastAPI front-end + llama.cpp inference server.
# Requires the NVIDIA container toolkit on the host for the `deploy.resources`
# device reservations below.
services:
  # Web/API front-end; talks to llama-cpp-server over the compose network.
  orpheus-fastapi:
    container_name: orpheus-fastapi
    build:
      context: .
      dockerfile: Dockerfile.gpu
    ports:
      - "5005:5005"
    env_file:
      - .env
    environment:
      # Internal URL — uses the service name, not localhost.
      - ORPHEUS_API_URL=http://llama-cpp-server:5006/v1/completions
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
    depends_on:
      llama-cpp-server:
        condition: service_started

  # llama.cpp inference server (CUDA build); serves the GGUF model on 5006.
  llama-cpp-server:
    image: ghcr.io/ggml-org/llama.cpp:server-cuda
    ports:
      - "5006:5006"
    volumes:
      - ./models:/models
    env_file:
      - .env
    depends_on:
      # Wait until the one-shot downloader has finished (exit 0).
      model-init:
        condition: service_completed_successfully
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
    # Args appended to the image's server entrypoint. ORPHEUS_MODEL_NAME and
    # ORPHEUS_MAX_TOKENS are interpolated by compose from .env.
    command: >
      -m /models/${ORPHEUS_MODEL_NAME}
      --port 5006
      --host 0.0.0.0
      --n-gpu-layers 29
      --ctx-size ${ORPHEUS_MAX_TOKENS}
      --n-predict ${ORPHEUS_MAX_TOKENS}
      --rope-scaling linear

  # One-shot init container: downloads the model into ./models if missing.
  # NOTE: the curlimages/curl image ships curl only — the original `wget`
  # invocation could never work here. `-L` follows the HF /resolve/ redirect
  # to the CDN; `-f` makes an HTTP error fail the service instead of saving
  # an error page as the model file.
  model-init:
    image: curlimages/curl:latest
    # Run as the host user so the downloaded file isn't root-owned.
    user: ${UID}:${GID}
    volumes:
      - ./models:/app/models
    working_dir: /app
    command: >
      sh -c '
      if [ ! -f /app/models/${ORPHEUS_MODEL_NAME} ]; then
        echo "Downloading model file..."
        curl -fL -o /app/models/${ORPHEUS_MODEL_NAME} "https://huggingface.co/lex-au/${ORPHEUS_MODEL_NAME}/resolve/main/${ORPHEUS_MODEL_NAME}"
      else
        echo "Model file already exists"
      fi'
    # Quoted so YAML doesn't read `no` as the boolean false.
    restart: "no"