Skip to content

Commit ef51512

Browse files
authored
[DocSum] Aligned the output format (#1948)
1 parent d37062b commit ef51512

3 files changed

Lines changed: 45 additions & 0 deletions

File tree

DocSum/docker_compose/intel/cpu/xeon/compose.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ services:
1717
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
1818
LLM_MODEL_ID: ${LLM_MODEL_ID}
1919
VLLM_TORCH_PROFILER_DIR: "/mnt"
20+
VLLM_CPU_KVCACHE_SPACE: 40
2021
healthcheck:
2122
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
2223
interval: 10s

DocSum/docker_compose/intel/set_env.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@ export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
1414

1515
export LLM_ENDPOINT_PORT=8008
1616
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
17+
18+
export BLOCK_SIZE=128
19+
export MAX_NUM_SEQS=256
20+
export MAX_SEQ_LEN_TO_CAPTURE=2048
21+
export NUM_CARDS=1
1722
export MAX_INPUT_TOKENS=1024
1823
export MAX_TOTAL_TOKENS=2048
1924

DocSum/docsum.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import asyncio
55
import base64
6+
import json
67
import os
78
import subprocess
89
import uuid
@@ -142,11 +143,49 @@ def read_text_from_file(file, save_file_name):
142143
return file_content
143144

144145

146+
def _extract_stream_payload(json_str):
    """Return the UTF-8 bytes to emit for one upstream chunk, or None to emit nothing.

    Recognised shapes:
      * ``{"ops": [{"op": ..., "value": "<text>"}, ...]}`` -> the first op's
        ``value`` when it is a string, otherwise nothing.
      * OpenAI-style ``{"choices": [{"delta": {"content": "<text>"}, ...}]}``
        -> ``content`` when it is a string and ``finish_reason`` is not
        ``"eos_token"``, otherwise nothing.

    Anything that cannot be parsed or does not match a recognised shape is
    returned verbatim (best-effort fallback) so the client still receives the
    upstream data.
    """
    try:
        payload = json.loads(json_str)

        if "ops" in payload and "op" in payload["ops"][0]:
            first_op = payload["ops"][0]
            if "value" in first_op and isinstance(first_op["value"], str):
                return first_op["value"].encode("utf-8")
            return None

        choices = payload["choices"]
        if not choices:
            # e.g. a usage-only chunk: valid protocol data, but no text to forward.
            return None

        choice = choices[0]
        if choice.get("finish_reason") == "eos_token":
            return None

        content = choice["delta"].get("content")
        # ``content`` may be absent or null (role-only / final chunks); emitting
        # the raw JSON in that case would leak protocol data into the text stream.
        if isinstance(content, str):
            return content.encode("utf-8")
        return None
    except Exception:
        # Deliberate best-effort: sometimes the upstream yields an empty or
        # unexpected chunk; forward whatever was extracted instead of failing
        # the whole stream.
        return json_str.encode("utf-8")


def align_generator(self, gen, **kwargs):
    """Re-emit an upstream LLM stream as ``data: <repr of UTF-8 bytes>`` events.

    Each upstream line is expected to look like an OpenAI streaming response:

    b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n'

    Args:
        gen: Iterable of upstream lines, as ``bytes`` (decoded as UTF-8) or ``str``.
        **kwargs: Accepted for interface compatibility; unused here.

    Yields:
        One ``"data: b'...'\\n\\n"`` string per chunk that carries text (or per
        chunk that could not be interpreted, as a raw fallback), followed by a
        final ``"data: [DONE]\\n\\n"``.
    """
    for line in gen:
        if isinstance(line, (bytes, bytearray)):
            line = line.decode("utf-8")

        # Slice out the outermost {...} span; the "data:" prefix and trailing
        # newlines are not part of the JSON document.
        start = line.find("{")
        end = line.rfind("}") + 1
        json_str = line[start:end] if start != -1 and end > start else ""

        payload = _extract_stream_payload(json_str)
        if payload is not None:
            yield f"data: {payload!r}\n\n"
    yield "data: [DONE]\n\n"
181+
182+
145183
class DocSumService:
146184
def __init__(self, host="0.0.0.0", port=8000):
147185
self.host = host
148186
self.port = port
149187
ServiceOrchestrator.align_inputs = align_inputs
188+
ServiceOrchestrator.align_generator = align_generator
150189
self.megaservice = ServiceOrchestrator()
151190
self.megaservice_text_only = ServiceOrchestrator()
152191
self.endpoint = str(MegaServiceEndpoint.DOC_SUMMARY)

0 commit comments

Comments
 (0)