# config.yaml
---
# The configuration of the Vector Store connection.
vector_store:
  # The name of the vector store DB. Can be pgvector or milvus.
  # Type: str
  # ENV Variable: APP_VECTORSTORE_NAME
  name: milvus

  # The location of the vector store DB.
  # Type: str
  # ENV Variable: APP_VECTORSTORE_URL
  url: "http://milvus:19530"
# The configuration for the server hosting the large language model.
llm:
  # The backend name hosting the model.
  # Options currently supported are: triton-trt-llm, nv-ai-foundation
  # Type: str
  # ENV Variable: APP_LLM_MODELENGINE
  model_engine: "triton-trt-llm"

  # The location of the server hosting the large language model.
  # Use this option when model_engine is set to "triton-trt-llm";
  # ignore it when model_engine is set to "nv-ai-foundation".
  # Type: str
  # ENV Variable: APP_LLM_SERVERURL
  server_url: "llm:8001"

  # The name of the hosted model.
  # If model_engine is "triton-trt-llm", set this to "ensemble".
  # If model_engine is "nv-ai-foundation", options are
  # "llama2_13b", "llama2_70b", "mistral_7b".
  # Type: str
  # ENV Variable: APP_LLM_MODELNAME
  model_name: "ensemble"
# The configuration for the Text Splitter.
text_splitter:
  # Chunk size for text splitting.
  # When using a token-based text splitter, this is the number of
  # 'tokens per chunk'.
  # Type: int
  chunk_size: 510

  # Overlapping text length for splitting.
  # Type: int
  chunk_overlap: 200
# The configuration for the embedding models.
embeddings:
  # The name of the embedding search model from huggingface or
  # nv-ai-foundation.
  # Type: str
  model_name: intfloat/e5-large-v2

  # The dimensions of the embedding search model from huggingface.
  # Type: int
  dimensions: 1024

  # The backend name hosting the model; huggingface and nv-ai-foundation
  # are supported.
  # Type: str
  model_engine: huggingface
# The configuration for the prompts used for response generation.
prompts:
  # The chat prompt template guides the model to generate responses for
  # queries. The {context_str} and {query_str} placeholders appear in the
  # template text and must be kept as written.
  # Type: str
  chat_template:
    "<s>[INST] <<SYS>>You are a helpful, respectful and honest assistant.Always answer as helpfully as possible, while being safe.Please ensure that your responses are positive in nature.<</SYS>>[/INST] {context_str} </s><s>[INST] {query_str} [/INST]"

  # The RAG prompt template instructs the model to generate responses for
  # queries while utilizing the knowledge base.
  # Quoted because the text contains ": " and apostrophes.
  # Type: str
  rag_template:
    "<s>[INST] <<SYS>>Use the following context to answer the user's question. If you don't know the answer,just say that you don't know, don't try to make up an answer.<</SYS>><s>[INST] Context: {context_str} Question: {query_str} Only return the helpful answer below and nothing else. Helpful answer:[/INST]"