# Source: GenerativeAIExamples/deploy/compose/config.yaml at v0.3.0 (NVIDIA/GenerativeAIExamples)

vector_store:
  # Connection settings for the vector store.

  name: "milvus"
  # Name of the vector store database. Supported values: pgvector, milvus.
  # Type: str
  # ENV Variable: APP_VECTORSTORE_NAME

  url: "http://milvus:19530"
  # Location (URL) of the vector store database.
  # Type: str
  # ENV Variable: APP_VECTORSTORE_URL

llm:
  # Configuration for the server hosting the large language model.

  model_engine: "triton-trt-llm"
  # Name of the backend hosting the model.
  # Currently supported options: triton-trt-llm, nv-ai-foundation
  # Type: str
  # ENV Variable: APP_LLM_MODELENGINE

  server_url: "llm:8001"
  # Location of the server hosting the large language model.
  # Use this option when model_engine is "triton-trt-llm"; ignore it when
  # model_engine is "nv-ai-foundation".
  # Type: str
  # ENV Variable: APP_LLM_SERVERURL

  model_name: "ensemble"
  # Name of the hosted model.
  # If model_engine is "triton-trt-llm", set this to "ensemble".
  # If model_engine is "nv-ai-foundation", the options are "llama2_13b",
  # "llama2_70b" and "mistral_7b".
  # Type: str
  # ENV Variable: APP_LLM_MODELNAME

text_splitter:
  # Configuration for the text splitter.

  chunk_size: 510
  # Size of each chunk produced when splitting text.
  # With a token-based text splitter, this is the number of tokens per chunk.
  # Type: int

  chunk_overlap: 200
  # Length of the overlapping text when splitting.
  # NOTE(review): presumably measured in the same units as chunk_size — confirm.
  # Type: int

embeddings:
  # Configuration for the embedding model.

  model_name: "intfloat/e5-large-v2"
  # Name of the embedding search model, from huggingface or nv-ai-foundation.
  # Type: str

  dimensions: 1024
  # Dimensions of the embedding search model from huggingface.
  # Type: int

  model_engine: "huggingface"
  # Name of the backend hosting the model.
  # Supported values: huggingface, nv-ai-foundation.
  # Type: str

prompts:
  # Prompt templates used for response generation.

  chat_template:
    "<s>[INST] <<SYS>>You are a helpful, respectful and honest assistant.Always answer as helpfully as possible, while being safe.Please ensure that your responses are positive in nature.<</SYS>>[/INST] {context_str} </s><s>[INST] {query_str} [/INST]"
  # Chat prompt template: guides the model when generating responses to queries.
  # Contains the placeholders {context_str} and {query_str}.
  # Quoted so that later edits containing ": " or " #" cannot change how the
  # value is parsed.
  # Type: str

  rag_template:
    "<s>[INST] <<SYS>>Use the following context to answer the user's question. If you don't know the answer,just say that you don't know, don't try to make up an answer.<</SYS>><s>[INST] Context: {context_str} Question: {query_str} Only return the helpful answer below and nothing else. Helpful answer:[/INST]"
  # RAG prompt template: instructs the model to generate responses to queries
  # while using the knowledge base.
  # Contains the placeholders {context_str} and {query_str}.
  # NOTE(review): a second "<s>[INST]" follows "<</SYS>>" with no closing
  # "[/INST]" in between; this looks unintended — confirm against the prompt
  # format expected by the served model before changing it.
  # Type: str