Skip to content

Commit baaf5f8

Browse files
Merge pull request #1 from l3montree-dev/function_documentation
Function documentation
2 parents 1fdba2a + 89f44a0 commit baaf5f8

8 files changed

Lines changed: 54 additions & 16 deletions

File tree

app/api/server.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,27 @@
1616

1717
@app.get("/")
1818
def home():
19+
"""Render the home page of the chatbot application."""
1920
return render_template("index.html")
2021

2122

2223
@app.post("/query")
2324
@limiter.limit("60 per minute") # limit LLM calls
2425
def query():
26+
"""Process a user query using RAG (Retrieval-Augmented Generation).
27+
28+
Retrieves relevant documents from the vector store and generates a
29+
response using the LLM with the retrieved context.
30+
"""
2531
q = request.json["query"]
2632
context = retrieve_top_k(q, k=10)
2733
answer = generate_response(q, context)
2834
return jsonify({"answer": answer})
2935

3036

31-
# rate limit error response
3237
@app.errorhandler(429)
3338
def ratelimit_handler(e):
39+
"""Handle rate limit exceeded errors."""
3440
return jsonify({
3541
"error": "Rate limit exceeded. Please slow down."
3642
}), 429

app/api/testing_server.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,12 @@
2828
questions : list[str] = list(set(p["question"] for p in pairs))
2929
configs : list[str] = list(set(p["config"] for p in pairs))
3030

31-
# prepare all unique pairs of configs for each question and insert into db if not already there
3231
def prepare_pairs():
32+
"""Prepare and insert unique configuration pairs for A/B testing.
33+
34+
For each question, generates all unique pairs of configurations and inserts
35+
them into the ab_pairs table if they don't already exist.
36+
"""
3337
conn = get_db_connection()
3438
cur = conn.cursor()
3539
# insert unique pairs of configs for each question into the ab_pairs table (if they don't already exist)
@@ -53,8 +57,13 @@ def prepare_pairs():
5357
conn.close()
5458

5559

56-
# get the next unanswered pair from the db, along with the corresponding answers, and randomize left/right
5760
def get_next_pair() -> tuple[int, str, dict, dict] | None:
61+
"""Retrieve the next unanswered A/B testing pair with randomized positioning.
62+
63+
Fetches an unanswered comparison pair from the database along with their
64+
corresponding answers. Randomly assigns configs to left/right positions
65+
to avoid position bias.
66+
"""
5867
conn = get_db_connection()
5968
cur = conn.cursor()
6069

@@ -95,6 +104,11 @@ def get_next_pair() -> tuple[int, str, dict, dict] | None:
95104

96105
@app.route("/")
97106
def index():
107+
"""Render the A/B evaluation interface with the next comparison pair.
108+
109+
Fetches the next unanswered pair and renders the evaluation template.
110+
Returns a completion message when all pairs have been evaluated.
111+
"""
98112
pair = get_next_pair()
99113

100114
if not pair:
@@ -113,6 +127,12 @@ def index():
113127

114128
@app.route("/vote", methods=["POST"])
115129
def vote():
130+
"""Process and save a user's vote for an A/B comparison pair.
131+
132+
Extracts the pair_id and winner choice from the form submission, records
133+
the vote in ab_results, marks the pair as answered, and redirects to the
134+
next evaluation.
135+
"""
116136
pair_id = request.form["pair_id"]
117137
winner = request.form["winner"]
118138

app/clients.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
from app.config import *
44

55
def get_mistral_client() -> Mistral:
6+
"""Create and return a Mistral AI client instance."""
67
return Mistral(api_key=API_KEY)
78

89
def get_db_connection():
10+
"""Establish and return a connection to the PostgreSQL database."""
911
return psycopg2.connect(
1012
dbname=DB_NAME,
1113
user=DB_USER,

app/generation/response.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,17 @@
44
client = get_mistral_client()
55

66
def generate_response(query: str, context: list[tuple[str, float]]) -> str:
7+
"""
8+
Generate a response to a user query using DevGuard documentation context.
9+
10+
Formats the provided context into a prompt and sends it to the Mistral API. If context
11+
is unavailable, the assistant will indicate so. If the query is unrelated to DevGuard,
12+
the assistant will politely decline and redirect to DevGuard topics.
13+
14+
When toggled on, the safe prompt prepends this system prompt to the messages: "Always assist with care, respect, and truth. Respond with
15+
utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative
16+
content. Ensure replies promote fairness and positivity."
17+
"""
718
# format context
819
context_text = "\n\n".join(
920
f"- {content}" for content, _ in context
@@ -25,10 +36,6 @@ def generate_response(query: str, context: list[tuple[str, float]]) -> str:
2536

2637
message= [{"role": "user", "content": prompt}]
2738

28-
"""
29-
Toggling the safe prompt will prepend your messages with the following system prompt:
30-
Always assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity.
31-
"""
3239
response = client.chat.complete(
3340
model=MODEL_GENERATION,
3441
messages=message,

app/ingestion/chunking.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from app.config import CHUNK_SIZE, OVERLAP_SIZE
22

3-
# split the given docs up into chunks without splitting up words
43
def chunking(docs: str) -> list[str]:
4+
"""Split documentation into chunks of approximately CHUNK_SIZE without breaking words."""
55
chunks : list[str] = []
66
start : int = 0
77
while start < len(docs):
@@ -19,8 +19,9 @@ def chunking(docs: str) -> list[str]:
1919
start = last_space - OVERLAP_SIZE
2020
return chunks
2121

22-
# option: apply overlap to the chunks after initial chunking to ensure that there is some context between them
2322
def apply_overlap(chunks: list[str]) -> list[str]:
23+
"""Apply overlap to chunks by prepending the last OVERLAP_SIZE characters
24+
from the previous chunk to each subsequent chunk."""
2425
if OVERLAP_SIZE <= 0:
2526
return chunks
2627

@@ -38,9 +39,8 @@ def apply_overlap(chunks: list[str]) -> list[str]:
3839
return overlapped
3940

4041

41-
# split recursively for a hierarchy of separators
42-
# attempt to split on high-level separators first, then move to increasingly finer separators if chunks remain too large
4342
def recursive_chunking(docs: str, separators: list[str] = ["\n\n", "\n", ". ", " ", ""]):
43+
"""Split text recursively using a hierarchy of separators, trying high-level separators first and moving to finer ones if chunks remain too large."""
4444
# base case
4545
if len(docs) <= CHUNK_SIZE:
4646
return [docs]

app/ingestion/embedding.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
from app.clients import get_mistral_client
44
from app.config import BATCH_SIZE, MODEL_EMBEDDING
55

6-
# get the embeddings for a list of chunks, return a list of embeddings
76
def get_embeddings(chunks: list[str]) -> list[list[float]]:
7+
"""Generate and return embeddings for a list of text chunks."""
88
client = get_mistral_client()
99
embeddings: list[list[float]] = []
1010
# call the api with batches to avoid hitting the rate limit
@@ -19,8 +19,8 @@ def get_embeddings(chunks: list[str]) -> list[list[float]]:
1919
embeddings.append(list(embedding))
2020
return embeddings
2121

22-
# get embedding for a single chunk of text
2322
def text_embedding(chunk: str) -> list[float]:
23+
"""Generate an embedding for a single piece of text."""
2424
client = get_mistral_client()
2525
# call the mistral api to get the embedding for the given text
2626
response = client.embeddings.create(

app/ingestion/reader.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
import os
22
from app.config import PATH_DIR
33

4-
# read the docs and return as a single string
54
def read_docs() -> str:
6-
# search for all .md files in the directory
5+
"""Traverse PATH_DIR and concatenate all markdown files."""
76
docs : str = ""
87
for root, _, files in os.walk(str(PATH_DIR)):
98
for file in files:
109
if file.endswith(".md"):
11-
with open(os.path.join(root, file), "r") as f:
10+
with open(os.path.join(root, file), "r", encoding="utf-8") as f:
1211
docs += f.read() + "\n"
1312
return docs

app/retrieval/vector_store.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22
from app.ingestion.embedding import text_embedding
33

44
def retrieve_top_k(query: str, k: int = 5):
5+
"""Retrieve the top k most similar documents to the given query.
6+
7+
This function computes the embedding for the query, queries the vector database
8+
for documents ordered by cosine similarity, and returns the top k results."""
59
embedding = text_embedding(query)
610

711
conn = get_db_connection()

0 commit comments

Comments
 (0)