Skip to content

Commit 20c9463

Browse files
committed
formatting3
1 parent 2be1afb commit 20c9463

4 files changed

Lines changed: 198 additions & 0 deletions

File tree

mistral7b/Rag-test.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
"""Minimal local RAG pipeline: quantized Mistral-7B (GGUF) + LlamaIndex.

Loads a llama.cpp model, embeds documents from a local directory with a
HuggingFace embedding model, builds an in-memory vector index, and runs an
interactive query loop that streams answers to the console.
"""

from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP

# --- Configuration ---
# Point to your downloaded model file
MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf"  # <-- IMPORTANT: update this path
DATA_DIR = "D:/Mistral7B/data"  # Directory of documents to index

# --- 1. Load the LLM (our quantized Mistral model) ---
# This uses llama-cpp-python to run the GGUF model locally.
llm = LlamaCPP(
    model_path=MODEL_PATH,
    temperature=0.1,      # Low temperature -> mostly deterministic answers
    max_new_tokens=512,
    context_window=3900,  # The model's context window size
    generate_kwargs={},
    # n_gpu_layers=-1 offloads ALL layers to the GPU (when llama.cpp is built
    # with GPU support); set to 0 to force CPU-only inference.
    model_kwargs={"n_gpu_layers": -1},
    verbose=True,
)

# --- 2. Configure the Embedding Model ---
# This model creates numerical representations of your text for retrieval.
# It runs locally on your machine.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# --- 3. Set up Global Settings ---
# This tells LlamaIndex to use our chosen models.
Settings.llm = llm
Settings.embed_model = embed_model

# --- 4. Load Your Data ---
# This will load all files from the data directory.
print("Loading documents...")
documents = SimpleDirectoryReader(DATA_DIR).load_data()
print(f"Loaded {len(documents)} document(s).")

# --- 5. Create the Index and Query Engine ---
# The VectorStoreIndex will process your documents and build a searchable index.
# The query engine connects the retriever (finds relevant text) with the LLM (generates answers).
print("Indexing documents... (this may take a moment)")
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine(streaming=True)

# --- 6. Start Querying ---
print("\n--- Query Engine Ready ---")
while True:
    # Strip whitespace so "exit " (trailing space / newline pastes) still exits.
    query = input("Ask a question about your documents: ").strip()
    if query.lower() == "exit":
        break
    if not query:
        # Ignore empty input instead of sending a blank query to the model.
        continue

    response_stream = query_engine.query(query)

    print("\nAssistant: ", end="")
    # Stream the response to the console
    response_stream.print_response_stream()
    print("\n" + "-" * 50)
5.44 MB
Binary file not shown.

mistral7b/evaluate_results.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
"""Evaluate saved RAG answers with Ragas (faithfulness + answer relevancy).

Loads question/answer/contexts records produced by the generation script,
wraps the local GGUF model as the Ragas judge LLM, runs the metrics, and
writes the scores to a JSON file.
"""

import json
import traceback

import numpy as np
from datasets import Dataset
from langchain_community.llms import LlamaCpp  # Use LangChain's LlamaCpp for evaluation
from llama_index.embeddings.huggingface import (
    HuggingFaceEmbedding,
)  # Need this for embeddings
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import answer_relevancy, faithfulness

# --- 1. Configuration ---
MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf"  # Same model used for generation
INPUT_FILE = "D:/Mistral7B/rag_results.json"  # The file saved by the previous script
OUTPUT_FILE = "ragas_evaluation_results.json"  # Where the metric scores are written

# --- 2. Load the Saved Results ---
print(f"Loading results from {INPUT_FILE}...")
with open(INPUT_FILE, "r", encoding="utf-8") as f:
    loaded_data = json.load(f)

# Ragas requires a HF Dataset with columns 'question', 'answer', 'contexts'.
# Only the first record is evaluated to keep the local run short.
eval_dataset = Dataset.from_list([loaded_data[0]])
print(f"Loaded {len(eval_dataset)} results.")

# --- 3. Initialize Evaluator Model and Embeddings ---
print("Initializing evaluator models...")

eval_llm = LlamaCpp(
    model_path=MODEL_PATH,
    n_ctx=1024,  # Keep reduced context
)
ragas_llm = LangchainLLMWrapper(eval_llm)

# --- Smoke-test the judge LLM before the (expensive) Ragas run ---
print("\n--- Testing eval_llm directly ---")
try:
    test_prompt = (
        "Explain the importance of testing in software development in one sentence."
    )
    print(f"Sending test prompt: {test_prompt}")
    response = eval_llm.invoke(test_prompt)
    print(f"Test response received: {response}")
    print("--- eval_llm test successful ---\n")
except Exception as e:
    print("--- eval_llm test FAILED ---")
    print(f"Error during direct invocation: {e}")
    traceback.print_exc()
    # Continue to the Ragas evaluation anyway; uncomment to abort instead.
    # raise SystemExit(1)

# Ragas metrics also need an embedding model.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# --- 4. Run Ragas Evaluation ---
print("\n--- Running Ragas Accuracy Evaluation ---")

try:
    print("Starting Ragas evaluate()...")
    result = evaluate(
        eval_dataset,
        metrics=[
            faithfulness,
            answer_relevancy,
        ],
        llm=ragas_llm,
        embeddings=embed_model,
        # raise_exceptions=False  # Optional: try adding this if it keeps crashing
    )
    print("Ragas evaluate() finished.")

    print("\n--- Ragas Accuracy Results ---")
    print(result)

    # Ragas may return a plain dict or a dict-like result object; accept
    # anything exposing .items(). Checking BEFORE opening the output file
    # avoids creating/truncating an empty JSON file when there is nothing
    # to save (the original opened the file first).
    if result is not None and hasattr(result, "items"):
        result_dict = {
            # NaN is not valid JSON; map non-finite / non-numeric scores to None.
            k: float(v)
            if isinstance(v, (np.number, float)) and not np.isnan(v)
            else None
            for k, v in result.items()
        }
        print(f"Result dictionary prepared: {result_dict}")
        with open(OUTPUT_FILE, "w") as f:
            json.dump(result_dict, f, indent=4)
        print(f"Results saved to {OUTPUT_FILE}")
    else:
        print("Evaluation result was None or not dict-like, skipping save.")

except Exception as e:
    print("\n--- Evaluation Error ---")
    print(f"Error during evaluation or saving: {e}")
    traceback.print_exc()

# Explicitly drop the model wrappers so llama.cpp memory is released promptly.
if "eval_llm" in locals():
    print("Deleting LLM objects...")
    del ragas_llm
    del eval_llm
    print("LLM objects deleted.")

print("\n--- Evaluation Script Finished ---")

mistral7b/testing-gpu.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
"""Smoke test: verify llama.cpp can load the GGUF model with GPU offload.

Run this and inspect the verbose llama.cpp log for CUDA/cuBLAS mentions and
layer-offload lines to confirm GPU acceleration is actually active.
"""

import traceback

from llama_cpp import Llama

# --- Configuration ---
# IMPORTANT: Update this to the correct path on your machine
MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf"

print("Attempting to load model with GPU...")

try:
    llm = Llama(
        model_path=MODEL_PATH,
        n_gpu_layers=-1,  # -1 = try to offload all layers to GPU
        verbose=True,     # The verbose log is the whole point of this test!
    )
    print("\n--- TEST SUCCESSFUL ---")
    # Check the output above for lines mentioning CUDA or cuBLAS and layer offloading
except Exception as e:
    print("\n--- TEST FAILED ---")
    print(f"An error occurred: {e}")
    # Full traceback distinguishes a missing model file from a build without
    # GPU support or an out-of-memory failure — str(e) alone hides this.
    traceback.print_exc()

0 commit comments

Comments
 (0)