Skip to content

Commit d450fea

Browse files
committed
codeDump
1 parent 01aacad commit d450fea

4 files changed

Lines changed: 253 additions & 0 deletions

File tree

MIstral7B/Rag-test.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
"""Minimal local RAG demo: Mistral 7B (GGUF via llama.cpp) + LlamaIndex.

Indexes the documents under DATA_PATH and answers questions about them in
an interactive streaming loop. Type 'exit' (any case) to quit.
"""
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# --- Configuration ---
# Point to your downloaded model file
MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf"  # <-- IMPORTANT: update this path
# Directory of documents to index
DATA_PATH = "D:/Mistral7B/data"


def build_query_engine():
    """Load the local LLM and embedding model, index DATA_PATH, and return a streaming query engine."""
    # 1. The quantized Mistral model, run on CPU/GPU through llama-cpp-python.
    llm = LlamaCPP(
        model_path=MODEL_PATH,
        # Model parameters - you can adjust these
        temperature=0.1,
        max_new_tokens=512,
        context_window=3900,  # the model's context window size
        generate_kwargs={},
        model_kwargs={"n_gpu_layers": -1},  # -1 requests full GPU offload; set 0 for CPU-only
        verbose=True,
    )

    # 2. Local embedding model: creates numerical representations of your
    #    text for retrieval. Runs entirely on this machine.
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

    # 3. Tell LlamaIndex to use our chosen models globally.
    Settings.llm = llm
    Settings.embed_model = embed_model

    # 4. Load all files from the data directory.
    print("Loading documents...")
    documents = SimpleDirectoryReader(DATA_PATH).load_data()
    print(f"Loaded {len(documents)} document(s).")

    # 5. Build the searchable index and wire retriever + LLM together.
    print("Indexing documents... (this may take a moment)")
    index = VectorStoreIndex.from_documents(documents)
    return index.as_query_engine(streaming=True)


def main():
    """Run the interactive question/answer loop over the indexed documents."""
    query_engine = build_query_engine()
    print("\n--- Query Engine Ready ---")
    while True:
        try:
            query = input("Ask a question about your documents: ")
        except EOFError:
            # Stdin closed (e.g. piped input exhausted) -- exit cleanly
            # instead of crashing with an unhandled exception.
            break
        # Strip whitespace so " exit " / "EXIT" also terminate the loop.
        if query.strip().lower() == 'exit':
            break

        response_stream = query_engine.query(query)

        print("\nAssistant: ", end="")
        # Stream the response to the console token by token.
        response_stream.print_response_stream()
        print("\n" + "-" * 50)


if __name__ == "__main__":
    main()

MIstral7B/With-Eval.py

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
"""RAG evaluation harness for a local Mistral 7B model.

Answers a fixed "golden set" of questions while measuring per-query latency
and energy/CO2 cost with CodeCarbon, then scores answer quality with Ragas
(faithfulness, answer relevancy) using the same local model as the judge.
"""
import os  # NOTE(review): currently unused; kept in case other tooling relies on it
import time

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from codecarbon import OfflineEmissionsTracker
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy
from datasets import Dataset
from langchain_community.llms import LlamaCpp
from ragas.llms import LangchainLLMWrapper

# --- 1. Configuration ---

# Set the path to your downloaded GGUF model
MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf"  # <-- IMPORTANT: Update this path if needed

# Set the path to your data (PDFs, .txt, etc.)
DATA_PATH = "D:/Mistral7B/data"  # <-- IMPORTANT: Update this path if needed

# ISO 3166-1 alpha-3 country code for CodeCarbon's offline carbon-intensity lookup.
# Find your code: https://en.wikipedia.org/wiki/List_of_ISO_3166-1_alpha-3_codes
# Using "EGY" for Egypt as an example
YOUR_COUNTRY_ISO_CODE = "EGY"

# Define your "Golden Set" of test questions
TEST_QUESTIONS = [
    "What is the main topic of the document?",
    #"Summarize the key findings in three bullet points.",
    # ... add 10-15 more of your own questions ...
    #"What is [a specific term] according to the text?",
    #"What conclusion does the author reach?",
]


def _build_query_engine():
    """Initialize the local LLM + embedding model and index DATA_PATH; return a query engine."""
    print("Initializing models...")

    # Load the local LLM (Mistral 7B) via llama-cpp-python.
    llm = LlamaCPP(
        model_path=MODEL_PATH,
        temperature=0.1,
        max_new_tokens=512,
        context_window=3900,
        generate_kwargs={},
        model_kwargs={"n_gpu_layers": 1},  # set > 0 if you have GPU offloading
        verbose=True,
    )

    # Load the local embedding model used for retrieval.
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

    # Set up LlamaIndex global settings so the index/query engine use our models.
    Settings.llm = llm
    Settings.embed_model = embed_model

    print("Loading documents...")
    documents = SimpleDirectoryReader(DATA_PATH).load_data()
    print(f"Loaded {len(documents)} document(s).")

    print("Indexing documents... (this may take a moment)")
    index = VectorStoreIndex.from_documents(documents)
    query_engine = index.as_query_engine()
    print("Indexing complete.")
    return query_engine


def _run_measured_queries(query_engine):
    """Run every golden-set question, tracking latency and emissions per query.

    Returns a dict with parallel 'question' / 'answer' / 'contexts' lists
    in the column layout Ragas expects.
    """
    eval_data = {
        "question": [],
        "answer": [],
        "contexts": [],
    }

    # Initialize the CO2 emissions tracker (offline mode: no network lookup).
    print(f"\nInitializing CodeCarbon tracker for country: {YOUR_COUNTRY_ISO_CODE}")
    tracker = OfflineEmissionsTracker(country_iso_code=YOUR_COUNTRY_ISO_CODE)
    tracker.start()

    print("\n--- Starting Evaluation Loop ---")

    try:
        for query in TEST_QUESTIONS:
            print(f"\nQuerying: {query}")

            # Track this specific query as its own CodeCarbon task.
            tracker.start_task("RAG Query")
            start_time = time.time()

            response = query_engine.query(query)

            end_time = time.time()
            # stop_task() returns an EmissionsData object (attribute access below).
            # NOTE(review): assumes the installed codecarbon version returns the
            # task's EmissionsData here rather than None -- confirm against the
            # pinned codecarbon release.
            emissions_data = tracker.stop_task()

            # Collect results for the Ragas accuracy pass.
            answer = str(response)
            contexts = [node.get_content() for node in response.source_nodes]

            eval_data["question"].append(query)
            eval_data["answer"].append(answer)
            eval_data["contexts"].append(contexts)

            # Per-query report: answer, latency, emissions, energy.
            print(f"Answer: {answer}")
            print("-" * 30)
            print(f"Latency: {end_time - start_time:.2f} seconds")
            # kg -> g and kWh -> Wh conversions (x1000).
            print(f"Emissions: {emissions_data.emissions * 1000:.6f} gCO2eq")
            print(f"Energy: {emissions_data.energy_consumed * 1000:.6f} Wh")
            print("=" * 50)
    finally:
        # stop() returns the total emissions as a float in kg; always runs so
        # the tracker flushes emissions.csv even if a query raised.
        total_emissions_kg = tracker.stop()
        print("\n--- Total Emissions Summary (Saved to emissions.csv) ---")
        print(f"Total Energy Consumed: {tracker.final_emissions_data.energy_consumed * 1000:.4f} Wh")
        print(f"Total CO2 Emitted: {total_emissions_kg * 1000:.4f} gCO2eq")

    return eval_data


def _run_ragas_eval(eval_data):
    """Score the collected answers with Ragas using the local model as judge."""
    print("\n--- Running Ragas Accuracy Evaluation ---")

    # Convert the collected data into a Hugging Face Dataset object.
    eval_dataset = Dataset.from_dict(eval_data)

    # Ragas needs a judge LLM; wrap our local model via the LangChain adapter.
    # This is a separate LangChain LlamaCpp object pointing at the same file.
    eval_llm = LlamaCpp(
        model_path=MODEL_PATH,
        n_gpu_layers=1,   # match the settings used for generation
        n_batch=512,      # match your settings
        n_ctx=3900,       # match your settings
        temperature=0,    # evaluators should be deterministic
        verbose=False,
    )
    ragas_llm = LangchainLLMWrapper(eval_llm)

    # Run the evaluation, passing the wrapped judge LLM and the embeddings
    # already configured on the global Settings.
    result = evaluate(
        eval_dataset,
        metrics=[
            faithfulness,
            answer_relevancy,
        ],
        llm=ragas_llm,
        embeddings=Settings.embed_model,
    )

    print("\n--- Ragas Accuracy Results ---")
    # The result will be a dictionary like:
    # {'faithfulness': 0.85, 'answer_relevancy': 0.92}
    print(result)


def main():
    """Full pipeline: index, measured query loop, Ragas scoring."""
    query_engine = _build_query_engine()
    eval_data = _run_measured_queries(query_engine)
    _run_ragas_eval(eval_data)
    print("\n--- Project Evaluation Complete ---")


if __name__ == "__main__":
    main()
5.44 MB
Binary file not shown.

MIstral7B/testing-gpu.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
"""Smoke test: verify llama-cpp-python can load the GGUF model with GPU offload.

Loads the model with n_gpu_layers=-1 and verbose logging enabled; inspect
the log output for CUDA/cuBLAS lines to confirm layers were actually
offloaded to the GPU.
"""
from llama_cpp import Llama

# --- Configuration ---
# IMPORTANT: Update this to the correct path on your machine
MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf"


def main():
    """Attempt the model load and report success or the failure reason."""
    print("Attempting to load model with GPU...")
    try:
        # Return value intentionally discarded -- we only care whether the
        # load succeeds and what the verbose log says about offloading.
        Llama(
            model_path=MODEL_PATH,
            n_gpu_layers=-1,  # try to offload all layers to GPU
            verbose=True,     # verbose logs are the most important part!
        )
    except Exception as e:
        # Broad catch is intentional: this is a top-level diagnostic boundary
        # and any failure mode should be reported, not crash the script.
        print("\n--- TEST FAILED ---")
        print(f"An error occurred: {e}")
    else:
        print("\n--- TEST SUCCESSFUL ---")
        # Check the output above for lines mentioning CUDA or cuBLAS and layer offloading


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)