1+ import os
2+ import time
3+ from llama_index .core import VectorStoreIndex , SimpleDirectoryReader , Settings
4+ from llama_index .llms .llama_cpp import LlamaCPP
5+ from llama_index .embeddings .huggingface import HuggingFaceEmbedding
6+ from codecarbon import OfflineEmissionsTracker
7+ from ragas import evaluate
8+ from ragas .metrics import faithfulness , answer_relevancy
9+ from datasets import Dataset
10+ from langchain_community .llms import LlamaCpp
11+ from ragas .llms import LangchainLLMWrapper
12+
# --- 1. Configuration ---

# Path to the downloaded GGUF model file.
MODEL_PATH = "D:/Mistral7B/mistral-7b-instruct-v0.2.Q4_K_M.gguf"  # <-- IMPORTANT: Update this path if needed

# Directory containing the documents to index (PDFs, .txt, etc.).
DATA_PATH = "D:/Mistral7B/data"  # <-- IMPORTANT: Update this path if needed

# ISO 3166-1 alpha-3 country code used by CodeCarbon to look up the local
# grid's carbon intensity ("EGY" = Egypt).
# Find your code: https://en.wikipedia.org/wiki/List_of_ISO_3166-1_alpha-3_codes
YOUR_COUNTRY_ISO_CODE = "EGY"

# "Golden set" of evaluation questions run through the RAG pipeline.
TEST_QUESTIONS = [
    "What is the main topic of the document?",
    # "Summarize the key findings in three bullet points.",
    # ... add 10-15 more of your own questions ...
    # "What is [a specific term] according to the text?",
    # "What conclusion does the author reach?",
]
34+
# --- 2. Initialize Models ---

print("Initializing models...")

# Local Mistral 7B model served through llama.cpp bindings.
llm = LlamaCPP(
    model_path=MODEL_PATH,
    temperature=0.1,
    max_new_tokens=512,
    context_window=3900,
    generate_kwargs={},
    model_kwargs={"n_gpu_layers": 1},  # set > 0 if you have GPU offloading
    verbose=True,
)

# Local sentence-embedding model used for retrieval.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Register both models as the process-wide LlamaIndex defaults so the
# index and query engine below pick them up automatically.
Settings.llm = llm
Settings.embed_model = embed_model
56+
# --- 3. Load & Index Documents ---

print("Loading documents...")
reader = SimpleDirectoryReader(DATA_PATH)
documents = reader.load_data()
print(f"Loaded {len(documents)} document(s).")

print("Indexing documents... (this may take a moment)")
# Build an in-memory vector index over the documents and expose it as a
# query engine (retrieval + answer synthesis).
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
print("Indexing complete.")
67+
# --- 4. Run Evaluation (Accuracy & Environmental Cost) ---

# Per-question results accumulated for the ragas evaluation in Section 5.
eval_data = {key: [] for key in ("question", "answer", "contexts")}

# Offline tracker: estimates CO2 from measured hardware power draw and the
# carbon intensity of the configured country's electricity grid.
print(f"\n Initializing CodeCarbon tracker for country: {YOUR_COUNTRY_ISO_CODE} ")
tracker = OfflineEmissionsTracker(country_iso_code=YOUR_COUNTRY_ISO_CODE)
tracker.start()

print("\n --- Starting Evaluation Loop ---")
83+
try:
    for query in TEST_QUESTIONS:
        print(f"\n Querying: {query} ")

        # Meter this single query as its own CodeCarbon task, and time it.
        # NOTE(review): if query() raises, stop_task() is never reached and the
        # task is left dangling; tracker.stop() in the finally still runs —
        # confirm codecarbon tolerates an unfinished task.
        tracker.start_task("RAG Query")
        start_time = time.time()

        response = query_engine.query(query)

        end_time = time.time()
        # stop_task() returns an EmissionsData OBJECT for just this task.
        emissions_data = tracker.stop_task()

        # Record the answer and the retrieved context chunks for ragas.
        answer = str(response)
        contexts = [node.get_content() for node in response.source_nodes]

        eval_data["question"].append(query)
        eval_data["answer"].append(answer)
        eval_data["contexts"].append(contexts)

        # Per-query report.
        print(f"Answer: {answer} ")
        print("-" * 30)
        print(f"Latency: {end_time - start_time:.2f} seconds")
        # EmissionsData reports kg / kWh; scale by 1000 to grams / Wh.
        print(f"Emissions: {emissions_data.emissions * 1000:.6f} gCO2eq")
        print(f"Energy: {emissions_data.energy_consumed * 1000:.6f} Wh")
        print("=" * 50)

finally:
    # stop() returns a FLOAT (total emissions in kg CO2eq) and writes the
    # detailed breakdown to emissions.csv.
    total_emissions_kg = tracker.stop()
    print("\n --- Total Emissions Summary (Saved to emissions.csv) ---")
    # Total energy is read from the tracker's final snapshot.
    print(f"Total Energy Consumed: {tracker.final_emissions_data.energy_consumed * 1000:.4f} Wh")
    print(f"Total CO2 Emitted: {total_emissions_kg * 1000:.4f} gCO2eq")
130+
131+
# --- 5. Run Ragas Accuracy Evaluation ---

print("\n --- Running Ragas Accuracy Evaluation ---")

# ragas consumes a Hugging Face Dataset with question/answer/contexts columns.
eval_dataset = Dataset.from_dict(eval_data)

# ragas needs an LLM to act as judge. Create a separate LangChain LlamaCpp
# instance pointing at the same local GGUF file, then wrap it so ragas can
# drive it. (Requires the langchain-community package.)
eval_llm = LlamaCpp(
    model_path=MODEL_PATH,
    n_gpu_layers=1,  # match the settings from Section 2
    n_batch=512,  # match your settings
    n_ctx=3900,  # match your settings
    temperature=0,  # evaluators should be deterministic
    verbose=False,
)
ragas_llm = LangchainLLMWrapper(eval_llm)

# Run the evaluation with the wrapped judge LLM and the embeddings.
# NOTE(review): embed_model is a LlamaIndex embedding object; ragas typically
# expects a LangChain/ragas embeddings wrapper here — verify the installed
# ragas version accepts it.
result = evaluate(
    eval_dataset,
    metrics=[faithfulness, answer_relevancy],
    llm=ragas_llm,
    embeddings=embed_model,
)

print("\n --- Ragas Accuracy Results ---")
print(result)
# Example result: {'faithfulness': 0.85, 'answer_relevancy': 0.92}

print("\n --- Project Evaluation Complete ---")