# Twitter-Sentiment-Analysis-using-LLM-Prompt-Strategies/main.py
# (repository: Mo7amed676/Twitter-Sentiment-Analysis-using-LLM-Prompt-Strategies, branch: main)

from dotenv import load_dotenv
import os
import json
import re
from langchain_google_genai import ChatGoogleGenerativeAI

# ==============================
# CONFIG
# ==============================

# Text file passed to read_lines() below; each non-blank line is one input text.
INPUT_FILE = "data/input.txt"
# JSON file that save_json() writes the parsed model output to.
OUTPUT_FILE = "data/output.json"

# ==============================
# HELPERS
# ==============================

def read_lines(file_path: str) -> list[str]:
    """Return the non-blank lines of a UTF-8 text file, whitespace-trimmed.

    Args:
        file_path: Path of the text file to read.

    Returns:
        Every line with leading/trailing whitespace removed, in file order.
        Lines that are empty after trimming are skipped.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # "utf-8-sig" decodes plain UTF-8 unchanged but also drops a leading
    # byte-order mark. With plain "utf-8" the mark (U+FEFF) survives
    # str.strip() because it is not whitespace, and ends up glued to the
    # first text.
    with open(file_path, "r", encoding="utf-8-sig") as f:
        stripped_lines = (line.strip() for line in f)
        return [line for line in stripped_lines if line]

def save_json(data, file_path: str):
    """Serialize *data* as pretty-printed JSON into *file_path*.

    The file is created or overwritten and written as UTF-8. Non-ASCII
    characters are stored as-is (no \\uXXXX escaping) and nesting is indented
    by two spaces.

    Args:
        data: Any JSON-serializable object.
        file_path: Destination path of the JSON file.
    """
    dump_options = {"ensure_ascii": False, "indent": 2}
    with open(file_path, "w", encoding="utf-8") as out_file:
        json.dump(data, out_file, **dump_options)

def clean_json_output(text: str) -> str:
    """Extract the JSON array from a model reply.

    The reply may wrap the array in markdown fences (```json ... ```) or
    surround it with prose. The function returns the first bracketed span
    that parses as valid JSON, copied verbatim from the reply, so backticks
    or brackets that appear inside JSON string values are left untouched.

    If no span parses, it falls back to a best-effort cleanup: fence markers
    are removed and the text from the first "[" to the last "]" is returned,
    or the whole stripped text when there are no brackets. The caller's
    json.loads() then reports the malformed output.

    Args:
        text: Raw text returned by the model.

    Returns:
        The JSON array as a string, or the best-effort cleaned text.
    """
    decoder = json.JSONDecoder()

    # Try each "[" as the possible start of the array. raw_decode() parses one
    # JSON value and ignores whatever follows it, which skips closing fences
    # and trailing commentary without touching the payload itself.
    start = text.find("[")
    while start != -1:
        try:
            _, length = decoder.raw_decode(text[start:])
        except json.JSONDecodeError:
            start = text.find("[", start + 1)
        else:
            return text[start:start + length]

    # Fallback for replies that contain no parseable array.
    unfenced = re.sub(r"```json", "", text)
    unfenced = re.sub(r"```", "", unfenced)

    match = re.search(r"\[.*\]", unfenced, re.DOTALL)
    if match:
        return match.group()

    return unfenced.strip()

# ==============================
# LOAD ENV
# ==============================

# load_dotenv() is expected to populate the process environment from .env
# before the settings are read.
load_dotenv()

MODEL_NAME = os.environ.get("MODEL_NAME")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Both settings are required; a missing or empty value stops the script here.
if not (MODEL_NAME and GOOGLE_API_KEY):
    raise ValueError("❌ MODEL_NAME or GOOGLE_API_KEY not found in .env file")

# ==============================
# INIT MODEL
# ==============================

# Chat model client used for the analysis request further down.
llm = ChatGoogleGenerativeAI(
    temperature=0,
    google_api_key=GOOGLE_API_KEY,
    model=MODEL_NAME,
)

# ==============================
# READ INPUT
# ==============================

texts = read_lines(INPUT_FILE)

# With no input texts there is nothing to analyse, so stop before the model
# is called.
if texts:
    print(f"📥 Loaded {len(texts)} texts")
else:
    raise ValueError("❌ No input texts found.")

# ==============================
# BUILD PROMPT
# ==============================
# Serialize the inputs as one JSON array; ensure_ascii=False keeps non-ASCII
# characters as-is instead of \uXXXX escapes.
texts_as_json = json.dumps(texts, ensure_ascii=False)

# Doubled braces ({{ and }}) are literal braces inside the f-string.
prompt = f"""
You are an advanced linguistic sentiment analysis expert.

Analyze the sentiment of the following texts.

Return ONLY a valid JSON array.
Do not add explanations outside JSON.
Keep the same order of input texts.

Each item must follow this exact structure:

{{
"text": "...",
"sentiment": "Positive | Negative | Neutral",
"reasoning": "Detailed analytical explanation explaining why the sentiment was chosen.
Mention specific words or phrases from the text that influenced the decision.
If the sentence contains mixed signals, explain which side is stronger and why.",
"reliability": 0.0
}}

Texts:
{texts_as_json}
"""

# ==============================
# CALL MODEL
# ==============================

# Send the prompt in a single request and keep the reply content.
try:
    response = llm.invoke(prompt)
    raw_output = response.content
except Exception as e:
    # Top-level boundary: report whatever the client raised and stop.
    print("❌ Error calling Gemini:", e)
    # exit() is a helper the `site` module injects for interactive sessions,
    # and called without arguments it ends the process with status 0
    # (success). Raise SystemExit with a non-zero status so shells and
    # schedulers can tell that the run failed.
    raise SystemExit(1)

# ==============================
# CLEAN OUTPUT
# ==============================

# Reduce the model reply to its JSON payload before parsing.
cleaned_output = clean_json_output(raw_output)

try:
    analysis = json.loads(cleaned_output)
except json.JSONDecodeError:
    # Show the unmodified reply so the malformed output can be inspected.
    print("❌ Failed to parse JSON output")
    print("Raw Output:")
    print(raw_output)
    # A bare exit() would end the process with status 0 (success) and relies
    # on the `site` module being loaded; signal the failure explicitly.
    raise SystemExit(1)

# ==============================
# SAVE OUTPUT
# ==============================

save_json(analysis, OUTPUT_FILE)

print(f"✅ Done. {len(analysis)} lines saved to {OUTPUT_FILE}")