Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,27 @@
# Toxicity analysis in social media using BERT

## PHASE 1 - SEM 6
## Meet the team
- [Chethan](https://github.com/chethanv-20)
- [Darshan](https://github.com/darshangn310)
- [Janav](https://github.com/Janav20)
- [Harshini](https://github.com/harshinimurugan2004)

### This project is still under development - check back later for updates
> Phase 1 of our project ends on 10-05-2024


## PHASE 2 - SEM 7

### Review 1
- Worked on model
- Curated euphemism dataset
- Chat room application for the working model

### Streamlit Interface
<img src="https://github.com/user-attachments/assets/19de66cb-3519-46e7-8213-496ed2e736ad" width="800" height="400">

### Warning and blocking
<img src="https://github.com/user-attachments/assets/191c39d8-3109-4ece-82cc-b8aa7cf8ed34" width="300" height="200">
<img src="https://github.com/user-attachments/assets/cb590165-4816-4336-883f-ab1dc73a2e5a" width="300" height="200">

### This project is still under development - check back later for updates
146 changes: 146 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import streamlit as st
from collections import defaultdict
import pandas as pd
from model import predict_toxicity_with_context

# Build the euphemism lookup table from the CSV: every spelling variation
# found in a row maps back to that row's 'Original Word'.
toxic_words_path = 'toxic_words_with_variations.csv'
toxic_words_df = pd.read_csv(toxic_words_path)

# Every column after the first one is treated as a variation column.
variation_columns = toxic_words_df.columns[1:]

euphemism_dict = {}
for _, row in toxic_words_df.iterrows():
    base_word = row['Original Word']
    for column_name in variation_columns:
        variation = row[column_name]
        if pd.isna(variation):
            # Empty CSV cell: this row has no variation in this column.
            continue
        euphemism_dict[variation] = base_word

# Seed the per-session state on the first run only; keys that already exist
# in st.session_state are left untouched so reruns keep their data.
_session_defaults = (
    ('chat_history', list),
    ('user_toxic_count', lambda: defaultdict(int)),
    ('flagged_users', set),
    ('blocked_users', set),
)
for _state_key, _make_default in _session_defaults:
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()

# Function to display chat history with color-coded results
def display_chat(chat_history):
    """Render each chat entry as one markdown line with a colored verdict.

    Args:
        chat_history: Iterable of ``(user, msg, result, is_toxic)`` tuples.
            ``is_toxic`` selects the verdict color: red when truthy,
            green otherwise.

    The line is rendered with ``unsafe_allow_html=True`` so the colored
    ``<span>`` works. Because ``msg`` is free text typed by a user, every
    interpolated value is HTML-escaped first; otherwise a message could
    inject arbitrary markup into the page.
    """
    from html import escape  # local import: keeps this block self-contained

    for user, msg, result, is_toxic in chat_history:
        color = "red" if is_toxic else "green"
        st.markdown(
            f"**{escape(str(user))}:** {escape(str(msg))} - "
            f"<span style='color: {color};'>{escape(str(result))}</span>",
            unsafe_allow_html=True,
        )

def view_toxicity_report():
    """Display a two-column table of users and their toxic message counts.

    The data comes from ``st.session_state.user_toxic_count``.
    """
    per_user_counts = st.session_state.user_toxic_count
    rows = list(per_user_counts.items())
    st.write(pd.DataFrame(rows, columns=['User', 'Toxic Message Count']))

# Page chrome: browser-tab title/icon, wide layout, and a centered heading.
st.set_page_config(
    layout="wide",
    page_icon=":speech_balloon:",
    page_title="Toxicity Analysis Chat Room",
)
_page_heading = "<h1 style='text-align: center; color: #00FFFF;'>Toxicity Analysis in Social Media</h1>"
st.markdown(_page_heading, unsafe_allow_html=True)

# Three equal-width columns that hold the user input widgets side by side.
col1, col2, col3 = st.columns([1] * 3)

def handle_user_input(user, user_input):
    """Classify one chat message, record it, and apply the warning policy.

    Args:
        user: Display name of the sender (also the key used for counting,
            flagging and blocking).
        user_input: Raw message text typed by the sender.

    Returns:
        A ``(result, is_toxic)`` tuple. ``result`` is the classifier's
        message, or ``"Blocked"`` / ``"No input provided"`` when the message
        was rejected before classification (``is_toxic`` is ``False`` then).

    Side effects on ``st.session_state``:
        * Appends ``(user, user_input, result, is_toxic)`` to ``chat_history``.
        * Increments ``user_toxic_count[user]`` for toxic messages.
        * Adds the user to ``flagged_users`` from the 3rd toxic message on, so
          the "Flagged Users" panel actually has entries to show.
        * Adds the user to ``blocked_users`` from the 6th toxic message on.
    """
    state = st.session_state

    # Blocked users are rejected before any analysis is run.
    if user in state.blocked_users:
        st.error(f"{user} is blocked and cannot send messages.")
        return "Blocked", False

    # Whitespace-only input counts as empty.
    if not user_input.strip():
        st.error("Input cannot be empty!")
        return "No input provided", False

    result = predict_toxicity_with_context(user_input, euphemism_dict)

    # Debugging: show the raw classifier message for inspection.
    st.write(f"Debug: Result from toxicity analysis - '{result}'")

    # Derive the boolean verdict from the classifier's message text.
    if "classified as non-toxic" in result:
        is_toxic = False
    elif "classified as toxic" in result:
        is_toxic = True
    else:
        # Any other wording: treat a mention of "toxic" as a toxic verdict.
        is_toxic = "toxic" in result.lower()

    # Debugging: show the derived verdict.
    st.write(f"Debug: Is toxic - {is_toxic}")

    state.chat_history.append((user, user_input, result, is_toxic))

    # Only toxic messages count toward warnings, flagging and blocking.
    if is_toxic:
        state.user_toxic_count[user] += 1
        toxic_count = state.user_toxic_count[user]

        # Flag from the first warning threshold onward; without this the
        # flagged_users set stays empty and its panel is never rendered.
        if toxic_count >= 3:
            state.flagged_users.add(user)

        if toxic_count == 3:
            st.warning(f"{user}, you have sent 3 toxic messages. Please be mindful of your behavior.")
        elif toxic_count == 5:
            st.warning(f"{user}, you have sent 5 toxic messages. Further toxic behavior will result in being blocked.")
        elif toxic_count >= 6:
            state.blocked_users.add(user)
            st.warning(f"{user} has been blocked for repeated toxic behavior. you cannot send messages anymore.")

    return result, is_toxic

# One text box and one send button per user, each placed in its own column.
# Labels and widget keys follow the pattern "User N" / "userN_input" /
# "send_userN" for N = 1..3.
for _user_number, _column in enumerate((col1, col2, col3), start=1):
    with _column:
        _typed_message = st.text_input(f"User {_user_number}:", key=f"user{_user_number}_input")
        if st.button(f"Send as User {_user_number}", key=f"send_user{_user_number}"):
            result, is_toxic = handle_user_input(f"User {_user_number}", _typed_message)

# Chat history section: a divider, a centered heading, then the transcript.
for _chat_banner in (
    "<hr style='border:1px solid #00FFFF;'>",
    "<h3 style='text-align: center; color: #00FFFF;'>Chat History</h3>",
):
    st.markdown(_chat_banner, unsafe_allow_html=True)
display_chat(st.session_state.chat_history)

# Per-user toxic message counts, shown only when the button is pressed.
if st.button("View Toxicity Report"):
    view_toxicity_report()

# Flagged-users panel, rendered only when the flagged set is non-empty.
_flagged = st.session_state.flagged_users
if _flagged:
    for _flag_banner in (
        "<hr style='border:1px solid red;'>",
        "<h3 style='color: red;'>Flagged Users</h3>",
    ):
        st.markdown(_flag_banner, unsafe_allow_html=True)
    for user in _flagged:
        st.warning(f"{user} has been flagged for repeated toxic behavior.")

# Custom CSS injected into the page: body colors and font, rounded text
# inputs, and black rounded buttons with a lighter hover state.
_page_css = """
<style>
body {
background-color: #F0F8FF;
color: #808080;
font-family: Arial, sans-serif;
}
.stTextInput>div>input {
border-radius: 12px;
}
div.stButton > button {
background-color: #000000;
color: white;
border-radius: 12px;
}
div.stButton > button:hover {
background-color: #333333;
}
.stMarkdown {
font-family: 'Arial', sans-serif;
}
</style>
"""
st.markdown(_page_css, unsafe_allow_html=True)
73 changes: 73 additions & 0 deletions model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# The trained classifier and its tokenizer are both loaded from the same
# local directory, once, at import time.
_SAVED_MODEL_DIR = './saved_model'
tokenizer = BertTokenizer.from_pretrained(_SAVED_MODEL_DIR)
model = BertForSequenceClassification.from_pretrained(_SAVED_MODEL_DIR)

# Sentiment analyzer used by the prediction function.
sentiment_analyzer = SentimentIntensityAnalyzer()

# Preprocessing function
def preprocess_input(text, tokenizer, euphemism_dict, max_len):
    """Normalize euphemisms in ``text`` and tokenize it for the classifier.

    Args:
        text: Raw input string.
        tokenizer: Object exposing ``encode_plus`` (the module passes its
            ``BertTokenizer``).
        euphemism_dict: Mapping of euphemism -> base toxic word. Each
            euphemism found in ``text`` is replaced, as a plain
            case-sensitive substring, by its base word.
        max_len: Sequence length the encoding is padded/truncated to.

    Returns:
        Whatever ``tokenizer.encode_plus`` returns for the normalized text,
        requested with special tokens, an attention mask, no token type ids,
        ``max_length`` padding, truncation, and PyTorch tensors.

    Entries that cannot be applied safely are skipped instead of crashing or
    corrupting the text: non-string keys or values (e.g. NaN from an empty
    CSV cell) and empty/whitespace-only keys (``str.replace("", x)`` would
    insert ``x`` between every character).
    """
    for euphemism, base_word in euphemism_dict.items():
        if not isinstance(euphemism, str) or not isinstance(base_word, str):
            continue
        if not euphemism.strip():
            continue
        text = text.replace(euphemism, base_word)

    return tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=max_len,
        return_token_type_ids=False,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )

# Prediction function with context analysis
def predict_toxicity_with_context(text, euphemism_dict, max_len=128, *,
                                  toxicity_threshold=0.5,
                                  sentiment_threshold=0.0):
    """Classify ``text`` as toxic or non-toxic and explain the verdict.

    Steps:
        1. Replace every euphemism found in ``text`` (plain, case-sensitive
           substring match) with its base word, recording each replacement.
        2. Score the normalized text with the module's sentiment analyzer.
           If the compound score is ``>= sentiment_threshold`` the text is
           reported non-toxic immediately and the model is not run.
        3. Otherwise run the BERT classifier, apply a sigmoid to its logits,
           and report every label whose score is ``>= toxicity_threshold``.

    Args:
        text: Raw input string.
        euphemism_dict: Mapping of euphemism -> base toxic word.
        max_len: Token length passed to ``preprocess_input``.
        toxicity_threshold: Minimum sigmoid score for a label to be reported.
        sentiment_threshold: Minimum compound sentiment score that
            short-circuits to a non-toxic verdict.

    Returns:
        A human-readable string: one of the two "classified as non-toxic"
        messages, or ``"Detected toxicity: <labels>"`` optionally followed by
        a ``"Reasons for toxicity: ..."`` line listing the replacements.

    NOTE(review): with the default ``sentiment_threshold`` of 0.0, text with
    a compound score of exactly 0 also skips the model. Confirm this is
    intended.
    """
    # Keep only entries that can be applied safely: string keys/values and a
    # non-blank key. A blank key would match everywhere and corrupt the text;
    # non-strings (e.g. NaN from an empty CSV cell) would raise TypeError.
    usable_euphemisms = {
        euphemism: base_word
        for euphemism, base_word in euphemism_dict.items()
        if isinstance(euphemism, str)
        and isinstance(base_word, str)
        and euphemism.strip()
    }

    # Replace euphemisms with the base toxic word, remembering why.
    reasons = []
    for euphemism, base_word in usable_euphemisms.items():
        if euphemism in text:
            reasons.append(f"Euphemism '{euphemism}' detected, replaced with '{base_word}'")
            text = text.replace(euphemism, base_word)

    # Sentiment gate first: when it decides the outcome the model output is
    # never used, so the forward pass is skipped entirely.
    sentiment_score = sentiment_analyzer.polarity_scores(text)['compound']
    if sentiment_score >= sentiment_threshold:
        return "The input text is classified as non-toxic based on sentiment analysis."

    model.eval()
    encoding = preprocess_input(text, tokenizer, usable_euphemisms, max_len)
    with torch.no_grad():
        outputs = model(
            input_ids=encoding['input_ids'],
            attention_mask=encoding['attention_mask'],
        )
    predictions = torch.sigmoid(outputs.logits).cpu().numpy().flatten()

    # Labels are paired with scores by position.
    labels = ('toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate')
    detected_labels = [
        label
        for label, score in zip(labels, predictions)
        if score >= toxicity_threshold
    ]

    if not detected_labels:
        return "The input text is classified as non-toxic."

    result = f"Detected toxicity: {', '.join(detected_labels)}"
    if reasons:
        result += f"\nReasons for toxicity: {', '.join(reasons)}"
    return result