-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathsentiment_analysis.py
More file actions
81 lines (56 loc) · 1.83 KB
/
sentiment_analysis.py
File metadata and controls
81 lines (56 loc) · 1.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn
from nltk import sent_tokenize, word_tokenize, pos_tag
import pandas as pd
# Module-level lemmatizer instance, created once and reused by swn_polarity
# for every word it lemmatizes.
lemmatizer = WordNetLemmatizer()
def penn_to_wn(tag):
    """Translate a POS tag into the matching WordNet part-of-speech constant.

    The decision is made on the tag's leading letter: ``J`` -> ``wn.ADJ``,
    ``N`` -> ``wn.NOUN``, ``R`` -> ``wn.ADV``, ``V`` -> ``wn.VERB``.

    Returns:
        The WordNet constant for the tag, or ``None`` when the tag starts
        with none of the four letters above.
    """
    # Attribute names are stored (not the values) so that `wn` is only
    # touched when a prefix actually matches.
    wn_attr_by_prefix = {'J': 'ADJ', 'N': 'NOUN', 'R': 'ADV', 'V': 'VERB'}
    for prefix, attr_name in wn_attr_by_prefix.items():
        if tag.startswith(prefix):
            return getattr(wn, attr_name)
    return None
def clean_text(text):
    """Return *text* as a unicode string with HTML line breaks removed.

    Args:
        text: The raw text, either an already-decoded string or UTF-8
            encoded bytes.

    Returns:
        The decoded text with every ``"<br />"`` replaced by a single space.

    Raises:
        UnicodeDecodeError: If *text* is bytes that are not valid UTF-8.
    """
    # Decode only when we were actually handed bytes: text strings have no
    # ``decode`` on Python 3, and decoding first keeps ``replace`` operating
    # on a single consistent string type.
    if isinstance(text, (bytes, bytearray)):
        text = text.decode("utf-8")
    return text.replace("<br />", " ")
def swn_polarity(text):
    """Accumulate SentiWordNet positive and negative scores for *text*.

    The text is cleaned with ``clean_text``, split into sentences and words,
    and POS-tagged. Only words tagged as noun, adjective or adverb are
    scored; each is lemmatized and looked up in WordNet, and the first
    synset's (pos_score - neg_score) difference is added to one of two
    running totals.

    Returns:
        A ``(positive_total, negative_total)`` tuple. Differences >= 0 are
        summed into the first element, differences < 0 into the second (so
        the second element is never positive).
    """
    text = clean_text(text)
    pos_total = 0
    neg_total = 0

    for sentence in sent_tokenize(text):
        for token, penn_tag in pos_tag(word_tokenize(sentence)):
            wordnet_pos = penn_to_wn(penn_tag)
            # Verbs and untranslatable tags are not scored.
            if wordnet_pos not in (wn.NOUN, wn.ADJ, wn.ADV):
                continue

            base_form = lemmatizer.lemmatize(token, pos=wordnet_pos)
            if not base_form:
                continue

            candidates = wn.synsets(base_form, pos=wordnet_pos)
            if not candidates:
                continue

            # Only the first synset returned by WordNet is scored.
            senti = swn.senti_synset(candidates[0].name())
            score = senti.pos_score() - senti.neg_score()
            if score < 0:
                neg_total += score
            else:
                pos_total += score

    return pos_total, neg_total
def readTweets(path='data/tweets.csv'):
    """Load the tweets CSV and return its rows.

    Args:
        path: Location of the CSV file. Defaults to ``'data/tweets.csv'``.

    Returns:
        A list with one entry per CSV data row; each entry is a list of that
        row's cell values in column order, so callers can index columns
        positionally (e.g. ``row[2]``).

    NOTE(review): the first line of the file is treated as a header row
    (the ``pd.read_csv`` default) -- confirm against the actual data file.
    """
    # ``pd.read_csv`` is the module-level CSV reader; converting through
    # ``.values.tolist()`` yields rows, whereas ``list(frame)`` would only
    # yield the column labels.
    frame = pd.read_csv(path)
    return frame.values.tolist()
def main():
    """Score every tweet and write the results to a CSV file.

    For each row returned by ``readTweets``, the third field is passed to
    ``swn_polarity`` and the row's first two fields are stored together with
    the resulting positive and negative totals. The collected rows are
    written to ``data/generated_scores.csv``.
    """
    scored_rows = []
    for row in readTweets():
        positive, negative = swn_polarity(row[2])
        scored_rows.append([row[0], row[1], positive, negative])

    scores_frame = pd.DataFrame(scored_rows)
    scores_frame.to_csv('data/generated_scores.csv')
# Runs the whole pipeline unconditionally: there is no
# `if __name__ == "__main__"` guard, so importing this module also triggers it.
main()