-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathEntity_detection.py
More file actions
31 lines (24 loc) · 1.19 KB
/
Entity_detection.py
File metadata and controls
31 lines (24 loc) · 1.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import nltk
from pprint import pprint
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet as wn
def find_keywords(features):
    """Attach POS-filtered keywords and WordNet synonyms to each question.

    For every value in ``features`` the text stored under ``'Question'`` is
    tokenized and POS-tagged with NLTK. Tokens whose tag is in the selected
    set are lower-cased; those words, together with the result of running
    each of them through ``WordNetLemmatizer.lemmatize`` (default arguments),
    are de-duplicated and stored under ``'pos_keywords'``. The lemma names of
    every WordNet synset found for each keyword are then collected under
    ``'wordnet_synonyms'``; that list is not de-duplicated.

    Args:
        features: Mapping whose values are dicts containing a ``'Question'``
            string. The value dicts are modified in place.

    Returns:
        The same ``features`` object, with ``'pos_keywords'`` and
        ``'wordnet_synonyms'`` set on every value.

    Raises:
        KeyError: If a value has no ``'Question'`` entry.
    """
    if not features:
        # Nothing to annotate, so avoid constructing any NLTK objects.
        return features

    # POS tags to keep; a frozenset gives constant-time membership tests.
    important_tags = frozenset(
        ('VBN', 'NNS', 'VBP', 'NNP', 'NN', 'VBD', 'JJ', 'JJR', 'JJS')
    )
    lemmatizer = WordNetLemmatizer()

    for feature in features.values():
        tagged_tokens = nltk.pos_tag(nltk.word_tokenize(feature['Question']))

        # Filter and lower-case once, then reuse the result for lemmatizing.
        words = [
            word.lower()
            for word, tag in tagged_tokens
            if tag in important_tags
        ]
        lemmas = [lemmatizer.lemmatize(word) for word in words]

        # dict.fromkeys drops duplicates while keeping first-seen order, so
        # the keyword list (and the synonym list built from it) does not
        # vary with string hash randomisation the way list(set(...)) does.
        keywords = list(dict.fromkeys(words + lemmas))
        feature['pos_keywords'] = keywords

        synonyms = []
        for keyword in keywords:
            for synset in wn.synsets(keyword):
                synonyms.extend(str(lemma.name()) for lemma in synset.lemmas())
        feature['wordnet_synonyms'] = synonyms

    return features