Skip to content

Commit f83633c

Browse files
authored
Ready to use!
1 parent c4ea8f5 commit f83633c

8 files changed

Lines changed: 86 additions & 0 deletions

File tree

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from kivy.app import App
2+
from kivy.uix.label import Label
3+
from kivy.uix.button import Button
4+
from kivy.uix.gridlayout import GridLayout
5+
from kivy.uix.layout import Layout
6+
7+
8+
class Interface(App):
    """Minimal Kivy application that shows a single button."""

    def build(self):
        """Create the button captioned 'Button' and hand it back to Kivy."""
        root_widget = Button(text='Button')
        return root_widget
11+
12+
13+
if __name__ == '__main__':
    # Script entry point: instantiate the application and start it.
    # ``app`` receives whatever ``run()`` returns once it finishes.
    app = Interface().run()

textAnalizier/run.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from word2vec import word2vec
2+
3+
4+
def run():
    """Placeholder entry point: does no work yet and returns ``None``."""
    return None
6+
7+
8+
if __name__ == '__main__':
    # Call the entry point only when this file is executed as a script,
    # not when it is imported as a module.
    run()
144 Bytes
Binary file not shown.
144 Bytes
Binary file not shown.
286 Bytes
Binary file not shown.
375 Bytes
Binary file not shown.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Maps the four-letter pymorphy2 tag prefix of a word to the part-of-speech
# suffix that is appended to its lemma ("lemma_POS") before the word2vec
# model is queried.  An empty string means "do not query the model for this
# class of words" (the caller prints "No suggestions" instead).
#
# The model in use is a UPOS-tagged one, so values should be UPOS tags.
# NOTE(review): 'NPRO' -> 'PROPN', 'COMP', 'UNKN' and 'NUMB' are kept exactly
# as they were; they do not look like the matching UPOS tags (pronoun is
# PRON, and COMP/UNKN/NUMB are not UPOS tags at all) -- confirm against the
# model vocabulary before changing them.
types = {
    'NOUN': 'NOUN',
    'VERB': 'VERB',
    'NPRO': 'PROPN',
    'ADVB': 'ADV',
    'ADJF': 'ADJ',
    'NUMR': 'NUM',
    'PREP': '',
    'CONJ': '',
    'PRCL': 'ADV',
    'INTJ': 'ADV',
    'COMP': 'COMP',
    'UNKN': 'UNKN',
    # Infinitives are verbs in UPOS; 'INFN' is not a UPOS tag, so lemmas
    # tagged with it could never be found in the model.
    'INFN': 'VERB',
    # Short-form adjectives are still adjectives in UPOS.
    'ADJS': 'ADJ',
    'NUMB': 'NUMB',
    # Participles and gerunds are lemmatised by pymorphy2 to the infinitive,
    # so their lemma is looked up as a verb.
    'PRTF': 'VERB',
    'PRTS': 'VERB',
    'GRND': 'VERB',
    # Remaining pymorphy2 tags: skipped rather than left to raise KeyError.
    'PRED': '',
    'LATN': '',
    'PNCT': '',
    'ROMN': '',
}

textAnalizier/word2vec/word2vec.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import gensim
2+
import pymorphy2
3+
from config import config
4+
5+
6+
class Comparer:
    """Find word2vec neighbours of Russian words via POS-tagged lemmas.

    The word2vec model is keyed by ``lemma_POS`` strings; pymorphy2 supplies
    the lemma and tag, and ``config.types`` translates the pymorphy2 tag into
    the suffix used by the model.
    """

    DEFAULT_MODEL_PATH = (
        'binaries/ruwikiruscorpora_upos_skipgram_300_2_2019.bin'
    )

    def __init__(self, model_path: str = DEFAULT_MODEL_PATH):
        """Load the binary word2vec model and create the morph analyzer.

        Args:
            model_path: Path to a word2vec file in binary format.  Defaults
                to the model this project ships with.
        """
        self.model = gensim.models.KeyedVectors.load_word2vec_format(
            model_path,
            binary=True,
        )
        # Same normalisation call as before; see the gensim documentation
        # for what ``replace=True`` does to the stored vectors.
        self.model.init_sims(replace=True)
        self.morph = pymorphy2.MorphAnalyzer()

    def get_neighbours(self, _word: str):
        """Return the model's ``most_similar`` result for ``_word``.

        Args:
            _word: Model key to look up (callers pass ``lemma_POS``).

        Raises:
            KeyError: Propagated from the model; callers treat it as
                "word not in the vocabulary".
        """
        return self.model.most_similar(_word)

    def get_lemma(self, _word: str) -> tuple:
        """Return ``(normal_form, model_pos)`` for ``_word``.

        Only the first parse returned by pymorphy2 is used.  ``model_pos``
        is an empty string when the word's tag has no model counterpart.
        """
        first_parse = self.morph.parse(_word)[0]
        # The tag's string form starts with a four-letter code, which is
        # what ``config.types`` is keyed by.
        tag_prefix = str(first_parse.tag)[:4]
        return first_parse.normal_form, self.get_type_for_model(tag_prefix)

    @staticmethod
    def get_type_for_model(_type: str) -> str:
        """Translate a pymorphy2 tag prefix into the model's POS suffix.

        Tags missing from ``config.types`` yield ``''`` (the same "skip this
        word" marker the mapping already uses) instead of raising
        ``KeyError``, so an unexpected tag cannot crash the caller.
        """
        return config.types.get(_type, '')
24+
25+
26+
if __name__ == '__main__':
    # Interactive check: read one line of whitespace-separated words from
    # stdin and print the model's neighbours for each of them.
    comparer = Comparer()
    for word in input().split():
        lemma = comparer.get_lemma(word)
        if not lemma[1]:
            # No POS suffix for this word class: nothing to look up.
            lemma = lemma[0]
            print(f'No suggestions to word {lemma}')
        else:
            # Model keys have the form "lemma_POS".
            lemma = '_'.join(lemma)
            try:
                print(comparer.get_neighbours(lemma))
            except KeyError:
                print(f'No suggestions to word {lemma.split("_")[0]}')
        print(lemma)
        print('-------------------------------------')

0 commit comments

Comments
 (0)