-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcorrection.py
More file actions
87 lines (70 loc) · 3.26 KB
/
correction.py
File metadata and controls
87 lines (70 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
'''
Context Spell Checker
Copyright (C) 2017
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
'''
#wraps one or several text files into blocks of words_per_query words, queries Yandex with
#them and, if it suggests a spell correction (like Google's "did you mean"), the original
#phrase is substituted by the given suggestion. Otherwise, the phrase remains the same in the file.
from lib import spellcheck, stringenhance, wrapper
from temp import dotwrapper
import glob
import time
import sys
#set UTF-8 as the default str<->unicode encoding (Python 2 only)
#Python 2's site module removes sys.setdefaultencoding at start-up, so sys has to be
#reloaded to get it back. Python 3 has no builtin reload() and already uses UTF-8 as
#its default encoding, so the workaround is skipped there instead of raising NameError.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding('utf8')
def printRuntime(corrections_file,runtime,total_queries,yandex_suggestions):
    """Append the run statistics to the corrections file.

    corrections_file   -- path of the file to append to (created if missing)
    runtime            -- elapsed time to report; written as str(runtime)
    total_queries      -- number of queries sent to Yandex
    yandex_suggestions -- number of queries that received a correction
    """
    summary = [
        "Runtime:" + str(runtime),
        "Queries sent to Yandex:" + str(total_queries),
        "Corrected Queries:" + str(yandex_suggestions),
    ]
    with open(corrections_file, 'a') as f:
        # The file is opened in append mode and reused across runs, so the
        # last line must be newline-terminated too; otherwise the next record
        # appended to the file ends up on the "Corrected Queries" line.
        f.write('\n'.join(summary) + '\n')
# Start the clock before any file is processed so the reported runtime covers
# the whole run.
initial_time = time.time()
total_queries = 0
yandex_suggestions = 0
# Log of every accepted suggestion; the run statistics are appended to it last.
corrections_file = "yandex_suggestions.txt"
# Block size (in words) for the wrapper.textWrap alternative noted below; not
# used while dotwrapper.textWrap is the active splitter.
words_per_query = 4

# Every command-line argument is treated as the path of a file to spell check.
for filename in sys.argv[1:]:
    # Split the file into phrases.
    # Alternative splitter: wrapper.textWrap(filename, words_per_query)
    queries_list = dotwrapper.textWrap(filename)
    total_queries += len(queries_list)
    corrected_path = filename + ".corrected"

    for original_query in queries_list:
        # Make the phrase ready for Yandex's search, then ask for a correction.
        enhanced_query = stringenhance.enhanceQuery(original_query)
        suggestion = spellcheck.spellCheck(enhanced_query)

        if suggestion != -1:
            # Yandex suggested a correction: count it and log the file name,
            # the original phrase, the phrase as queried and the suggestion.
            yandex_suggestions += 1
            with open(corrections_file, 'a') as log:
                log.write(str(filename)+'\n'+str(original_query)+'\n'+str(enhanced_query)+'\n'+str(suggestion)+'\n\n')
            output_text = suggestion
        else:
            # Nothing to correct: keep the original phrase in the output.
            output_text = stringenhance.enhanceOriginalQuery(original_query)

        # Append (and close) after every phrase so the progress made so far is
        # already on disk if a later query fails.
        with open(corrected_path, 'a') as corrected:
            corrected.write(output_text)

    print(filename+" corrected.\n")

elapsed_time = time.time() - initial_time
printRuntime(corrections_file, elapsed_time, total_queries, yandex_suggestions)