99# https://raw.githubusercontent.com/rrenaud/Gibberish-Detector/aa1d4e4555362b3dada97ebe6ecc23a84fc470fe/gib_detect_train.py
1010#
1111
12- import io
1312import math
1413import pickle
1514from pathlib import Path
@@ -32,11 +31,11 @@ def __init__(self):
3231 self .train ()
3332
3433 def persist_model (self ):
35- with io . open (model_path , mode = 'wb' ) as f :
34+ with open (model_path , mode = 'wb' ) as f :
3635 pickle .dump (vars (self ), f )
3736
3837 def load_persisted_model (self ):
39- with io . open (model_path , mode = 'rb' ) as f :
38+ with open (model_path , mode = 'rb' ) as f :
4039 persisted_model = pickle .load (f )
4140 for key , value in persisted_model .items ():
4241 setattr (self , key , value )
@@ -75,7 +74,7 @@ def train(self, bigfile=big_file_path, goodfile=good_file_path,
7574
7675 # Count transitions from big text file, taken
7776 # from http://norvig.com/spell-correct.html
78- for line in io . open (bigfile , encoding = 'utf-8' ):
77+ for line in open (bigfile , encoding = 'utf-8' ):
7978 for a , b in self .ngram (2 , line ):
8079 counts [pos [a ]][pos [b ]] += 1
8180
@@ -91,8 +90,8 @@ def train(self, bigfile=big_file_path, goodfile=good_file_path,
9190
9291 # Find the probability of generating a few arbitrarily chosen good and
9392 # bad phrases.
94- good_probs = [self .avg_transition_prob (l , counts ) for l in io . open (goodfile , encoding = 'utf-8' )]
95- bad_probs = [self .avg_transition_prob (l , counts ) for l in io . open (badfile , encoding = 'utf-8' )]
93+ good_probs = [self .avg_transition_prob (l , counts ) for l in open (goodfile , encoding = 'utf-8' )]
94+ bad_probs = [self .avg_transition_prob (l , counts ) for l in open (badfile , encoding = 'utf-8' )]
9695
9796 # Assert that we actually are capable of detecting the junk.
9897 assert min (good_probs ) > max (bad_probs )
0 commit comments