Skip to content

Commit fa7ca69

Browse files
committed
tidied code to be flake8 compliant
1 parent 920f0a3 commit fa7ca69

29 files changed

Lines changed: 1623 additions & 688 deletions

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ jobs:
4848
# stop the build if there are Python syntax errors or undefined names
4949
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
5050
# exit-zero treats all errors as warnings
51-
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
51+
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics
5252
5353
- name: Check code formatting with black
5454
run: |

corpus_module/corpus.py

Lines changed: 11 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -52,9 +52,7 @@ def __init__(
5252
"""
5353
self.topdir = Path(topdir) if topdir else None
5454
if self.topdir and not self.topdir.is_dir():
55-
raise ValueError(
56-
f"AmiCorpus() requires valid directory {self.topdir}"
57-
)
55+
raise ValueError(f"AmiCorpus() requires valid directory {self.topdir}")
5856

5957
self.container_by_file = dict()
6058
# rootnode
@@ -154,9 +152,7 @@ def make_descendants(self, file: Optional[Union[str, Path]] = None):
154152
if file is None:
155153
file = self.root_dir
156154
if file is None or not Path(file).is_dir():
157-
logger.error(
158-
f"Cannot make file children for {file}"
159-
)
155+
logger.error(f"Cannot make file children for {file}")
160156
return
161157
files = self._get_children(file)
162158
for f in files:
@@ -269,9 +265,7 @@ def read_json_create_write_html_table(
269265

270266
# Write to file
271267
with open(outfile_h, "w", encoding="utf-8") as f:
272-
f.write(
273-
ET.tostring(htmlx, encoding="unicode", pretty_print=True)
274-
)
268+
f.write(ET.tostring(htmlx, encoding="unicode", pretty_print=True))
275269

276270
def list_files(self, globstr: str) -> List[Path]:
277271
"""
@@ -284,9 +278,7 @@ def list_files(self, globstr: str) -> List[Path]:
284278
List of matching file paths
285279
"""
286280
if globstr and self.root_dir:
287-
return self._posix_glob(
288-
str(self.root_dir / globstr), recursive=True
289-
)
281+
return self._posix_glob(str(self.root_dir / globstr), recursive=True)
290282
return []
291283

292284
def create_datatables_html_with_filenames(
@@ -358,9 +350,7 @@ def make_infiles(self, maxfiles: int = 999999999) -> List[Path]:
358350
List of file paths
359351
"""
360352
if self.globstr:
361-
self.infiles = self._posix_glob(
362-
self.globstr, recursive=True
363-
)[:maxfiles]
353+
self.infiles = self._posix_glob(self.globstr, recursive=True)[:maxfiles]
364354
return self.infiles
365355

366356
def _make_outfile(self):
@@ -424,23 +414,15 @@ def search_files_with_queries(
424414
for query_id in query_ids:
425415
query = self.corpus_queries.get(query_id)
426416
if query is None:
427-
err_msg = (
428-
"cannot find query: "
429-
+ str(query_id)
430-
)
417+
err_msg = "cannot find query: " + str(query_id)
431418
logger.error(err_msg)
432419
continue
433-
dbg_msg = (
434-
"outfile==> "
435-
+ str(query.outfile)
436-
)
420+
dbg_msg = "outfile==> " + str(query.outfile)
437421
logger.debug(dbg_msg)
438422

439423
# This would need to be implemented based on the search functionality
440424
# For now, we'll create a placeholder
441-
logger.info(
442-
f"Running query: {query_id}"
443-
)
425+
logger.info(f"Running query: {query_id}")
444426

445427
return html_by_query_id
446428

@@ -487,9 +469,7 @@ def __init__(
487469
exist_ok: Whether to allow existing directory
488470
"""
489471
if not isinstance(ami_corpus, AmiCorpus):
490-
raise ValueError(
491-
f"ami_corpus has wrong type {type(ami_corpus)}"
492-
)
472+
raise ValueError(f"ami_corpus has wrong type {type(ami_corpus)}")
493473

494474
self.ami_corpus = ami_corpus
495475
self.file = Path(file)
@@ -510,12 +490,8 @@ def child_containers(self) -> List["AmiCorpusContainer"]:
510490
if self.ami_corpus and self.file and self.file.is_dir():
511491
child_nodes = self.ami_corpus._get_children(self.file)
512492
for child_node in child_nodes:
513-
child_container = AmiCorpusContainer(
514-
self.ami_corpus, child_node
515-
)
516-
child_container.bib_type = (
517-
"" if child_node.is_dir() else "file"
518-
)
493+
child_container = AmiCorpusContainer(self.ami_corpus, child_node)
494+
child_container.bib_type = "" if child_node.is_dir() else "file"
519495
child_containers.append(child_container)
520496
return child_containers
521497

corpus_module/query.py

Lines changed: 7 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -88,15 +88,11 @@ def run_query_make_table_TUTORIAL(
8888
raise ValueError("No query_id given")
8989
query_id = query_id.strip()
9090
if " " in query_id:
91-
raise ValueError(
92-
f"no spaces allowed in query_id, found {query_id}"
93-
)
91+
raise ValueError(f"no spaces allowed in query_id, found {query_id}")
9492
self.query_id = query_id
9593

9694
if indir is None or not Path(indir).exists():
97-
logger.error(
98-
f"input directory must exist {indir}"
99-
)
95+
logger.error(f"input directory must exist {indir}")
10096
self.indir = Path(indir)
10197
self.outfile = Path(outfile) if outfile else None
10298
if not self.outfile:
@@ -112,10 +108,7 @@ def run_query_make_table_TUTORIAL(
112108

113109
# This would need to be implemented based on the search functionality
114110
# For now, we'll create a placeholder
115-
logger.info(
116-
f"Running query: {query_id} with phrases: "
117-
f"{self.phrases}"
118-
)
111+
logger.info(f"Running query: {query_id} with phrases: {self.phrases}")
119112

120113
# Create a simple HTML table as placeholder
121114
from datatables_module import Datatables
@@ -135,11 +128,7 @@ def run_query_make_table_TUTORIAL(
135128

136129
table_file = Path(outdir) / f"{self.query_id}_{TABLE_HITS_SUFFIX}"
137130
with open(table_file, "w", encoding="utf-8") as f:
138-
f.write(
139-
ET.tostring(
140-
htmlx, encoding="unicode", pretty_print=True
141-
)
142-
)
131+
f.write(ET.tostring(htmlx, encoding="unicode", pretty_print=True))
143132

144133
return htmlx, self.query_id
145134

@@ -172,14 +161,12 @@ def get_hits_as_term_ref_p_tuple_list(
172161
List of (term, ref, para) tuples
173162
"""
174163
if term_id_by_url is None:
175-
logger.error(f"term_id_by_url is None")
164+
logger.error("term_id_by_url is None")
176165
return None
177166

178167
trp_list = []
179168
for ref in term_id_by_url.keys():
180-
bits = ref.split("#")
181-
file = bits[0]
182-
idref = bits[1] if len(bits) > 1 else ""
169+
# bits = ref.split("#") # Unused, removed for F841
183170
term_p = term_id_by_url.get(ref)
184171
if term_p and len(term_p) >= 2:
185172
term = term_p[0]
@@ -201,7 +188,7 @@ def _add_hits_to_table(
201188
term_ref_p_tuple_list: List of (term, ref, para) tuples
202189
"""
203190
if term_ref_p_tuple_list is None:
204-
logger.error(f"term_ref_p_tuple_list is None")
191+
logger.error("term_ref_p_tuple_list is None")
205192
return
206193

207194
for term, ref, p in term_ref_p_tuple_list:

corpus_module/search.py

Lines changed: 15 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ def search_files_with_phrases_write_results(
7373
if outfile:
7474
outfile = Path(outfile)
7575
outfile.parent.mkdir(exist_ok=True, parents=True)
76-
with open(outfile, "w", encoding="UTF-8") as f:
76+
with open(outfile, "w", encoding="UTF-8"):
7777
if debug:
7878
print(f" hitdict {url_list_by_phrase_dict}")
7979
cls.write_html_file(html1, outfile, debug=True)
@@ -103,14 +103,12 @@ def search_paras_with_id_and_create_dict(
103103
List of paragraph elements
104104
"""
105105
infile_path = Path(infile)
106-
assert infile_path.exists(), (
107-
f"{infile} does not exist"
108-
)
106+
assert infile_path.exists(), f"{infile} does not exist"
109107

110108
try:
111109
html_tree = ET.parse(str(infile), HTMLParser())
112-
except Exception as e:
113-
logger.error(f"Error parsing {infile}: {e}")
110+
except Exception:
111+
logger.error(f"Error parsing {infile}")
114112
return []
115113

116114
paras = cls.find_paras_with_ids(html_tree, para_xpath=para_xpath)
@@ -121,9 +119,7 @@ def search_paras_with_id_and_create_dict(
121119
paras, phrases
122120
)
123121

124-
if para_id_by_phrase_dict is not None and len(
125-
para_id_by_phrase_dict
126-
) > 0:
122+
if para_id_by_phrase_dict is not None and len(para_id_by_phrase_dict) > 0:
127123
cls.add_hit_with_filename_and_para_id(
128124
all_hits_dict,
129125
url_list_by_phrase_dict,
@@ -206,18 +202,18 @@ def create_html_from_hit_dict(cls, hit_dict: Dict[str, List[str]]):
206202
ss = "ipcc/"
207203
try:
208204
idx = a.text.index(ss)
209-
except Exception as e:
210-
print(
211-
f"cannot find substring {ss} in {a.text}"
212-
)
205+
except Exception:
206+
print(f"cannot find substring {ss} in {a.text}")
213207
continue
214-
a.text = a.text[idx + len(ss):]
208+
a.text = a.text[idx + len(ss) :]
215209
a.attrib["href"] = hit
216210

217211
return html
218212

219213
@classmethod
220-
def find_paras_with_ids(cls, html_tree, para_xpath: Optional[str] = None) -> List[Any]:
214+
def find_paras_with_ids(
215+
cls, html_tree, para_xpath: Optional[str] = None
216+
) -> List[Any]:
221217
"""
222218
Find paragraphs with IDs in HTML tree.
223219
@@ -255,8 +251,8 @@ def create_search_results_para_phrase_dict(
255251
def create_html_with_empty_head_body():
256252
"""Create basic HTML document structure."""
257253
html = ET.Element("html")
258-
head = ET.SubElement(html, "head")
259-
body = ET.SubElement(html, "body")
254+
ET.SubElement(html, "head")
255+
ET.SubElement(html, "body")
260256
return html
261257

262258
@staticmethod
@@ -277,10 +273,10 @@ def write_html_file(htmlx, outfile: Union[str, Path], debug: bool = False):
277273
if debug:
278274
logger.info(f"writing HTML to {outfile}")
279275

280-
with open(outfile, "w", encoding="UTF-8") as f:
276+
with open(outfile, "w", encoding="UTF-8") as _f:
281277
text = ET.tostring(
282278
htmlx,
283279
encoding="unicode",
284280
pretty_print=True,
285281
)
286-
f.write(text)
282+
_f.write(text)

corpus_module/setup.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -51,9 +51,7 @@
5151
"requests>=2.25.0",
5252
],
5353
},
54-
keywords=(
55-
"corpus, text-analysis, document-management, search, linguistics"
56-
),
54+
keywords=("corpus, text-analysis, document-management, search, linguistics"),
5755
project_urls={
5856
"Bug Reports": "https://github.com/amilib/corpus-module/issues",
5957
"Source": "https://github.com/amilib/corpus-module",

0 commit comments

Comments
 (0)