Skip to content

Commit 85f8f33

Browse files
committed
documentation cleanup
1 parent 66feaf9 commit 85f8f33

6 files changed

Lines changed: 0 additions & 14 deletions

File tree

docs-site/content/docs/Documentation/core/Levenshtein.mdx

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,6 @@ The `Levenshtein` class offers methods to measure the difference between two str
154154
if len1 <= 3:
155155
return Levenshtein._simple_distance(seq1, seq2)
156156
157-
# Optimized algorithm with only two rows (memory efficient)
158157
# Instead of full matrix, we only keep previous and current row
159158
previous_row = list(range(len2 + 1))
160159
current_row = [0] * (len2 + 1)

docs-site/content/docs/Documentation/data/composedword.mdx

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ The `ComposedWord` class stores and aggregates information about multi-word keyw
7373
self._h = 1.0
7474

7575
# Check if the candidate starts or ends with stopwords
76-
# Optimized: use truthiness instead of len() > 0
7776
if self._terms:
7877
self._start_or_end_stopwords = (
7978
self._terms[0].stopword or self._terms[-1].stopword
@@ -203,7 +202,6 @@ The class provides property accessors for backward compatibility:
203202
tf_used = self.tf
204203

205204
# For virtual candidates, use mean frequency of constituent terms
206-
# Optimized: use built-in sum/len instead of numpy for small lists
207205
if is_virtual:
208206
tfs = [term_obj.tf for term_obj in self.terms]
209207
tf_used = sum(tfs) / len(tfs) if tfs else 1.0
@@ -476,7 +474,6 @@ The class provides property accessors for backward compatibility:
476474
tf_used = self.tf
477475

478476
# For virtual candidates, use mean frequency of constituent terms
479-
# Optimized: use built-in sum/len instead of numpy for small lists
480477
if is_virtual:
481478
tfs = [term_obj.tf for term_obj in self.terms]
482479
tf_used = sum(tfs) / len(tfs) if tfs else 1.0

yake/core/Levenshtein.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,6 @@ def distance(seq1: str, seq2: str) -> int:
111111
if len1 <= 3:
112112
return Levenshtein._simple_distance(seq1, seq2)
113113

114-
# Optimized algorithm with only two rows (memory efficient)
115114
previous_row = list(range(len2 + 1))
116115
current_row = [0] * (len2 + 1)
117116

yake/data/composed_word.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ def __init__(self, terms: Optional[List[Tuple[str, str, Any]]]):
7070
self._h = 1.0
7171

7272
# Check if the candidate starts or ends with stopwords
73-
# Optimized: use truthiness instead of len() > 0
7473
if self._terms:
7574
self._start_or_end_stopwords = (
7675
self._terms[0].stopword or self._terms[-1].stopword
@@ -389,7 +388,6 @@ def update_h(self, features=None, is_virtual=False):
389388
tf_used = self.tf
390389

391390
# For virtual candidates, use mean frequency of constituent terms
392-
# Optimized: use built-in sum/len instead of numpy for small lists
393391
if is_virtual:
394392
tfs = [term_obj.tf for term_obj in self.terms]
395393
tf_used = sum(tfs) / len(tfs) if tfs else 1.0
@@ -450,7 +448,6 @@ def update_h_old(self, features=None, is_virtual=False):
450448
tf_used = self.tf
451449

452450
# For virtual candidates, use mean frequency of constituent terms
453-
# Optimized: use built-in sum/len instead of numpy for small lists
454451
if is_virtual:
455452
tfs = [term_obj.tf for term_obj in self.terms]
456453
tf_used = sum(tfs) / len(tfs) if tfs else 1.0

yake/data/core.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -247,10 +247,8 @@ def _process_sentence(
247247
# Process each word in the sentence
248248
for pos_sent, word in enumerate(sentence):
249249
# Check if the word is just punctuation (all characters are excluded)
250-
# Optimized: use all() instead of creating a list
251250
if all(c in self.exclude for c in word):
252251
# If we have a block of words, save it and start a new block
253-
# Optimized: use truthiness instead of len() > 0
254252
if block_of_word_obj:
255253
sentence_obj_aux.append(block_of_word_obj)
256254
block_of_word_obj = []
@@ -266,12 +264,10 @@ def _process_sentence(
266264
)
267265

268266
# Save any remaining word block
269-
# Optimized: use truthiness instead of len() > 0
270267
if block_of_word_obj:
271268
sentence_obj_aux.append(block_of_word_obj)
272269

273270
# Add processed sentence to collection if not empty
274-
# Optimized: use truthiness instead of len() > 0
275271
if sentence_obj_aux:
276272
self.sentences_obj.append(sentence_obj_aux)
277273

@@ -466,7 +462,6 @@ def build_single_terms_features(self, features: Optional[List[str]] = None) -> N
466462
valid_tfs = np.array([x.tf for x in valid_terms])
467463

468464
# Skip if no valid terms
469-
# Optimized: use 'not' instead of len() == 0
470465
if not valid_tfs.size:
471466
return
472467

yake/data/utils.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,6 @@ def get_tag(word: str, i: int, exclude: frozenset) -> str:
120120
return "d"
121121

122122
# Count character types for classification
123-
# Optimized: single pass through word instead of multiple
124123
cdigit = calpha = cexclude = 0
125124
for c in word:
126125
if c.isdigit():

0 commit comments

Comments
 (0)