diff --git a/unstructured/metrics/text_extraction.py b/unstructured/metrics/text_extraction.py
index 7153852305..1e6351a621 100644
--- a/unstructured/metrics/text_extraction.py
+++ b/unstructured/metrics/text_extraction.py
@@ -80,31 +80,23 @@ def bag_of_words(text: str) -> Dict[str, int]:
     Removes sentence punctuation, but not punctuation within a word (ex. apostrophes).
     """
     bow: Dict[str, int] = {}
-    incorrect_word: str = ""
     words = clean_bullets(remove_sentence_punctuation(text.lower(), ["-", "'"])).split()
 
-    i = 0
-    while i < len(words):
-        if len(words[i]) > 1:
-            if words[i] in bow:
-                bow[words[i]] += 1
+    n = len(words)
+    for i, w in enumerate(words):
+        if len(w) > 1:
+            if w in bow:
+                bow[w] += 1
             else:
-                bow[words[i]] = 1
-            i += 1
+                bow[w] = 1
         else:
-            j = i
-            incorrect_word = ""
-
-            while j < len(words) and len(words[j]) == 1:
-                incorrect_word += words[j]
-                j += 1
-
-            if len(incorrect_word) == 1 and words[i].isalnum():
-                if incorrect_word in bow:
-                    bow[incorrect_word] += 1
+            prev_single = i > 0 and len(words[i - 1]) == 1
+            next_single = i + 1 < n and len(words[i + 1]) == 1
+            if (not prev_single) and (not next_single) and w.isalnum():
+                if w in bow:
+                    bow[w] += 1
                 else:
-                    bow[incorrect_word] = 1
-            i = j
+                    bow[w] = 1
     return bow
 
 
@@ -139,12 +131,8 @@ def calculate_percent_missing_text(
 
     for source_word, source_count in source_bow.items():
         total_source_word_count += source_count
-        if source_word not in output_bow:
-            # entire count is missing
-            total_missing_word_count += source_count
-        else:
-            output_count = output_bow[source_word]
-            total_missing_word_count += max(source_count - output_count, 0)
+        output_count = output_bow.get(source_word, 0)
+        total_missing_word_count += max(source_count - output_count, 0)
 
     # calculate percent missing text
     if total_source_word_count == 0: