Skip to content

Commit e216deb

Browse files
authored
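Removed `self` from utils: `split_into_chunks` is now a plain module-level function and uses the new `CHUNK_SIZE` constant instead of `self.chunk_size`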
1 parent f21ec41 commit e216deb

1 file changed

Lines changed: 4 additions & 3 deletions

File tree

smallest/utils.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
1414
SENTENCE_END_REGEX = re.compile(r'.*[-.—!?;:…\n]$')
15+
CHUNK_SIZE = 250
1516
SAMPLE_WIDTH = 2
1617
CHANNELS = 1
1718

@@ -59,7 +60,7 @@ def preprocess_text(text: str) -> str:
5960
text = mpn.normalize(text)
6061
return text.strip()
6162

62-
def split_into_chunks(self, text: str) -> List[str]:
63+
def split_into_chunks(text: str) -> List[str]:
6364
"""
6465
Splits the input text into chunks based on sentence boundaries
6566
defined by SENTENCE_END_REGEX and the maximum chunk size.
@@ -76,7 +77,7 @@ def split_into_chunks(self, text: str) -> List[str]:
7677
if SENTENCE_END_REGEX.match(current_chunk):
7778
last_break_index = i
7879

79-
if len(current_chunk) >= self.chunk_size:
80+
if len(current_chunk) >= CHUNK_SIZE:
8081
if last_break_index > 0:
8182
# Split at the last valid sentence boundary
8283
chunk = text[:last_break_index + 1].strip()
@@ -91,7 +92,7 @@ def split_into_chunks(self, text: str) -> List[str]:
9192
# No sentence boundary found, split at max length
9293
current_chunk = current_chunk.replace("—", " ")
9394
chunks.append(current_chunk.strip())
94-
text = text[self.chunk_size:]
95+
text = text[CHUNK_SIZE:]
9596
i = -1 # Reset index to process the remaining text
9697
current_chunk = ""
9798

0 commit comments

Comments
 (0)