feat: add type annotations

nijel · nijel · commit 0fb8cd171b56 · 2025-11-28T08:33:16.000+01:00
Add type annotations and clarify some docstrings.
diff --git a/src/lib.rs b/src/lib.rs
@@ -23,13 +23,13 @@ mod unicode_segmentation_rs {
             .collect())
     }
 
-    /// Split a string into words
+    /// Split a string at word boundaries (includes punctuation and whitespace).
     #[pyfunction]
     fn split_word_bounds(text: &str) -> PyResult<Vec<String>> {
         Ok(text.split_word_bounds().map(|s| s.to_string()).collect())
     }
 
-    /// Split a string into word indices
+    /// Split a string at word boundaries with byte indices.
     #[pyfunction]
     fn split_word_bound_indices(text: &str) -> PyResult<Vec<(usize, String)>> {
         Ok(text
@@ -38,13 +38,13 @@ mod unicode_segmentation_rs {
             .collect())
     }
 
-    /// Get Unicode words (excluding punctuation and whitespace)
+    /// Get Unicode words from a string (excludes punctuation and whitespace).
     #[pyfunction]
     fn unicode_words(text: &str) -> PyResult<Vec<String>> {
         Ok(text.unicode_words().map(|s| s.to_string()).collect())
     }
 
-    /// Split a string into sentences
+    /// Split a string into sentences.
     #[pyfunction]
     fn unicode_sentences(text: &str) -> PyResult<Vec<String>> {
         Ok(text.unicode_sentences().map(|s| s.to_string()).collect())
diff --git a/unicode_segmentation_rs.pyi b/unicode_segmentation_rs.pyi
@@ -0,0 +1,29 @@
+# Copyright © Michal Čihař <michal@weblate.org>
+#
+# SPDX-License-Identifier: MIT
+"""
+Type stubs for unicode-segmentation-rs
+
+This module provides Unicode text segmentation and width calculation.
+"""
+
+def graphemes(text: str, is_extended: bool) -> list[str]:
+    ...
+
+def grapheme_indices(text: str, is_extended: bool) -> list[tuple[int, str]]:
+    ...
+
+def split_word_bounds(text: str) -> list[str]:
+    ...
+
+def split_word_bound_indices(text: str) -> list[tuple[int, str]]:
+    ...
+
+def unicode_words(text: str) -> list[str]:
+    ...
+
+def unicode_sentences(text: str) -> list[str]:
+    ...
+
+def text_width(text: str) -> int:
+    ...