Skip to content

Commit b1410eb

Browse files
committed
fix: clarify documentation examples
1 parent 28e5add commit b1410eb

1 file changed

Lines changed: 26 additions & 26 deletions

File tree

README.md

Lines changed: 26 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -30,63 +30,63 @@ maturin develop --release
3030
## Usage
3131

3232
```python
33-
import unicode_segmentation_py as us
33+
import unicode_segmentation_py
3434

3535
# Grapheme clusters (user-perceived characters)
3636
text = "Hello 👨‍👩‍👧‍👦 World"
37-
clusters = us.graphemes(text, is_extended=True)
37+
clusters = unicode_segmentation_py.graphemes(text, is_extended=True)
3838
print(clusters) # ['H', 'e', 'l', 'l', 'o', ' ', '👨‍👩‍👧‍👦', ' ', 'W', 'o', 'r', 'l', 'd']
3939

4040
# Get grapheme clusters with their byte indices
41-
indices = us.grapheme_indices(text, is_extended=True)
41+
indices = unicode_segmentation_py.grapheme_indices(text, is_extended=True)
4242
print(indices) # [(0, 'H'), (1, 'e'), ...]
4343

4444
# Word boundaries (includes punctuation and whitespace)
4545
text = "Hello, world!"
46-
words = us.split_word_bounds(text)
46+
words = unicode_segmentation_py.split_word_bounds(text)
4747
print(words) # ['Hello', ',', ' ', 'world', '!']
4848

4949
# Unicode words (excludes punctuation and whitespace)
50-
words = us.unicode_words(text)
50+
words = unicode_segmentation_py.unicode_words(text)
5151
print(words) # ['Hello', 'world']
5252

5353
# Word indices
54-
indices = us.split_word_bound_indices(text)
54+
indices = unicode_segmentation_py.split_word_bound_indices(text)
5555
print(indices) # [(0, 'Hello'), (5, ','), ...]
5656

5757
# Sentence segmentation
5858
text = "Hello world. How are you? I'm fine."
59-
sentences = us.unicode_sentences(text)
59+
sentences = unicode_segmentation_py.unicode_sentences(text)
6060
print(sentences) # ['Hello world. ', 'How are you? ', "I'm fine."]
6161

6262
# Display width calculation
6363
text = "Hello 世界"
64-
width = us.text_width(text)
64+
width = unicode_segmentation_py.text_width(text)
6565
print(width) # 10 (Hello=5, space=1, 世=2, 界=2, but depends on terminal)
6666

6767
# Character width
68-
print(us.text_width('A')) # Some(1)
69-
print(us.text_width('')) # Some(2)
70-
print(us.text_width('\t')) # None (control character)
68+
print(unicode_segmentation_py.text_width('A')) # 1
69+
print(unicode_segmentation_py.text_width('世')) # 2 (wide CJK character)
70+
print(unicode_segmentation_py.text_width('\t')) # None (control character)
7171
```
7272

7373
## Examples
7474

7575
### Grapheme Cluster Segmentation
7676

7777
```python
78-
import unicode_segmentation_py as us
78+
import unicode_segmentation_py
7979

8080
# Complex emojis and combining characters
8181
text = "Hello 👨‍👩‍👧‍👦 नमस्ते"
8282
print(f"Text: {text}")
83-
print(f"Graphemes: {us.graphemes(text, is_extended=True)}")
84-
print(f"Length (graphemes): {len(us.graphemes(text, is_extended=True))}")
83+
print(f"Graphemes: {unicode_segmentation_py.graphemes(text, is_extended=True)}")
84+
print(f"Length (graphemes): {len(unicode_segmentation_py.graphemes(text, is_extended=True))}")
8585
print(f"Length (chars): {len(text)}")
8686

8787
# With indices
8888
print("Grapheme indices:")
89-
for idx, cluster in us.grapheme_indices(text, is_extended=True):
89+
for idx, cluster in unicode_segmentation_py.grapheme_indices(text, is_extended=True):
9090
    print(f" {idx:3d}: {cluster!r}")
9191
```
9292

@@ -95,12 +95,12 @@ for idx, cluster in us.grapheme_indices(text, is_extended=True):
9595
```python
9696
text = "Hello, world! How are you?"
9797
print(f"Text: {text}")
98-
print(f"Word bounds: {us.split_word_bounds(text)}")
99-
print(f"Unicode words: {us.unicode_words(text)}")
98+
print(f"Word bounds: {unicode_segmentation_py.split_word_bounds(text)}")
99+
print(f"Unicode words: {unicode_segmentation_py.unicode_words(text)}")
100100

101101
# With indices
102102
print("Word boundary indices:")
103-
for idx, word in us.split_word_bound_indices(text):
103+
for idx, word in unicode_segmentation_py.split_word_bound_indices(text):
104104
    print(f" {idx:3d}: {word!r}")
105105
```
106106

@@ -109,7 +109,7 @@ for idx, word in us.split_word_bound_indices(text):
109109
```python
110110
text = "Hello world. How are you? I'm fine, thanks! What about you?"
111111
print(f"Text: {text}")
112-
sentences = us.unicode_sentences(text)
112+
sentences = unicode_segmentation_py.unicode_sentences(text)
113113
print("Sentences:")
114114
for i, sentence in enumerate(sentences, 1):
115115
    print(f" {i}. {sentence!r}")
@@ -121,17 +121,17 @@ for i, sentence in enumerate(sentences, 1):
121121
# Arabic
122122
arabic = "مرحبا بك. كيف حالك؟"
123123
print(f"Arabic: {arabic}")
124-
print(f"Sentences: {us.unicode_sentences(arabic)}")
124+
print(f"Sentences: {unicode_segmentation_py.unicode_sentences(arabic)}")
125125

126126
# Japanese
127127
japanese = "こんにちは。お元気ですか?"
128128
print(f"Japanese: {japanese}")
129-
print(f"Sentences: {us.unicode_sentences(japanese)}")
129+
print(f"Sentences: {unicode_segmentation_py.unicode_sentences(japanese)}")
130130

131131
# Mixed languages
132132
mixed = "Hello世界! This is a test文章."
133133
print(f"Mixed: {mixed}")
134-
print(f"Words: {us.unicode_words(mixed)}")
134+
print(f"Words: {unicode_segmentation_py.unicode_words(mixed)}")
135135
```
136136

137137
### Display Width Calculation
@@ -147,15 +147,15 @@ examples = [
147147
]
148148

149149
for text in examples:
150-
    width = us.text_width(text)
151-
    width_cjk = us.text_width_cjk(text)
150+
    width = unicode_segmentation_py.text_width(text)
151+
    width_cjk = unicode_segmentation_py.text_width_cjk(text)
152152
    print(f"Text: {text!r:20} Width: {width:2} CJK: {width_cjk:2} Chars: {len(text):2}")
153153

154154
# Character widths
155155
chars = ['a', 'A', '1', ' ', '', '', '', '🎉', '\t', '\n']
156156
for c in chars:
157-
    w = us.text_width(c)
158-
    w_cjk = us.text_width_cjk(c)
157+
    w = unicode_segmentation_py.text_width(c)
158+
    w_cjk = unicode_segmentation_py.text_width_cjk(c)
159159
    w_str = str(w) if w is not None else "None"
160160
    w_cjk_str = str(w_cjk) if w_cjk is not None else "None"
161161
    print(f" {c!r:6} width: {w_str:4} cjk: {w_cjk_str:4}")

0 commit comments

Comments
 (0)