Skip to content

Commit 360e0c3

Browse files
committed
feat!: update general-sam to 1.0.0
1 parent 1f59cec commit 360e0c3

13 files changed

Lines changed: 107 additions & 102 deletions

Cargo.lock

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "general-sam-py"
3-
version = "0.7.0-post0"
3+
version = "1.0.0"
44
edition = "2021"
55
license = "MIT OR Apache-2.0"
66
description = "Python bindings for general-sam and some utilities"
@@ -15,5 +15,10 @@ crate-type = ["cdylib"]
1515

1616
[dependencies]
1717
either = "1.10.0"
18-
general-sam = { version = "0.7.0", features = ["all"] }
18+
general-sam = { version = "1.0.0", features = ["all"] }
1919
pyo3 = { version = "0.21.0", features = ["extension-module", "generate-import-lib", "abi3-py38"] }
20+
21+
[profile.release]
22+
lto = true
23+
strip = true
24+
opt-level = "z"

README.md

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -39,12 +39,12 @@ pip install general-sam
3939

4040
## Usage
4141

42-
### `GeneralSAM`
42+
### `GeneralSam`
4343

4444
```python
45-
from general_sam import GeneralSAM
45+
from general_sam import GeneralSam
4646

47-
sam = GeneralSAM.from_bytes(b"abcbc")
47+
sam = GeneralSam.from_bytes(b"abcbc")
4848

4949
# "cbc" is a suffix of "abcbc"
5050
state = sam.get_root_state()
@@ -58,9 +58,9 @@ assert not state.is_accepting()
5858
```
5959

6060
```python
61-
from general_sam import GeneralSAM
61+
from general_sam import GeneralSam
6262

63-
sam = GeneralSAM.from_chars("abcbc")
63+
sam = GeneralSam.from_chars("abcbc")
6464
state = sam.get_root_state()
6565

6666
# "b" is not a suffix but at least a substring of "abcbc"
@@ -81,13 +81,13 @@ assert not state.is_accepting() and state.is_nil()
8181
```
8282

8383
```python
84-
from general_sam import GeneralSAM, GeneralSAMState, build_trie_from_chars
84+
from general_sam import GeneralSam, GeneralSamState, build_trie_from_chars
8585

8686
trie, _ = build_trie_from_chars(["hello", "Chielo"])
87-
sam = GeneralSAM.from_trie(trie)
87+
sam = GeneralSam.from_trie(trie)
8888

8989

90-
def fetch_state(s: str) -> GeneralSAMState:
90+
def fetch_state(s: str) -> GeneralSamState:
9191
state = sam.get_root_state()
9292
state.feed_chars(s)
9393
return state
@@ -193,7 +193,7 @@ assert state.is_nil()
193193
### `GreedyTokenizer`
194194

195195
```python
196-
from general_sam import GeneralSAM, GreedyTokenizer, build_trie_from_chars
196+
from general_sam import GeneralSam, GreedyTokenizer, build_trie_from_chars
197197

198198
vocab = ["a", "ab", "b", "bc", "c", "d", "e", "f", "cd", "abcde"]
199199
trie, token_to_trie_node = build_trie_from_chars(vocab)
@@ -202,7 +202,7 @@ trie_node_to_token = [-1] * trie.num_of_nodes()
202202
for i, j in enumerate(token_to_trie_node):
203203
trie_node_to_token[j] = i
204204

205-
sam = GeneralSAM.from_trie(trie)
205+
sam = GeneralSam.from_trie(trie)
206206
tokenizer = GreedyTokenizer.from_sam_and_trie(sam, trie)
207207

208208

general_sam/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from .general_sam import (
2-
GeneralSAM,
3-
GeneralSAMState,
2+
GeneralSam,
3+
GeneralSamState,
44
GreedyTokenizer,
55
Trie,
66
TrieNode,
@@ -20,8 +20,8 @@
2020
)
2121

2222
__all__ = [
23-
"GeneralSAM",
24-
"GeneralSAMState",
23+
"GeneralSam",
24+
"GeneralSamState",
2525
"GreedyTokenizer",
2626
"Trie",
2727
"TrieNode",

general_sam/general_sam.pyi

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ from typing import Callable, Mapping, Optional, Sequence, Tuple, Union
22

33
ByteOrChar = Union[str, int]
44
TrieNodeID = int
5-
GeneralSAMNodeID = int
5+
GeneralSamNodeID = int
66

77
NIL_NODE_ID = 0
88
ROOT_NODE_ID = 1
@@ -41,16 +41,16 @@ class Trie:
4141
root_node_id: Optional[TrieNodeID] = None,
4242
) -> TrieNode: ...
4343

44-
class GeneralSAMState:
44+
class GeneralSamState:
4545
def is_in_chars(self) -> bool: ...
4646
def is_in_bytes(self) -> bool: ...
47-
def get_node_id(self) -> GeneralSAMNodeID: ...
47+
def get_node_id(self) -> GeneralSamNodeID: ...
4848
def is_nil(self) -> bool: ...
4949
def is_root(self) -> bool: ...
5050
def is_accepting(self) -> bool: ...
51-
def get_trans(self) -> Mapping[ByteOrChar, GeneralSAMNodeID]: ...
52-
def get_suffix_parent_id(self) -> GeneralSAMNodeID: ...
53-
def copy(self) -> "GeneralSAMState": ...
51+
def get_trans(self) -> Mapping[ByteOrChar, GeneralSamNodeID]: ...
52+
def get_suffix_parent_id(self) -> GeneralSamNodeID: ...
53+
def clone(self) -> "GeneralSamState": ...
5454
def goto_suffix_parent(self) -> None: ...
5555
def goto_char(self, t: str) -> None: ...
5656
def goto_byte(self, t: int) -> None: ...
@@ -60,39 +60,39 @@ class GeneralSAMState:
6060
self,
6161
trie: Trie,
6262
in_stack_callback: Callable[
63-
["GeneralSAMState", TrieNodeID, Optional[ByteOrChar]], None
63+
["GeneralSamState", TrieNodeID, Optional[ByteOrChar]], None
6464
],
65-
out_stack_callback: Callable[["GeneralSAMState", TrieNodeID], None],
65+
out_stack_callback: Callable[["GeneralSamState", TrieNodeID], None],
6666
trie_node_id: Optional[TrieNodeID] = None,
6767
) -> TrieNode: ...
6868
def bfs_along(
6969
self,
7070
trie: Trie,
7171
in_queue_callback: Callable[
72-
["GeneralSAMState", TrieNodeID, Optional[ByteOrChar]], None
72+
["GeneralSamState", TrieNodeID, Optional[ByteOrChar]], None
7373
],
74-
out_queue_callback: Callable[["GeneralSAMState", TrieNodeID], None],
74+
out_queue_callback: Callable[["GeneralSamState", TrieNodeID], None],
7575
trie_node_id: Optional[TrieNodeID] = None,
7676
) -> TrieNode: ...
7777

78-
class GeneralSAM:
78+
class GeneralSam:
7979
@staticmethod
80-
def from_chars(s: str) -> "GeneralSAM": ...
80+
def from_chars(s: str) -> "GeneralSam": ...
8181
@staticmethod
82-
def from_bytes(s: bytes) -> "GeneralSAM": ...
82+
def from_bytes(s: bytes) -> "GeneralSam": ...
8383
@staticmethod
84-
def from_trie(trie: Trie) -> "GeneralSAM": ...
84+
def from_trie(trie: Trie) -> "GeneralSam": ...
8585
def is_in_chars(self) -> bool: ...
8686
def is_in_bytes(self) -> bool: ...
8787
def num_of_nodes(self) -> int: ...
88-
def get_root_state(self) -> GeneralSAMState: ...
89-
def get_state(self, node_id: GeneralSAMNodeID) -> GeneralSAMState: ...
90-
def get_topo_and_suf_len_sorted_states(self) -> Sequence[GeneralSAMState]: ...
88+
def get_root_state(self) -> GeneralSamState: ...
89+
def get_state(self, node_id: GeneralSamNodeID) -> GeneralSamState: ...
90+
def get_topo_and_suf_len_sorted_states(self) -> Sequence[GeneralSamState]: ...
9191

9292
class GreedyTokenizer:
9393
@staticmethod
94-
def from_sam_and_trie(sam: GeneralSAM, trie: Trie) -> "GreedyTokenizer": ...
95-
def get_sam(self) -> GeneralSAM: ...
94+
def from_sam_and_trie(sam: GeneralSam, trie: Trie) -> "GreedyTokenizer": ...
95+
def get_sam(self) -> GeneralSam: ...
9696
def is_in_chars(self) -> bool: ...
9797
def is_in_bytes(self) -> bool: ...
9898
def tokenize_str(

general_sam/vocab_prefix.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
cast,
1212
)
1313

14-
from .general_sam import GeneralSAM, GeneralSAMState, Trie
14+
from .general_sam import GeneralSam, GeneralSamState, Trie
1515
from .trie_utils import (
1616
CountInfo,
1717
SortResult,
@@ -63,14 +63,14 @@ def __init__(
6363
trie_builder(self.vocab_rev),
6464
)
6565

66-
self.sam_rev = GeneralSAM.from_trie(self.trie_rev)
66+
self.sam_rev = GeneralSam.from_trie(self.trie_rev)
6767
self._gen_cnt_info_in_sam()
6868

6969
@property
70-
def _state_feed_fn(self) -> Callable[[GeneralSAMState, Union[bytes, str]], None]:
70+
def _state_feed_fn(self) -> Callable[[GeneralSamState, Union[bytes, str]], None]:
7171
return {
72-
VocabPrefixBytesOrChars.BYTES: GeneralSAMState.feed_bytes,
73-
VocabPrefixBytesOrChars.CHARS: GeneralSAMState.feed_chars,
72+
VocabPrefixBytesOrChars.BYTES: GeneralSamState.feed_bytes,
73+
VocabPrefixBytesOrChars.CHARS: GeneralSamState.feed_chars,
7474
}[self.bytes_or_chars]
7575

7676
def _gen_cnt_info_in_sam(self):
@@ -127,11 +127,11 @@ def _gen_cnt_info_in_sam(self):
127127
assert link_cnt_info.tot_cnt_lower <= state_cnt_info.tot_cnt_lower
128128
assert link_cnt_info.tot_cnt_upper >= state_cnt_info.tot_cnt_upper
129129

130-
def get_root_state(self) -> GeneralSAMState:
130+
def get_root_state(self) -> GeneralSamState:
131131
return self.sam_rev.get_root_state()
132132

133133
def prepend_feed(
134-
self, state: GeneralSAMState, token: Union[str, bytes]
134+
self, state: GeneralSamState, token: Union[str, bytes]
135135
) -> Optional[CountInfo]:
136136
if self.bytes_or_chars == VocabPrefixBytesOrChars.BYTES and isinstance(
137137
token, str

src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ use pyo3::prelude::*;
99
fn general_sam(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
1010
m.add_class::<trie::TrieNode>()?;
1111
m.add_class::<trie::Trie>()?;
12-
m.add_class::<sam::GeneralSAMState>()?;
13-
m.add_class::<sam::GeneralSAM>()?;
12+
m.add_class::<sam::GeneralSamState>()?;
13+
m.add_class::<sam::GeneralSam>()?;
1414
m.add_class::<tokenizer::GreedyTokenizer>()?;
1515
Ok(())
1616
}

0 commit comments

Comments
 (0)