Skip to content

Commit e4f4029

Browse files
committed
[fel] update llama splitter tool meta
1 parent 3a0b9b1 commit e4f4029

7 files changed

Lines changed: 257 additions & 464 deletions

File tree

framework/fel/python/plugins/fel_llama_splitter_tools/callable_registers.py

Lines changed: 0 additions & 29 deletions
This file was deleted.

framework/fel/python/plugins/fel_llama_splitter_tools/llama_splitter_tool.py

Lines changed: 14 additions & 33 deletions
Original file line number · Diff line number · Diff line change
@@ -6,7 +6,7 @@
66
import traceback
77
from typing import Tuple, List, Any, Callable
88

9-
from fitframework import fit_logger
9+
from fitframework import fit_logger, fitable
1010
from llama_index.core.node_parser import (
1111
SentenceSplitter,
1212
TokenTextSplitter,
@@ -17,11 +17,11 @@
1717
from llama_index.core.schema import Document as LDocument
1818
from llama_index.embeddings.openai import OpenAIEmbedding
1919

20-
from .callable_registers import register_callable_tool
2120
from .node_utils import to_llama_index_document
21+
from .types.semantic_splitter_options import SemanticSplitterOptions
2222

23-
24-
def sentence_splitter(text: str, separator: str, chunk_size: int, chunk_overlap: int, **kwargs) -> List[str]:
23+
@fitable("llama.tools.sentence_splitter", "default")
24+
def sentence_splitter(text: str, separator: str, chunk_size: int, chunk_overlap: int) -> List[str]:
2525
"""Parse text with a preference for complete sentences."""
2626
if len(text) == 0:
2727
return []
@@ -38,7 +38,8 @@ def sentence_splitter(text: str, separator: str, chunk_size: int, chunk_overlap:
3838
return []
3939

4040

41-
def token_text_splitter(text: str, separator: str, chunk_size: int, chunk_overlap: int, **kwargs) -> List[str]:
41+
@fitable("llama.tools.token_text_splitter", "default")
42+
def token_text_splitter(text: str, separator: str, chunk_size: int, chunk_overlap: int) -> List[str]:
4243
"""Splitting text that looks at word tokens."""
4344
if len(text) == 0:
4445
return []
@@ -55,14 +56,15 @@ def token_text_splitter(text: str, separator: str, chunk_size: int, chunk_overla
5556
return []
5657

5758

58-
def semantic_splitter(buffer_size: int, breakpoint_percentile_threshold: int, docs: List[LDocument], **kwargs) \
59+
# @fitable("llama.tools.semantic_splitter", "default")
60+
def semantic_splitter(buffer_size: int, breakpoint_percentile_threshold: int, docs: List[LDocument], options: SemanticSplitterOptions) \
5961
-> List[BaseNode]:
6062
"""Splitting text that looks at word tokens."""
6163
if len(docs) == 0:
6264
return []
63-
api_key = kwargs.get("api_key")
64-
model_name = kwargs.get("model_name")
65-
api_base = kwargs.get("api_base")
65+
api_key = options.api_key
66+
model_name = options.model_name
67+
api_base = options.api_base
6668

6769
embed_model = OpenAIEmbedding(model_name=model_name, api_base=api_base, api_key=api_key, max_tokens=4096)
6870

@@ -80,8 +82,9 @@ def semantic_splitter(buffer_size: int, breakpoint_percentile_threshold: int, do
8082
return []
8183

8284

85+
# @fitable("llama.tools.sentence_window_node_parser", "default")
8386
def sentence_window_node_parser(window_size: int, window_metadata_key: str, original_text_metadata_key: str,
84-
docs: List[LDocument], **kwargs) -> List[BaseNode]:
87+
docs: List[LDocument]) -> List[BaseNode]:
8588
"""Splitting text that looks at word tokens."""
8689
if len(docs) == 0:
8790
return []
@@ -96,26 +99,4 @@ def sentence_window_node_parser(window_size: int, window_metadata_key: str, orig
9699
except BaseException:
97100
fit_logger.error("Invoke semantic splitter failed.")
98101
traceback.print_exc()
99-
return []
100-
101-
102-
# Tuple 结构: (tool_func, config_args, return_description)
103-
splitter_basic_toolkit: List[Tuple[Callable[..., Any], List[str], str]] = [
104-
(sentence_splitter, ["text", "separator", "chunk_size", "chunk_overlap"], "Split sentences by sentence."),
105-
(token_text_splitter, ["text", "separator", "chunk_size", "chunk_overlap"], "Split sentences by token."),
106-
(semantic_splitter,
107-
["docs", "buffer_size", "breakpoint_percentile_threshold", "chunk_overlap", "model_name", "api_key", "api_base"],
108-
"Split sentences by semantic."),
109-
(sentence_window_node_parser, ["docs", "window_size", "window_metadata_key", "original_text_metadata_key"],
110-
"Splits all documents into individual sentences")
111-
]
112-
113-
for tool in splitter_basic_toolkit:
114-
register_callable_tool(tool, sentence_splitter.__module__, "llama_index.rag.toolkit")
115-
116-
if __name__ == '__main__':
117-
import time
118-
from .llama_schema_helper import dump_llama_schema
119-
120-
current_timestamp = time.strftime('%Y%m%d%H%M%S')
121-
dump_llama_schema(splitter_basic_toolkit, f"./llama_tool_schema-{str(current_timestamp)}.json")
102+
return []

0 commit comments

Comments (0)