Skip to content

Commit 5bc7fca

Browse files
feat: support knowledgebase local profile (#453)
* feat: support knowledgebase local profile * add file header * add line length with 80 * add line length with 80 * remove length * add log
1 parent a1d39cd commit 5bc7fca

File tree

7 files changed

+233
-82
lines changed

7 files changed

+233
-82
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,4 +97,4 @@ include-package-data = true
9797
exclude = [
9898
"veadk/integrations/ve_faas/template/*",
9999
"veadk/integrations/ve_faas/web_template/*"
100-
]
100+
]

veadk/agent.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from __future__ import annotations
1616

1717
import os
18-
from typing import Dict, Optional, Union, Literal
18+
from typing import Dict, Literal, Optional, Union
1919

2020
# If user didn't set LITELLM_LOCAL_MODEL_COST_MAP, set it to True
2121
# to enable local model cost map.
@@ -231,6 +231,16 @@ def model_post_init(self, __context: Any) -> None:
231231
)
232232
self.tools.append(load_knowledgebase_tool)
233233

234+
if self.knowledgebase.enable_profile:
235+
logger.debug(
236+
f"Knowledgebase {self.knowledgebase.index} profile enabled"
237+
)
238+
from veadk.tools.builtin_tools.load_kb_queries import (
239+
load_kb_queries,
240+
)
241+
242+
self.tools.append(load_kb_queries)
243+
234244
if self.long_term_memory is not None:
235245
from google.adk.tools import load_memory
236246

@@ -333,7 +343,11 @@ def load_skills(self):
333343
f"- name: {skill.name}\n- description: {skill.description}\n\n"
334344
)
335345

336-
if self.skills_mode not in ["skills_sandbox", "aio_sandbox", "local"]:
346+
if self.skills_mode not in [
347+
"skills_sandbox",
348+
"aio_sandbox",
349+
"local",
350+
]:
337351
raise ValueError(
338352
f"Unsupported skill mode {self.skills_mode}, use `skills_sandbox`, `aio_sandbox` or `local` instead."
339353
)

veadk/knowledgebase/knowledgebase.py

Lines changed: 65 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
2222
from veadk.knowledgebase.entry import KnowledgebaseEntry
23+
from veadk.knowledgebase.types import KnowledgebaseProfile
2324
from veadk.utils.logger import get_logger
2425

2526
logger = get_logger(__name__)
@@ -86,84 +87,6 @@ class KnowledgeBase(BaseModel):
8687
8788
Notes:
8889
Please ensure that you have set the embedding-related configurations in environment variables.
89-
90-
Examples:
91-
### Simple backend
92-
93-
Create a local knowledgebase:
94-
95-
```python
96-
from veadk import Agent, Runner
97-
from veadk.knowledgebase.knowledgebase import KnowledgeBase
98-
from veadk.memory.short_term_memory import ShortTermMemory
99-
100-
app_name = "veadk_playground_app"
101-
user_id = "veadk_playground_user"
102-
session_id = "veadk_playground_session"
103-
104-
105-
knowledgebase = KnowledgeBase(backend="opensearch", app_name=app_name)
106-
knowledgebase.add_from_files(files=[knowledgebase_file])
107-
108-
agent = Agent(knowledgebase=knowledgebase)
109-
110-
runner = Runner(
111-
agent=agent,
112-
short_term_memory=ShortTermMemory(),
113-
app_name=app_name,
114-
user_id=user_id,
115-
)
116-
117-
response = await runner.run(
118-
messages="Tell me the secret of green.", session_id=session_id
119-
)
120-
print(response)
121-
```
122-
123-
### Initialize knowledgebase with metadata
124-
125-
```python
126-
from veadk.knowledgebase import KnowledgeBase
127-
128-
knowledgebase = KnowledgeBase(
129-
name="user_data",
130-
description="A knowledgebase contains user hobbies.",
131-
index="my_app",
132-
)
133-
```
134-
135-
### Initialize knowledgebase with backend instance
136-
137-
```python
138-
import veadk.config # noqa
139-
140-
from veadk.knowledgebase import KnowledgeBase
141-
from veadk.knowledgebase.backends.in_memory_backend import InMemoryKnowledgeBackend
142-
143-
backend = InMemoryKnowledgeBackend(
144-
index="my_app",
145-
embedding_config=...,
146-
)
147-
148-
knowledgebase = KnowledgeBase(
149-
name="user_data",
150-
description="A knowledgebase contains user hobbies.",
151-
backend=backend,
152-
)
153-
```
154-
155-
### Initialize knowledgebase with backend config
156-
157-
```python
158-
from veadk.knowledgebase import KnowledgeBase
159-
160-
knowledgebase = KnowledgeBase(
161-
name="user_data",
162-
description="A knowledgebase contains user hobbies.",
163-
backend="local",
164-
backend_config={"index": "user_app"},
165-
)
166-
```
16790
"""
16891

16992
name: str = "user_knowledgebase"
@@ -183,6 +106,8 @@ class KnowledgeBase(BaseModel):
183106

184107
index: str = ""
185108

109+
enable_profile: bool = False
110+
186111
def model_post_init(self, __context: Any) -> None:
187112
if isinstance(self.backend, BaseKnowledgebaseBackend):
188113
self._backend = self.backend
@@ -312,3 +237,65 @@ def __getattr__(self, name) -> Callable:
312237
For example, knowledgebase.delete(...) -> self._backend.delete(...)
313238
"""
314239
return getattr(self._backend, name)
240+
241+
async def generate_profiles(
    self, files: list[str], profile_path: str = ""
) -> list[KnowledgebaseProfile]:
    """Generate a knowledgebase profile for each file and persist them as JSON.

    Each file is read and sent to a dedicated profile-generator agent. The
    agent's response is parsed into a `KnowledgebaseProfile`. Every
    successfully parsed profile is written to
    `<profile_path>/profile_<profile name>.json`, and the names of all
    generated profiles are written to `<profile_path>/profile_list.json`.

    Args:
        files (list[str]): Paths of the files to generate profiles for.
        profile_path (str, optional): Directory to store the generated
            profiles. If empty, `./profiles/knowledgebase/profiles_<index>`
            is used, where `<index>` is this knowledgebase's index.

    Returns:
        list[KnowledgebaseProfile]: The profiles that were generated. Files
        whose response could not be parsed or validated are logged and
        skipped, so the list may be shorter than `files`.
    """
    import json
    from pathlib import Path

    from veadk import Agent, Runner
    from veadk.utils.misc import write_string_to_file

    # Resolve the output directory up front. Doing this lazily inside the
    # write loop would leave it empty when no profile is generated, and the
    # profile list would then be written to "/profile_list.json".
    if not profile_path:
        profile_path = f"./profiles/knowledgebase/profiles_{self.index}"

    # Read every file before calling the model so that an unreadable file
    # fails fast, before any model request is made.
    file_contents = [Path(file).read_text() for file in files]

    agent = Agent(
        name="profile_generator",
        model_name="deepseek-v3-2-251201",
        description="A generator for generating knowledgebase profiles for the given files.",
        instruction='Generate JSON-formatted profile for the given file content. The corresponding language should be consistent with the file content. Respond ONLY with a JSON object containing the capitalized fields. Format: {"name": "", "description": "", "tags": [], "keywords": []} (3-5 tags, 3-5 keywords)',
        output_schema=KnowledgebaseProfile,
    )
    runner = Runner(agent=agent)

    profiles = []
    for idx, file_content in enumerate(file_contents):
        response = await runner.run(
            messages="file content: " + file_content,
            session_id=f"profile_{idx}",
        )
        try:
            profiles.append(KnowledgebaseProfile(**json.loads(response)))
        except (TypeError, ValueError):
            # ValueError covers json.JSONDecodeError and pydantic's
            # ValidationError; TypeError covers a JSON payload that is not
            # an object. One bad response must not abort the whole batch.
            logger.error(
                f"Failed to parse profile response for file {files[idx]}: {response}. Skip for this file."
            )
            continue

    logger.debug(f"Generated {len(profiles)} profiles: {profiles}.")

    for profile in profiles:
        write_string_to_file(
            profile_path + f"/profile_{profile.name}.json",
            json.dumps(profile.model_dump(), indent=4, ensure_ascii=False),
        )

    profile_names = [profile.name for profile in profiles]

    # The list file is the index other components read to discover which
    # profile files exist.
    write_string_to_file(
        profile_path + "/profile_list.json",
        json.dumps(profile_names, indent=4, ensure_ascii=False),
    )

    return profiles

veadk/knowledgebase/types.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from pydantic import BaseModel, Field
16+
17+
18+
class KnowledgebaseProfile(BaseModel):
    # Structured description of a knowledgebase. All four fields are
    # required: none of them declares a default value.
    #
    # NOTE: documented with comments rather than a class docstring, because
    # pydantic would include a docstring in the generated schema.

    # Name of the profiled knowledgebase.
    name: str = Field(description="The name of the knowledgebase.")

    # Free-text description of the profiled knowledgebase.
    description: str = Field(description="The description of the knowledgebase.")

    # Category tags for the knowledgebase.
    tags: list[str] = Field(
        description=(
            "Some tags of the knowledgebase. "
            "It represents the category of the knowledgebase. "
            "About 3-5 tags should be provided."
        )
    )

    # Suggested query keywords for the knowledgebase.
    keywords: list[str] = Field(
        description=(
            "Recommanded query keywords of the knowledgebase. "
            "About 3-5 keywords should be provided."
        )
    )
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import json
16+
from pathlib import Path
17+
18+
from google.adk.tools.tool_context import ToolContext
19+
20+
from veadk import Agent
21+
from veadk.utils.logger import get_logger
22+
23+
logger = get_logger(__name__)
24+
25+
26+
def load_profile(profile_path: Path) -> dict:
    """Read a knowledgebase profile file and return its parsed JSON content.

    Args:
        profile_path: Location of the profile JSON file.

    Returns:
        The value decoded from the file's JSON text.
    """
    with open(profile_path, "r") as profile_file:
        # Decode straight from the open handle.
        return json.load(profile_file)
31+
32+
33+
def load_kb_queries(profile_names: list[str], tool_context: ToolContext) -> list[str]:
    """Load recommended knowledgebase queries based on the knowledgebase profiles.

    Args:
        profile_names: The list of knowledgebase profile names to load the profile for.

    Returns:
        A list of recommended query keywords collected from the given
        profiles. Profiles that cannot be found or read are skipped. If the
        agent is not a VeADK agent or has no knowledgebase, a single-item
        list containing an error message is returned instead.
    """
    logger.info(f"Loading knowledgebase profiles: {profile_names}")

    agent = tool_context._invocation_context.agent

    if not isinstance(agent, Agent):
        logger.warning("Agent is not VeADK Agent, cannot load knowledgebase profile")
        return ["Error: Agent is not VeADK Agent, cannot load knowledgebase profile"]

    if not agent.knowledgebase:
        logger.warning("Agent has no knowledgebase, cannot load knowledgebase profile")
        return ["Error: Agent has no knowledgebase, cannot load knowledgebase profile"]

    index = agent.knowledgebase.index

    profiles_dir = Path(f"./profiles/knowledgebase/profiles_{index}")
    profiles_root = profiles_dir.resolve()

    recommended_queries = []
    for profile_name in profile_names:
        profile_path = profiles_dir / f"profile_{profile_name}.json"

        # Profile names are supplied by the model, so make sure the resolved
        # path cannot escape the profiles directory (e.g. via "../").
        if not profile_path.resolve().is_relative_to(profiles_root):
            logger.warning(
                f"Profile name {profile_name} points outside of {profiles_dir}. Skip for this profile."
            )
            continue

        try:
            profile = load_profile(profile_path)
        except (OSError, ValueError) as e:
            # OSError: missing or unreadable file. ValueError: invalid JSON
            # or undecodable bytes. An unknown name must not fail the whole
            # tool call.
            logger.warning(
                f"Failed to load profile {profile_name} from {profile_path}: {e}. Skip for this profile."
            )
            continue

        if not isinstance(profile, dict):
            logger.warning(
                f"Profile {profile_name} is not a JSON object. Skip for this profile."
            )
            continue

        keywords = profile.get("keywords", [])
        recommended_queries.extend(keywords)
        logger.debug(f"Loaded keywords from profile {profile_name}: {keywords}")

    logger.debug(
        f"Loaded total keywords for knowledgebase {index}: {recommended_queries}"
    )
    return recommended_queries

veadk/tools/builtin_tools/load_knowledgebase.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
from __future__ import annotations
1616

17+
import json
18+
1719
from google.adk.models.llm_request import LlmRequest
1820
from google.adk.tools.function_tool import FunctionTool
1921
from google.adk.tools.tool_context import ToolContext
@@ -23,6 +25,7 @@
2325

2426
from veadk.knowledgebase import KnowledgeBase
2527
from veadk.knowledgebase.entry import KnowledgebaseEntry
28+
from veadk.tools.builtin_tools.load_kb_queries import load_profile
2629
from veadk.utils.logger import get_logger
2730

2831
logger = get_logger(__name__)
@@ -70,16 +73,57 @@ async def process_llm_request(
7073
await super().process_llm_request(
7174
tool_context=tool_context, llm_request=llm_request
7275
)
76+
77+
index = self.knowledgebase.index
78+
if self.knowledgebase.enable_profile:
79+
from pathlib import Path
80+
81+
profile_names = []
82+
profile_descriptions = []
83+
84+
with open(
85+
f"./profiles/knowledgebase/profiles_{index}/profile_list.json",
86+
"r",
87+
) as f:
88+
profile_names = json.load(f)
89+
90+
for profile_name in profile_names:
91+
profile_descriptions.append(
92+
load_profile(
93+
Path(
94+
f"./profiles/knowledgebase/profiles_{index}/profile_{profile_name}.json"
95+
),
96+
)["description"]
97+
)
98+
99+
profiles_text = "\n".join(
100+
f"- profile_name: {name}\n profile_description: {profile_descriptions[idx]}"
101+
for idx, name in enumerate(profile_names)
102+
)
103+
73104
# Tell the model about the knowledgebase.
74105
llm_request.append_instructions(
75106
[
76107
f"""
77108
You have a knowledgebase (knowledegebase name is `{self.knowledgebase.name}`, knowledgebase description is `{self.knowledgebase.description}`). You can use it to answer questions. If any questions need
78109
you to look up the knowledgebase, you should call load_knowledgebase function with a query.
79110
"""
80-
]
111+
],
81112
)
82113

114+
if self.knowledgebase.enable_profile:
115+
llm_request.append_instructions(
116+
[
117+
f"""
118+
The knowledgebase is divided into the following profiles:
119+
120+
{profiles_text}
121+
122+
You should choose some profiles which are relevant to the user question. Before load the knowledgebase, you must call `load_kb_queries` to load the recommanded queries of the knowledgebase profiles. You should generate final knowledgebase queries based on the user question and recommanded queries.
123+
"""
124+
]
125+
)
126+
83127
async def load_knowledgebase(
84128
self, query: str, tool_context: ToolContext
85129
) -> LoadKnowledgebaseResponse:

0 commit comments

Comments
 (0)