Skip to content

Commit ec8318a

Browse files
committed
feat: support knowledgebase local profile
1 parent a1d39cd commit ec8318a

File tree

6 files changed

+201
-81
lines changed

6 files changed

+201
-81
lines changed

veadk/agent.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from __future__ import annotations
1616

1717
import os
18-
from typing import Dict, Optional, Union, Literal
18+
from typing import Dict, Literal, Optional, Union
1919

2020
# If user didn't set LITELLM_LOCAL_MODEL_COST_MAP, set it to True
2121
# to enable local model cost map.
@@ -231,6 +231,13 @@ def model_post_init(self, __context: Any) -> None:
231231
)
232232
self.tools.append(load_knowledgebase_tool)
233233

234+
if self.knowledgebase.enable_profile:
235+
from veadk.tools.builtin_tools.load_kb_queries import (
236+
load_kb_queries,
237+
)
238+
239+
self.tools.append(load_kb_queries)
240+
234241
if self.long_term_memory is not None:
235242
from google.adk.tools import load_memory
236243

@@ -333,7 +340,11 @@ def load_skills(self):
333340
f"- name: {skill.name}\n- description: {skill.description}\n\n"
334341
)
335342

336-
if self.skills_mode not in ["skills_sandbox", "aio_sandbox", "local"]:
343+
if self.skills_mode not in [
344+
"skills_sandbox",
345+
"aio_sandbox",
346+
"local",
347+
]:
337348
raise ValueError(
338349
f"Unsupported skill mode {self.skills_mode}, use `skills_sandbox`, `aio_sandbox` or `local` instead."
339350
)

veadk/knowledgebase/knowledgebase.py

Lines changed: 65 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,14 @@
1414

1515
from __future__ import annotations
1616

17+
from pathlib import Path
1718
from typing import Any, Callable, Literal, Union
1819

1920
from pydantic import BaseModel, Field
2021

2122
from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
2223
from veadk.knowledgebase.entry import KnowledgebaseEntry
24+
from veadk.knowledgebase.types import KnowledgebaseProfile
2325
from veadk.utils.logger import get_logger
2426

2527
logger = get_logger(__name__)
@@ -86,84 +88,6 @@ class KnowledgeBase(BaseModel):
8688
8789
Notes:
8890
Please ensure that you have set the embedding-related configurations in environment variables.
89-
90-
Examples:
91-
### Simple backend
92-
93-
Create a local knowledgebase:
94-
95-
```python
96-
from veadk import Agent, Runner
97-
from veadk.knowledgebase.knowledgebase import KnowledgeBase
98-
from veadk.memory.short_term_memory import ShortTermMemory
99-
100-
app_name = "veadk_playground_app"
101-
user_id = "veadk_playground_user"
102-
session_id = "veadk_playground_session"
103-
104-
105-
knowledgebase = KnowledgeBase(backend="opensearch", app_name=app_name)
106-
knowledgebase.add_from_files(files=[knowledgebase_file])
107-
108-
agent = Agent(knowledgebase=knowledgebase)
109-
110-
runner = Runner(
111-
agent=agent,
112-
short_term_memory=ShortTermMemory(),
113-
app_name=app_name,
114-
user_id=user_id,
115-
)
116-
117-
response = await runner.run(
118-
messages="Tell me the secret of green.", session_id=session_id
119-
)
120-
print(response)
121-
```
122-
123-
### Initialize knowledgebase with metadata
124-
125-
```python
126-
from veadk.knowledgebase import KnowledgeBase
127-
128-
knowledgebase = KnowledgeBase(
129-
name="user_data",
130-
description="A knowledgebase contains user hobbies.",
131-
index="my_app",
132-
)
133-
```
134-
135-
### Initialize knowledgebase with backend instance
136-
137-
```python
138-
import veadk.config # noqa
139-
140-
from veadk.knowledgebase import KnowledgeBase
141-
from veadk.knowledgebase.backends.in_memory_backend import InMemoryKnowledgeBackend
142-
143-
backend = InMemoryKnowledgeBackend(
144-
index="my_app",
145-
embedding_config=...,
146-
)
147-
148-
knowledgebase = KnowledgeBase(
149-
name="user_data",
150-
description="A knowledgebase contains user hobbies.",
151-
backend=backend,
152-
)
153-
```
154-
155-
### Initialize knowledgebase with backend config
156-
157-
```python
158-
from veadk.knowledgebase import KnowledgeBase
159-
160-
knowledgebase = KnowledgeBase(
161-
name="user_data",
162-
description="A knowledgebase contains user hobbies.",
163-
backend="local",
164-
backend_config={"index": "user_app"},
165-
)
166-
```
16791
"""
16892

16993
name: str = "user_knowledgebase"
@@ -183,6 +107,8 @@ class KnowledgeBase(BaseModel):
183107

184108
index: str = ""
185109

110+
enable_profile: bool = False
111+
186112
def model_post_init(self, __context: Any) -> None:
187113
if isinstance(self.backend, BaseKnowledgebaseBackend):
188114
self._backend = self.backend
@@ -312,3 +238,64 @@ def __getattr__(self, name) -> Callable:
312238
For example, knowledgebase.delete(...) -> self._backend.delete(...)
313239
"""
314240
return getattr(self._backend, name)
241+
242+
async def generate_profiles(self, files: list[str], profile_path: str = ""):
    """Generate knowledgebase profiles for the given files and store them as JSON.

    Every file is read and sent to a profile-generator agent. Each response that
    parses into a `KnowledgebaseProfile` is written to
    `<profile_path>/profile_<profile name>.json`, and the names of all generated
    profiles are written to `<profile_path>/profile_list.json`.

    Args:
        files (list[str]): Paths of the files to generate profiles for.
        profile_path (str, optional): The directory to store the generated profiles. If empty, `./profiles/knowledgebase/profiles_<index>` is used.

    Returns:
        list[KnowledgebaseProfile]: The generated profiles. Files whose response could not be parsed are skipped.
    """
    import json

    from veadk import Agent, Runner
    from veadk.utils.misc import write_string_to_file

    # Resolve the output directory before anything is written. If this only
    # happened inside the per-profile loop, a run that produced no profiles
    # would write the profile list to "/profile_list.json".
    if not profile_path:
        profile_path = f"./profiles/knowledgebase/profiles_{self.index}"

    # Read everything first so an unreadable file fails before any model call.
    file_contents = [Path(file).read_text(encoding="utf-8") for file in files]

    agent = Agent(
        name="profile_generator",
        model_name="deepseek-v3-2-251201",
        # model_extra_config={
        #     "extra_body": {"thinking": {"type": "disabled"}},
        # },
        description="A generator for generating knowledgebase profiles for the given files.",
        instruction='Generate JSON-formatted profile for the given file content. The corresponding language should be consistent with the file content. Respond ONLY with a JSON object containing the capitalized fields. Format: {"name": "", "description": "", "tags": [], "keywords": []} (3-5 tags, 3-5 keywords)',
        output_schema=KnowledgebaseProfile,
    )
    runner = Runner(agent=agent)

    profiles = []
    for idx, (file, file_content) in enumerate(zip(files, file_contents)):
        response = await runner.run(
            messages="file content: " + file_content,
            session_id=f"profile_{idx}",
        )
        try:
            profiles.append(KnowledgebaseProfile(**json.loads(response)))
        except (ValueError, TypeError):
            # ValueError covers json.JSONDecodeError and (presumably — confirm
            # for the pinned pydantic version) pydantic's ValidationError;
            # TypeError covers a response that is not a JSON object.
            logger.error(
                f"Failed to parse JSON response for file {file}: {response}. Skip for this file."
            )
            continue

    logger.debug(f"Generated {len(profiles)} profiles: {profiles}.")

    for profile in profiles:
        write_string_to_file(
            profile_path + f"/profile_{profile.name}.json",
            json.dumps(profile.model_dump(), indent=4, ensure_ascii=False),
        )

    profile_names = [profile.name for profile in profiles]

    write_string_to_file(
        profile_path + "/profile_list.json",
        json.dumps(profile_names, indent=4, ensure_ascii=False),
    )

    return profiles

veadk/knowledgebase/types.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from pydantic import BaseModel, Field
2+
3+
4+
# Structured profile of a knowledgebase: a name, a description, tags and keywords.
# Used as the `output_schema` of the profile-generator agent in
# `KnowledgeBase.generate_profiles`, and serialized to JSON via `model_dump()`.
# NOTE(review): the field descriptions below are presumably surfaced to the model
# through the generated schema — treat them as prompt text, not as comments.
class KnowledgebaseProfile(BaseModel):
    # Profile name; also used to build the `profile_<name>.json` file name.
    name: str = Field(description="The name of the knowledgebase.")

    # Free-text summary of what the profile covers.
    description: str = Field(description="The description of the knowledgebase.")

    # Category labels for the profile.
    tags: list[str] = Field(
        description="Some tags of the knowledgebase. It represents the category of the knowledgebase. About 3-5 tags should be provided."
    )

    # Suggested query keywords; `load_kb_queries` returns these to the agent.
    keywords: list[str] = Field(
        description="Recommanded query keywords of the knowledgebase. About 3-5 keywords should be provided."
    )

veadk/tools/builtin_tools/load_kb_queries.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import json
2+
from pathlib import Path
3+
4+
from google.adk.tools.tool_context import ToolContext
5+
6+
from veadk import Agent
7+
from veadk.utils.logger import get_logger
8+
9+
logger = get_logger(__name__)
10+
11+
12+
def load_profile(profile_path: Path) -> dict:
    """Load a knowledgebase profile from a JSON file.

    Args:
        profile_path: Path of the profile JSON file.

    Returns:
        The parsed JSON content of the profile file.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Profiles are written as UTF-8 with `ensure_ascii=False`, so they must be
    # read back as UTF-8 instead of the platform default encoding.
    with open(profile_path, "r", encoding="utf-8") as f:
        return json.load(f)
17+
18+
19+
def load_kb_queries(profile_names: list[str], tool_context: ToolContext) -> list[str]:
    """Load recommended knowledgebase queries based on the knowledgebase profiles.

    Args:
        profile_names: The list of knowledgebase profile names to load the recommended queries for.

    Returns:
        A list of recommended query keywords collected from the given profiles. Profiles that cannot be loaded are skipped.
    """
    logger.info(f"Loading knowledgebase profiles: {profile_names}")

    agent = tool_context._invocation_context.agent

    if not isinstance(agent, Agent):
        logger.warning("Agent is not VeADK Agent, cannot load knowledgebase profile")
        return ["Error: Agent is not VeADK Agent, cannot load knowledgebase profile"]

    if not agent.knowledgebase:
        logger.warning("Agent has no knowledgebase, cannot load knowledgebase profile")
        return ["Error: Agent has no knowledgebase, cannot load knowledgebase profile"]

    index = agent.knowledgebase.index

    profiles_dir = Path(f"./profiles/knowledgebase/profiles_{index}")
    profiles_root = profiles_dir.resolve()

    recommended_queries = []
    for profile_name in profile_names:
        profile_path = profiles_dir / f"profile_{profile_name}.json"

        # Profile names are supplied by the model; never read outside the
        # profile directory of this knowledgebase (e.g. names containing "..").
        if not profile_path.resolve().is_relative_to(profiles_root):
            logger.warning(
                f"Profile {profile_name} resolves outside {profiles_dir}, skip it."
            )
            continue

        try:
            profile = load_profile(profile_path)
        except (OSError, ValueError) as e:
            # A missing or corrupt profile must not fail the whole tool call;
            # the remaining profiles can still provide keywords.
            logger.warning(f"Failed to load profile {profile_name}: {e}. Skip it.")
            continue

        keywords = profile.get("keywords", [])
        recommended_queries.extend(keywords)
        logger.debug(f"Loaded keywords from profile {profile_name}: {keywords}")

    logger.debug(
        f"Loaded total keywords for knowledgebase {index}: {recommended_queries}"
    )
    return recommended_queries

veadk/tools/builtin_tools/load_knowledgebase.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
from __future__ import annotations
1616

17+
import json
18+
1719
from google.adk.models.llm_request import LlmRequest
1820
from google.adk.tools.function_tool import FunctionTool
1921
from google.adk.tools.tool_context import ToolContext
@@ -23,6 +25,7 @@
2325

2426
from veadk.knowledgebase import KnowledgeBase
2527
from veadk.knowledgebase.entry import KnowledgebaseEntry
28+
from veadk.tools.builtin_tools.load_kb_queries import load_profile
2629
from veadk.utils.logger import get_logger
2730

2831
logger = get_logger(__name__)
@@ -70,16 +73,57 @@ async def process_llm_request(
7073
await super().process_llm_request(
7174
tool_context=tool_context, llm_request=llm_request
7275
)
76+
77+
index = self.knowledgebase.index
78+
if self.knowledgebase.enable_profile:
79+
from pathlib import Path
80+
81+
profile_names = []
82+
profile_descriptions = []
83+
84+
with open(
85+
f"./profiles/knowledgebase/profiles_{index}/profile_list.json",
86+
"r",
87+
) as f:
88+
profile_names = json.load(f)
89+
90+
for profile_name in profile_names:
91+
profile_descriptions.append(
92+
load_profile(
93+
Path(
94+
f"./profiles/knowledgebase/profiles_{index}/profile_{profile_name}.json"
95+
),
96+
)["description"]
97+
)
98+
99+
profiles_text = "\n".join(
100+
f"- profile_name: {name}\n profile_description: {profile_descriptions[idx]}"
101+
for idx, name in enumerate(profile_names)
102+
)
103+
73104
# Tell the model about the knowledgebase.
74105
llm_request.append_instructions(
75106
[
76107
f"""
77108
You have a knowledgebase (knowledegebase name is `{self.knowledgebase.name}`, knowledgebase description is `{self.knowledgebase.description}`). You can use it to answer questions. If any questions need
78109
you to look up the knowledgebase, you should call load_knowledgebase function with a query.
79110
"""
80-
]
111+
],
81112
)
82113

114+
if self.knowledgebase.enable_profile:
115+
llm_request.append_instructions(
116+
[
117+
f"""
118+
The knowledgebase is divided into the following profiles:
119+
120+
{profiles_text}
121+
122+
You should choose some profiles which are relevant to the user question. Before load the knowledgebase, you must call `load_kb_queries` to load the recommanded queries of the knowledgebase profiles. You should generate final knowledgebase queries based on the user question and recommanded queries.
123+
"""
124+
]
125+
)
126+
83127
async def load_knowledgebase(
84128
self, query: str, tool_context: ToolContext
85129
) -> LoadKnowledgebaseResponse:

veadk/utils/misc.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,3 +217,13 @@ async def upload_to_files_api(
217217
max_wait_seconds=max_wait_seconds,
218218
)
219219
return file.id
220+
221+
222+
def write_string_to_file(file_path: str, content: str):
    """Write `content` to `file_path` as UTF-8, creating parent directories if needed.

    An existing file at `file_path` is overwritten.

    Args:
        file_path: Destination file path.
        content: Text to write.
    """
    parent_dir, _ = os.path.split(file_path)

    # A bare file name has no directory component; there is nothing to create.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(file_path, mode="w", encoding="utf-8") as out:
        out.write(content)

0 commit comments

Comments
 (0)