Skip to content

Commit 0533daf

Browse files
authored
feat: support ark embedding (#449)
* fix: vision embedding
* feat: openai like embedding to ark embedding factory
* feat: header and consts
* fix: comment
1 parent 132edeb commit 0533daf

File tree

11 files changed

+327
-19
lines changed

11 files changed

+327
-19
lines changed

config.yaml.full

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ model:
1515
api_key:
1616
# [optional] for knowledgebase
1717
embedding:
18-
name: doubao-embedding-text-240715
19-
dim: 2560
18+
name: doubao-embedding-vision-250615
19+
dim: 2048
2020
api_base: https://ark.cn-beijing.volces.com/api/v3/
2121
api_key:
2222
video:

veadk/configs/model_configs.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
DEFAULT_MODEL_AGENT_API_BASE,
2424
DEFAULT_MODEL_AGENT_NAME,
2525
DEFAULT_MODEL_AGENT_PROVIDER,
26+
DEFAULT_MODEL_EMBEDDING_NAME,
27+
DEFAULT_MODEL_EMBEDDING_DIM,
2628
)
2729

2830

@@ -46,13 +48,13 @@ def api_key(self) -> str:
4648
class EmbeddingModelConfig(BaseSettings):
4749
model_config = SettingsConfigDict(env_prefix="MODEL_EMBEDDING_")
4850

49-
name: str = "doubao-embedding-text-240715"
51+
name: str = DEFAULT_MODEL_EMBEDDING_NAME
5052
"""Model name for embedding."""
5153

52-
dim: int = 2560
54+
dim: int = DEFAULT_MODEL_EMBEDDING_DIM
5355
"""Embedding dim is different from different models."""
5456

55-
api_base: str = "https://ark.cn-beijing.volces.com/api/v3/"
57+
api_base: str = DEFAULT_MODEL_AGENT_API_BASE
5658
"""The api base of the model for embedding."""
5759

5860
@cached_property

veadk/consts.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,7 @@
7575

7676
DEFAULT_NACOS_GROUP = "VEADK_GROUP"
7777
DEFAULT_NACOS_INSTANCE_NAME = "veadk"
78+
79+
DEFAULT_MODEL_EMBEDDING_NAME = "doubao-embedding-vision-250615"
80+
DEFAULT_MODEL_EMBEDDING_API_BASE = "https://ark.cn-beijing.volces.com/api/v3/"
81+
DEFAULT_MODEL_EMBEDDING_DIM = 2048

veadk/knowledgebase/backends/in_memory_backend.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@
1414

1515
from llama_index.core import Document, SimpleDirectoryReader, VectorStoreIndex
1616
from llama_index.core.schema import BaseNode
17-
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
1817
from pydantic import Field
1918
from typing_extensions import Any, override
2019

2120
from veadk.configs.model_configs import EmbeddingModelConfig, NormalEmbeddingModelConfig
2221
from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
2322
from veadk.knowledgebase.backends.utils import get_llama_index_splitter
23+
from veadk.models.ark_embedding import create_embedding_model
2424

2525

2626
class InMemoryKnowledgeBackend(BaseKnowledgebaseBackend):
@@ -39,7 +39,7 @@ class InMemoryKnowledgeBackend(BaseKnowledgebaseBackend):
3939
)
4040

4141
def model_post_init(self, __context: Any) -> None:
42-
self._embed_model = OpenAILikeEmbedding(
42+
self._embed_model = create_embedding_model(
4343
model_name=self.embedding_config.name,
4444
api_key=self.embedding_config.api_key,
4545
api_base=self.embedding_config.api_base,

veadk/knowledgebase/backends/opensearch_backend.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
VectorStoreIndex,
2222
)
2323
from llama_index.core.schema import BaseNode
24-
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
2524
from pydantic import Field
2625
from typing_extensions import Any, override
2726

@@ -33,6 +32,7 @@
3332
)
3433
from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
3534
from veadk.knowledgebase.backends.utils import get_llama_index_splitter
35+
from veadk.models.ark_embedding import create_embedding_model
3636
from veadk.utils.logger import get_logger
3737

3838
try:
@@ -112,7 +112,7 @@ def model_post_init(self, __context: Any) -> None:
112112
vector_store=self._vector_store
113113
)
114114

115-
self._embed_model = OpenAILikeEmbedding(
115+
self._embed_model = create_embedding_model(
116116
model_name=self.embedding_config.name,
117117
api_key=self.embedding_config.api_key,
118118
api_base=self.embedding_config.api_base,

veadk/knowledgebase/backends/redis_backend.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
VectorStoreIndex,
2020
)
2121
from llama_index.core.schema import BaseNode
22-
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
2322
from pydantic import Field
2423
from typing_extensions import Any, override
2524

@@ -28,6 +27,7 @@
2827
from veadk.configs.model_configs import EmbeddingModelConfig, NormalEmbeddingModelConfig
2928
from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
3029
from veadk.knowledgebase.backends.utils import get_llama_index_splitter
30+
from veadk.models.ark_embedding import create_embedding_model
3131

3232
try:
3333
from llama_index.vector_stores.redis import RedisVectorStore
@@ -92,7 +92,7 @@ def model_post_init(self, __context: Any) -> None:
9292
password=self.redis_config.password,
9393
)
9494

95-
self._embed_model = OpenAILikeEmbedding(
95+
self._embed_model = create_embedding_model(
9696
model_name=self.embedding_config.name,
9797
api_key=self.embedding_config.api_key,
9898
api_base=self.embedding_config.api_base,

veadk/knowledgebase/backends/tos_vector_backend.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
SimpleDirectoryReader,
2121
)
2222
from llama_index.core.schema import BaseNode
23-
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
2423
from pydantic import Field
2524
from typing_extensions import Any, override
2625

@@ -30,6 +29,7 @@
3029

3130
from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
3231
from veadk.knowledgebase.backends.utils import get_llama_index_splitter
32+
from veadk.models.ark_embedding import create_embedding_model
3333
from veadk.utils.logger import get_logger
3434

3535
logger = get_logger(__name__)
@@ -77,7 +77,7 @@ def model_post_init(self, __context: Any) -> None:
7777
# create_bucket and index if not exist
7878
self._create_index()
7979

80-
self._embed_model = OpenAILikeEmbedding(
80+
self._embed_model = create_embedding_model(
8181
model_name=self.embedding_config.name,
8282
api_key=self.embedding_config.api_key,
8383
api_base=self.embedding_config.api_base,

veadk/memory/long_term_memory_backends/in_memory_backend.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
from llama_index.core import Document, VectorStoreIndex
1616
from llama_index.core.schema import BaseNode
17-
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
1817
from pydantic import Field
1918
from typing_extensions import Any, override
2019

@@ -23,14 +22,15 @@
2322
from veadk.memory.long_term_memory_backends.base_backend import (
2423
BaseLongTermMemoryBackend,
2524
)
25+
from veadk.models.ark_embedding import create_embedding_model
2626

2727

2828
class InMemoryLTMBackend(BaseLongTermMemoryBackend):
2929
embedding_config: EmbeddingModelConfig = Field(default_factory=EmbeddingModelConfig)
3030
"""Embedding model configs"""
3131

3232
def model_post_init(self, __context: Any) -> None:
33-
self._embed_model = OpenAILikeEmbedding(
33+
self._embed_model = create_embedding_model(
3434
model_name=self.embedding_config.name,
3535
api_key=self.embedding_config.api_key,
3636
api_base=self.embedding_config.api_base,

veadk/memory/long_term_memory_backends/opensearch_backend.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
from llama_index.core import Document, VectorStoreIndex
1818
from llama_index.core.schema import BaseNode
19-
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
2019
from pydantic import Field
2120
from typing_extensions import Any, override
2221

@@ -30,6 +29,7 @@
3029
from veadk.memory.long_term_memory_backends.base_backend import (
3130
BaseLongTermMemoryBackend,
3231
)
32+
from veadk.models.ark_embedding import create_embedding_model
3333
from veadk.utils.logger import get_logger
3434

3535
try:
@@ -55,7 +55,7 @@ class OpensearchLTMBackend(BaseLongTermMemoryBackend):
5555
"""Embedding model configs"""
5656

5757
def model_post_init(self, __context: Any) -> None:
58-
self._embed_model = OpenAILikeEmbedding(
58+
self._embed_model = create_embedding_model(
5959
model_name=self.embedding_config.name,
6060
api_key=self.embedding_config.api_key,
6161
api_base=self.embedding_config.api_base,

veadk/memory/long_term_memory_backends/redis_backend.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
from llama_index.core import Document, VectorStoreIndex
1616
from llama_index.core.schema import BaseNode
17-
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
1817
from pydantic import Field
1918
from typing_extensions import Any, override
2019

@@ -25,6 +24,7 @@
2524
from veadk.memory.long_term_memory_backends.base_backend import (
2625
BaseLongTermMemoryBackend,
2726
)
27+
from veadk.models.ark_embedding import create_embedding_model
2828
from veadk.utils.logger import get_logger
2929

3030
try:
@@ -51,7 +51,7 @@ class RedisLTMBackend(BaseLongTermMemoryBackend):
5151
"""Embedding model configs"""
5252

5353
def model_post_init(self, __context: Any) -> None:
54-
self._embed_model = OpenAILikeEmbedding(
54+
self._embed_model = create_embedding_model(
5555
model_name=self.embedding_config.name,
5656
api_key=self.embedding_config.api_key,
5757
api_base=self.embedding_config.api_base,

0 commit comments

Comments
 (0)