-
Notifications
You must be signed in to change notification settings - Fork 2.8k
refactor: Support different types of aliyun stt model #4448
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| # coding=utf-8 | ||
| """ | ||
| @project: MaxKB | ||
| @Author:niu | ||
| @file: __init__.py | ||
| @date:2025/12/5 15:11 | ||
| @desc: | ||
| """ | ||
| from .stt import AliyunBaiLianSTTModelCredential | ||
| from .omni_stt import AliyunBaiLianOmiSTTModelCredential | ||
| from .default_stt import AliyunBaiLianDefaultSTTModelCredential | ||
| from .asr_stt import AliyunBaiLianAsrSTTModelCredential |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| # coding=utf-8 | ||
| """ | ||
| @project: MaxKB | ||
| @Author:niu | ||
| @file: default_stt.py | ||
| @date:2025/12/5 15:12 | ||
| @desc: | ||
| """ | ||
| from typing import Dict, Any | ||
|
|
||
| from common import forms | ||
| from common.exception.app_exception import AppApiException | ||
| from common.forms import BaseForm | ||
| from maxkb.settings import maxkb_logger | ||
| from models_provider.base_model_provider import BaseModelCredential, ValidCode | ||
| from django.utils.translation import gettext as _ | ||
|
|
||
|
|
||
|
|
||
class AliyunBaiLianDefaultSTTModelCredential(BaseForm, BaseModelCredential):
    """Credential form and validator for the default Aliyun BaiLian STT model.

    The form exposes three fields: ``type`` (which recognition backend to
    use), ``api_url`` and ``api_key``. ``is_valid`` checks that the model type
    is known to the provider, that an API key was supplied, and that the
    model built from the credential passes its own ``check_auth``.
    """

    # Radio selector for the recognition backend; the stored values are
    # 'qwen', 'omni' and 'other', defaulting to 'qwen'.
    type = forms.Radio(_("Type"), required=True, text_field='label', default_value='qwen', provider='', method='',
                       value_field='value', option_list=[
                           {'label': _('Audio file recognition - Tongyi Qwen'),
                            'value': 'qwen'},
                           {'label': _('Qwen-Omni'),
                            'value': 'omni'},
                           {'label': _('Audio file recognition - Fun-ASR/Paraformer/SenseVoice'),
                            'value': 'other'}
                       ])
    # Linked to the 'qwen' and 'omni' values of `type` through
    # relation_show_field_dict (presumably controls when the field is shown
    # -- confirm against common.forms).
    api_url = forms.TextInputField(_('API URL'), required=True, relation_show_field_dict={'type': ['qwen', 'omni']})
    api_key = forms.PasswordInputField(_('API Key'), required=True)

    def is_valid(self,
                 model_type: str,
                 model_name: str,
                 model_credential: Dict[str, Any],
                 model_params: Dict[str, Any],
                 provider,
                 raise_exception: bool = False
                 ) -> bool:
        """Validate a credential by building the model and calling ``check_auth``.

        :param model_type: model type identifier; must match the ``value`` of
            one entry returned by ``provider.get_model_type_list()``.
        :param model_name: name of the model to instantiate.
        :param model_credential: credential values; ``api_key`` must be present.
        :param model_params: accepted for interface compatibility; not read here.
        :param provider: object supplying ``get_model_type_list`` and ``get_model``.
        :param raise_exception: when True, a missing key or a failed check is
            raised as ``AppApiException`` instead of returning ``False``.
        :return: ``True`` when every check passes, otherwise ``False``.
        :raises AppApiException: always for an unsupported ``model_type`` and
            for an ``AppApiException`` raised during the auth check; for the
            other failures only when ``raise_exception`` is True.
        """
        # Fetch the supported types once (the original fetched them twice).
        model_type_list = provider.get_model_type_list()
        if not any(mt.get('value') == model_type for mt in model_type_list):
            # An unsupported type is raised regardless of `raise_exception`.
            raise AppApiException(
                ValidCode.valid_error.value,
                _('{model_type} Model type is not supported').format(model_type=model_type)
            )

        # NOTE(review): only `api_key` is checked here even though `api_url`
        # is declared required on the form -- confirm this is intended.
        required_keys = ['api_key']
        for key in required_keys:
            if key not in model_credential:
                if raise_exception:
                    raise AppApiException(
                        ValidCode.valid_error.value,
                        _('{key} is required').format(key=key)
                    )
                return False

        try:
            model = provider.get_model(model_type, model_name, model_credential)
            model.check_auth()
        except Exception as e:
            maxkb_logger.error(f'Exception: {e}', exc_info=True)
            # Application-level errors propagate unchanged, even when
            # `raise_exception` is False.
            if isinstance(e, AppApiException):
                raise
            if raise_exception:
                raise AppApiException(
                    ValidCode.valid_error.value,
                    _('Verification failed, please check whether the parameters are correct: {error}').format(
                        error=str(e))
                )
            return False
        return True

    def encryption_dict(self, model: Dict[str, object]) -> Dict[str, object]:
        """Return a copy of ``model`` with ``api_key`` replaced by its encrypted form.

        A missing ``api_key`` is treated as the empty string before being
        passed to the inherited ``encryption`` helper.
        """
        return {
            **model,
            'api_key': super().encryption(model.get('api_key', ''))
        }

    def get_model_params_setting_form(self, model_name):
        """Return no parameter-settings form; this method always yields ``None``."""
        pass
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| #coding=utf-8 | ||
| """ | ||
| @project: MaxKB | ||
| @Author:niu | ||
| @file: __init__.py | ||
| @date:2025/12/5 15:39 | ||
| @desc: | ||
| """ | ||
|
|
||
| from .asr_stt import AliyunBaiLianAsrSpeechToText | ||
| from .default_stt import AliyunBaiLianDefaultSpeechToText | ||
| from .stt import AliyunBaiLianSpeechToText | ||
| from .omni_stt import AliyunBaiLianOmiSpeechToText |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
| # coding=utf-8 | ||
| """ | ||
| @project: MaxKB | ||
| @Author:niu | ||
| @file: default_stt.py | ||
| @date:2025/12/5 15:40 | ||
| @desc: | ||
| """ | ||
| import os | ||
| from typing import Dict | ||
|
|
||
| from models_provider.base_model_provider import MaxKBBaseModel | ||
|
|
||
| from models_provider.impl.base_stt import BaseSpeechToText | ||
|
|
||
|
|
||
class AliyunBaiLianDefaultSpeechToText(MaxKBBaseModel, BaseSpeechToText):
    """Entry-point STT model that hands construction to a concrete backend.

    ``new_instance`` reads the ``type`` value of the credential and returns an
    instance of one of the sibling speech-to-text classes, so the instance
    methods defined here are placeholders that do nothing.
    """

    def check_auth(self):
        """Placeholder; performs no check and returns ``None``."""
        return None

    def speech_to_text(self, audio_file):
        """Placeholder; ignores ``audio_file`` and returns ``None``."""
        return None

    @staticmethod
    def is_cache_model():
        """Report that this model is not a cache model (always ``False``)."""
        return False

    @staticmethod
    def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
        """Build the speech-to-text backend selected by ``model_credential['type']``.

        ``'qwen'`` yields ``AliyunBaiLianAsrSpeechToText`` and ``'omni'`` yields
        ``AliyunBaiLianOmiSpeechToText``; both receive ``api_url``. Any other
        value (including a missing ``type``) yields ``AliyunBaiLianSpeechToText``,
        which is constructed without ``api_url``. ``model_kwargs`` is passed
        both as ``params`` and expanded as keyword arguments.
        """
        # Imported at call time rather than module level (presumably to avoid
        # a circular import with the package that re-exports this class).
        from models_provider.impl.aliyun_bai_lian_model_provider.model.stt import AliyunBaiLianOmiSpeechToText, \
            AliyunBaiLianSpeechToText, AliyunBaiLianAsrSpeechToText

        selected = model_credential.get('type')
        # Backends that need an endpoint URL, checked lazily in the original order.
        url_backends = (
            ('qwen', AliyunBaiLianAsrSpeechToText),
            ('omni', AliyunBaiLianOmiSpeechToText),
        )
        backend_cls = next((cls for key, cls in url_backends if selected == key), None)

        if backend_cls is None:
            # Fallback backend: no api_url is forwarded.
            return AliyunBaiLianSpeechToText(
                model=model_name,
                api_key=model_credential.get('api_key'),
                params=model_kwargs,
                **model_kwargs,
            )

        return backend_cls(
            model=model_name,
            api_key=model_credential.get('api_key'),
            api_url=model_credential.get('api_url'),
            params=model_kwargs,
            **model_kwargs
        )
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The provided code has several issues:
Here’s an optimized version that addresses some of those points:
# coding=utf-8
"""
@project: MaxKB
@Author:niu
@file:default_stt.py
@date:2025/12/5 15:40
@desc:
"""
import os
from typing import Dict
from models_provider.base_model_provider import MaxKBBaseModel
from models_provider.impl.base_stt import BaseSpeechToText
class AliyunBaiLianDefaultSpeechToText(MaxKBBaseModel, BaseSpeechToText):
    """Reviewer-suggested variant of the default STT entry-point model.

    ``new_instance`` selects a concrete backend from the credential's ``type``
    value; the instance methods on this class are stubs.
    """

    def check_auth(self):
        """Stub; performs no check and returns ``None``."""
        # Implement authentication logic here
        pass

    def speech_to_text(self, audio_file):
        """Always raise ``NotImplementedError``; this class does no transcription."""
        raise NotImplementedError("This method should be implemented.")

    @staticmethod
    def is_cache_model() -> bool:
        """Check if the model supports caching."""
        return False

    @staticmethod
    def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs) -> 'AliyunBaiLianDefaultSpeechToText':
        """Build the backend selected by ``model_credential.get('type')``.

        ``'qwen'`` and ``'omni'`` read both ``api_key`` and ``api_url`` by
        subscript, so a missing key raises ``KeyError``; any other type reads
        only ``api_key``.

        NOTE(review): the return annotation names this class, but the objects
        returned are instances of the three imported backend classes --
        confirm they are compatible with it.
        """
        from models_provider.impl.aliyun_bai_lian_model_provider.model.stt import (
            AliyunBaiLianOmiSpeechToText,
            AliyunBaiLianSpeechToText,
            AliyunBaiLianAsrSpeechToText,
        )
        stt_type = model_credential.get('type')
        if stt_type == 'qwen':
            return AliyunBaiLianAsrSpeechToText(
                model=model_name,
                api_key=model_credential['api_key'],
                api_url=model_credential['api_url'],
                params=model_kwargs,
                **model_kwargs,
            )
        elif stt_type == 'omni':
            return AliyunBaiLianOmiSpeechToText(
                model=model_name,
                api_key=model_credential['api_key'],
                api_url=model_credential['api_url'],
                params=model_kwargs,
                **model_kwargs,
            )
        else:
            # Fallback backend: no api_url is forwarded.
            return AliyunBaiLianSpeechToText(
                model=model_name,
                api_key=model_credential['api_key'],
                params=model_kwargs,
                **model_kwargs,
            )
# Missing closing brace for AliyunBaiLianDefaultSpeechToText closure
Ensure that the remaining lines include proper closures ( |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The provided code contains several improvements and corrections:
Imports:
AliyunBaiLianDefaultSTTModelCredential to match its implementation (note that it seems there was a typo). AliyunBaiLianOriTTIConfig. Initialization of Models:
module_type, credential, etc.). Appending Default Model Info:
Module Name Correction:
QwenOriTTIConfig to QwenTTIConfig. Here's the updated code snippet:
This should resolve the issues and improve the structure of the model initialization process.