diff --git a/dtable_events/app/config.py b/dtable_events/app/config.py index 0f36c87a..2c8611ac 100644 --- a/dtable_events/app/config.py +++ b/dtable_events/app/config.py @@ -100,7 +100,16 @@ def get_llm_prices(models): # AI models and prices LLM_MODELS = configs.get('LLM_MODELS', []) AI_PRICES = get_llm_prices(LLM_MODELS) -BAIDU_OCR_TOKENS = configs.get('BAIDU_OCR_TOKENS', default={}) + +## OCR +OCR_SERVICE_CONFIG = configs.get('OCR', {}) +OCR_SERVICE_MODEL = OCR_SERVICE_CONFIG.get('model', 'baidu-ocr') +OCR_SERVICE_PRICE = OCR_SERVICE_CONFIG.get('price', 0) +AI_PRICES.update({ + OCR_SERVICE_MODEL: { + 'times': OCR_SERVICE_PRICE + } +}) # IO server ARCHIVE_VIEW_EXPORT_ROW_LIMIT = configs.get('ARCHIVE_VIEW_EXPORT_ROW_LIMIT', default=250000) diff --git a/dtable_events/tasks/ai_stats_worker.py b/dtable_events/tasks/ai_stats_worker.py index b6561ba7..e5913d49 100644 --- a/dtable_events/tasks/ai_stats_worker.py +++ b/dtable_events/tasks/ai_stats_worker.py @@ -10,10 +10,10 @@ from dateutil import relativedelta from sqlalchemy import text -from dtable_events.app.config import AI_PRICES, BAIDU_OCR_TOKENS, AI_STATS_ENABLED +from dtable_events.app.config import AI_PRICES, OCR_SERVICE_MODEL_NAME, OCR_SERVICE_PRICE, AI_STATS_ENABLED from dtable_events.app.event_redis import RedisClient, redis_cache from dtable_events.db import init_db_session_class -from dtable_events.utils import get_opt_from_conf_or_env, parse_bool, uuid_str_to_36_chars, uuid_str_to_32_chars +from dtable_events.utils import uuid_str_to_36_chars, uuid_str_to_32_chars logger = logging.getLogger(__name__) @@ -61,9 +61,6 @@ def save_to_memory(self, usage_info, session): if not isinstance(usage.get('output_tokens'), int): usage['output_tokens'] = 0 - if model in BAIDU_OCR_TOKENS: - usage['output_tokens'] = BAIDU_OCR_TOKENS[model] - if usage_info.get('assistant_uuid'): # for assistant call, set stat object to the assistant owner, i.e., table admin, group admin ... @@ -74,23 +71,28 @@ def save_to_memory(self, usage_info, session): logger.warning('assistant %s has no owner', assistant_uuid) return if owner_info['org_id'] != -1: + self.org_stats[owner_info['org_id']][model]['times'] += usage['times'] self.org_stats[owner_info['org_id']][model]['input_tokens'] += usage.get('input_tokens') or 0 self.org_stats[owner_info['org_id']][model]['output_tokens'] += usage.get('output_tokens') or 0 else: + self.owner_stats[owner_info['owner_id']][model]['times'] += usage['times'] self.owner_stats[owner_info['owner_id']][model]['input_tokens'] += usage.get('input_tokens') or 0 self.owner_stats[owner_info['owner_id']][model]['output_tokens'] += usage.get('output_tokens') or 0 else: # for non-assistant call, set stat obj to common user if usage_info['org_id'] != -1: + self.org_stats[usage_info['org_id']][model]['times'] += usage['times'] self.org_stats[usage_info['org_id']][model]['input_tokens'] += usage.get('input_tokens') or 0 self.org_stats[usage_info['org_id']][model]['output_tokens'] += usage.get('output_tokens') or 0 else: + self.owner_stats[usage_info['username']][model]['times'] += usage['times'] self.owner_stats[usage_info['username']][model]['input_tokens'] += usage.get('input_tokens') or 0 self.owner_stats[usage_info['username']][model]['output_tokens'] += usage.get('output_tokens') or 0 dtable_uuid = usage_info.get('dtable_uuid') if dtable_uuid: dtable_uuid = uuid_str_to_32_chars(usage_info['dtable_uuid']) + self.dtable_stats[dtable_uuid][model]['times'] += usage['times'] self.dtable_stats[dtable_uuid][model]['input_tokens'] += usage.get('input_tokens') or 0 self.dtable_stats[dtable_uuid][model]['output_tokens'] += usage.get('output_tokens') or 0 @@ -190,20 +192,28 @@ def stats_worker(self): team_data = [] team_sql = ''' - INSERT INTO `stats_ai_by_team`(`org_id`, `month`, `model`, `input_tokens`, `output_tokens`, `cost`, `created_at`, `updated_at`) - VALUES (:org_id, :month, :model, :input_tokens, :output_tokens, :cost, :created_at, :updated_at) - ON DUPLICATE KEY UPDATE `input_tokens`=`input_tokens`+VALUES(`input_tokens`), + INSERT INTO `stats_ai_by_team`(`org_id`, `month`, `model`, `times`, `input_tokens`, `output_tokens`, `cost`, `created_at`, `updated_at`) + VALUES (:org_id, :month, :model, :times, :input_tokens, :output_tokens, :cost, :created_at, :updated_at) + ON DUPLICATE KEY UPDATE `times`=`times`+VALUES(`times`), + `input_tokens`=`input_tokens`+VALUES(`input_tokens`), `output_tokens`=`output_tokens`+VALUES(`output_tokens`), `cost`=`cost`+VALUES(`cost`), `updated_at`=VALUES(`updated_at`) ''' for org_id, models_dict in org_stats.items(): for model, usage in models_dict.items(): + # get usage metadata + times = usage.get('times') or 0 input_tokens = usage.get('input_tokens') or 0 output_tokens = usage.get('output_tokens') or 0 + # get price + times_price = AI_PRICES[model].get('times') or 0 input_tokens_price = AI_PRICES[model].get('input_tokens') or 0 output_tokens_price = AI_PRICES[model].get('output_tokens') or 0 + + # calculate cost + times_cost = times_price * times input_cost = input_tokens_price * (input_tokens / 1000000) output_cost = output_tokens_price * (output_tokens / 1000000) logger.info('org %s model %s, input_tokens %s cost %s, output_tokens %s cost %s', org_id, model, input_tokens, input_cost, output_tokens, output_cost) @@ -212,9 +222,10 @@ def stats_worker(self): 'org_id': org_id, 'month': month, 'model': model, + 'times': times, 'input_tokens': input_tokens, 'output_tokens': output_tokens, - 'cost': input_cost + output_cost, + 'cost': times_cost + input_cost + output_cost, 'created_at': datetime.now(), 'updated_at': datetime.now() } @@ -222,20 +233,28 @@ def stats_worker(self): owner_data = [] owner_sql = ''' - INSERT INTO `stats_ai_by_owner`(`owner_id`, `month`, `model`, `input_tokens`, `output_tokens`, `cost`, `created_at`, `updated_at`) - VALUES (:owner_id, :month, :model, :input_tokens, :output_tokens, :cost, :created_at, :updated_at) - ON DUPLICATE KEY UPDATE `input_tokens`=`input_tokens`+VALUES(`input_tokens`), + INSERT INTO `stats_ai_by_owner`(`owner_id`, `month`, `model`, `times`, `input_tokens`, `output_tokens`, `cost`, `created_at`, `updated_at`) + VALUES (:owner_id, :month, :model, :times, :input_tokens, :output_tokens, :cost, :created_at, :updated_at) + ON DUPLICATE KEY UPDATE `times`=`times`+VALUES(`times`), + `input_tokens`=`input_tokens`+VALUES(`input_tokens`), `output_tokens`=`output_tokens`+VALUES(`output_tokens`), `cost`=`cost`+VALUES(`cost`), `updated_at`=VALUES(`updated_at`) ''' for owner_id, models_dict in owner_stats.items(): for model, usage in models_dict.items(): + # get usage metadata + times = usage.get('times') or 0 input_tokens = usage.get('input_tokens') or 0 output_tokens = usage.get('output_tokens') or 0 + # get price + times_price = AI_PRICES[model].get('times') or 0 input_tokens_price = AI_PRICES[model].get('input_tokens') or 0 output_tokens_price = AI_PRICES[model].get('output_tokens') or 0 + + # calculate cost + times_cost = times * times_price input_cost = input_tokens_price * (input_tokens / 1000000) output_cost = output_tokens_price * (output_tokens / 1000000) logger.info('owner %s model %s, input_tokens %s cost %s, output_tokens %s cost %s', owner_id, model, input_tokens, input_cost, output_tokens, output_cost) @@ -244,9 +263,10 @@ def stats_worker(self): 'owner_id': owner_id, 'month': month, 'model': model, + 'times': times, 'input_tokens': input_tokens, 'output_tokens': output_tokens, - 'cost': input_cost + output_cost, + 'cost': times_cost + input_cost + output_cost, 'created_at': datetime.now(), 'updated_at': datetime.now() } @@ -254,9 +274,10 @@ def stats_worker(self): dtable_data = [] dtable_sql = ''' - INSERT INTO `stats_ai_by_dtable`(`dtable_uuid`, `date`, `model`, `owner`, `org_id`, `input_tokens`, `output_tokens`, `cost`, `created_at`, `updated_at`) - VALUES (:dtable_uuid, :date, :model, :owner, :org_id, :input_tokens, :output_tokens, :cost, :created_at, :updated_at) - ON DUPLICATE KEY UPDATE `input_tokens`=`input_tokens`+VALUES(`input_tokens`), + INSERT INTO `stats_ai_by_dtable`(`dtable_uuid`, `date`, `model`, `owner`, `org_id`, `times`, `input_tokens`, `output_tokens`, `cost`, `created_at`, `updated_at`) + VALUES (:dtable_uuid, :date, :model, :owner, :org_id, :times, :input_tokens, :output_tokens, :cost, :created_at, :updated_at) + ON DUPLICATE KEY UPDATE `times`=`times`+VALUES(`times`), + `input_tokens`=`input_tokens`+VALUES(`input_tokens`), `output_tokens`=`output_tokens`+VALUES(`output_tokens`), `cost`=`cost`+VALUES(`cost`), `updated_at`=VALUES(`updated_at`) @@ -264,6 +285,8 @@ def stats_worker(self): dtable_owners_dict = self.query_dtable_owners(list(dtable_stats.keys())) for dtable_uuid, models_dict in dtable_stats.items(): for model, usage in models_dict.items(): + # get usage metadata + times = usage.get('times') or 0 input_tokens = usage.get('input_tokens') or 0 output_tokens = usage.get('output_tokens') or 0 owner_info = dtable_owners_dict.get(dtable_uuid) @@ -274,8 +297,13 @@ def stats_worker(self): owner = None org_id = None + # get price + times_price = AI_PRICES[model].get('times') or 0 input_tokens_price = AI_PRICES[model].get('input_tokens') or 0 output_tokens_price = AI_PRICES[model].get('output_tokens') or 0 + + # calculate cost + times_cost = times * times_price input_cost = input_tokens_price * (input_tokens / 1000000) output_cost = output_tokens_price * (output_tokens / 1000000) logger.info('dtable %s model %s, input_tokens %s cost %s, output_tokens %s cost %s', dtable_uuid, model, input_tokens, input_cost, output_tokens, output_cost) @@ -286,9 +314,10 @@ def stats_worker(self): 'model': model, 'owner': owner, 'org_id': org_id, + 'times': times, 'input_tokens': input_tokens, 'output_tokens': output_tokens, - 'cost': input_cost + output_cost, + 'cost': times_cost + input_cost + output_cost, 'created_at': datetime.now(), 'updated_at': datetime.now() }