Skip to content

Commit 119718a

Browse files
fix: fix TPM & RPM
1 parent 8d71fdc commit 119718a

3 files changed

Lines changed: 32 additions & 33 deletions

File tree

graphgen/models/llm/limitter.py

Lines changed: 27 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
1+
import asyncio
12
import time
23
from datetime import datetime, timedelta
3-
import asyncio
44

55
from graphgen.utils import logger
66

77

88
class RPM:
9-
109
def __init__(self, rpm: int = 1000):
1110
self.rpm = rpm
12-
self.record = {'rpm_slot': self.get_minute_slot(), 'counter': 0}
11+
self.record = {"rpm_slot": self.get_minute_slot(), "counter": 0}
1312

14-
def get_minute_slot(self):
13+
@staticmethod
14+
def get_minute_slot():
1515
current_time = time.time()
1616
dt_object = datetime.fromtimestamp(current_time)
1717
total_minutes_since_midnight = dt_object.hour * 60 + dt_object.minute
@@ -22,37 +22,35 @@ async def wait(self, silent=False):
2222
dt_object = datetime.fromtimestamp(current)
2323
minute_slot = self.get_minute_slot()
2424

25-
if self.record['rpm_slot'] == minute_slot:
25+
if self.record["rpm_slot"] == minute_slot:
2626
# check whether the RPM limit is exceeded
27-
if self.record['counter'] >= self.rpm:
27+
if self.record["counter"] >= self.rpm:
2828
# wait until next minute
29-
next_minute = dt_object.replace(
30-
second=0, microsecond=0) + timedelta(minutes=1)
29+
next_minute = dt_object.replace(second=0, microsecond=0) + timedelta(
30+
minutes=1
31+
)
3132
_next = next_minute.timestamp()
3233
sleep_time = abs(_next - current)
3334
if not silent:
34-
logger.info('RPM sleep %s', sleep_time)
35+
logger.info("RPM sleep %s", sleep_time)
3536
await asyncio.sleep(sleep_time)
3637

37-
self.record = {
38-
'rpm_slot': self.get_minute_slot(),
39-
'counter': 0
40-
}
38+
self.record = {"rpm_slot": self.get_minute_slot(), "counter": 0}
4139
else:
42-
self.record = {'rpm_slot': self.get_minute_slot(), 'counter': 0}
43-
self.record['counter'] += 1
40+
self.record = {"rpm_slot": self.get_minute_slot(), "counter": 0}
41+
self.record["counter"] += 1
4442

4543
if not silent:
4644
logger.debug(self.record)
4745

4846

4947
class TPM:
50-
51-
def __init__(self, tpm: int = 20000):
48+
def __init__(self, tpm: int = 100000):
5249
self.tpm = tpm
53-
self.record = {'tpm_slot': self.get_minute_slot(), 'counter': 0}
50+
self.record = {"tpm_slot": self.get_minute_slot(), "counter": 0}
5451

55-
def get_minute_slot(self):
52+
@staticmethod
53+
def get_minute_slot():
5654
current_time = time.time()
5755
dt_object = datetime.fromtimestamp(current_time)
5856
total_minutes_since_midnight = dt_object.hour * 60 + dt_object.minute
@@ -64,25 +62,24 @@ async def wait(self, token_count, silent=False):
6462
minute_slot = self.get_minute_slot()
6563

6664
# entered a new minute slot: restart the counter with this request's tokens and skip waiting
67-
if self.record['tpm_slot'] != minute_slot:
68-
self.record = {'tpm_slot': minute_slot, 'counter': token_count}
65+
if self.record["tpm_slot"] != minute_slot:
66+
self.record = {"tpm_slot": minute_slot, "counter": token_count}
6967
return
7068

7169
# check whether the TPM limit is exceeded
72-
self.record['counter'] += token_count
73-
if self.record['counter'] > self.tpm:
70+
self.record["counter"] += token_count
71+
if self.record["counter"] > self.tpm:
7472
# wait until next minute
75-
next_minute = dt_object.replace(
76-
second=0, microsecond=0) + timedelta(minutes=1)
73+
next_minute = dt_object.replace(second=0, microsecond=0) + timedelta(
74+
minutes=1
75+
)
7776
_next = next_minute.timestamp()
7877
sleep_time = abs(_next - current)
79-
logger.info('TPM sleep %s', sleep_time)
78+
logger.info("Current TPM: %s, limit: %s", self.record["counter"], self.tpm)
79+
logger.warning("TPM limit exceeded, wait %s seconds", sleep_time)
8080
await asyncio.sleep(sleep_time)
8181

82-
self.record = {
83-
'tpm_slot': self.get_minute_slot(),
84-
'counter': token_count
85-
}
82+
self.record = {"tpm_slot": self.get_minute_slot(), "counter": token_count}
8683

8784
if not silent:
8885
logger.debug(self.record)

graphgen/models/llm/openai_client.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ def __init__(
3939
seed: Optional[int] = None,
4040
topk_per_token: int = 5, # number of topk tokens to generate for each token
4141
request_limit: bool = False,
42+
rpm: RPM = None,
43+
tpm: TPM = None,
4244
**kwargs: Any,
4345
):
4446
super().__init__(**kwargs)
@@ -51,8 +53,8 @@ def __init__(
5153

5254
self.token_usage: list = []
5355
self.request_limit = request_limit
54-
self.rpm = RPM(rpm=1000)
55-
self.tpm = TPM(tpm=50000)
56+
self.rpm = rpm or RPM()
57+
self.tpm = tpm or TPM()
5658

5759
self.__post_init__()
5860

webui/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,7 @@ def sum_tokens(client):
468468
label="TPM",
469469
minimum=5000,
470470
maximum=5000000,
471-
value=50000,
471+
value=100000,
472472
step=1000,
473473
interactive=True,
474474
visible=True,

0 commit comments

Comments
 (0)