Skip to content

Commit 0692edb

Browse files
author
李宜杰
committed
local eval add mindformers model
1 parent 2d794cf commit 0692edb

5 files changed

Lines changed: 374 additions & 9 deletions

File tree

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,32 @@
1+
from ais_bench.benchmark.models import MindFormerModel
2+
3+
# Example configuration for evaluating a local MindFormers model.
# All paths below are placeholders and must be replaced with real locations.
models = [
    {
        'attr': 'local',  # 'local' or 'service'
        # MindFormers-backed local model: the network is built from
        # `yaml_cfg_file`, the tokenizer is loaded with Hugging Face AutoTokenizer.
        'type': MindFormerModel,
        # NOTE(review): the local infer task in this commit compares the abbr against
        # the literal 'mindformer-model' to choose the `msrun` launcher, so renaming
        # it appears to fall back to `torchrun` — confirm before changing.
        'abbr': 'mindformer-model',
        'path': 'THUDM/chatglm-6b',  # path to the model directory (example value)
        'checkpoint': 'THUDM/your_checkpoint',  # path to the checkpoint file (example value)
        'yaml_cfg_file': 'THUDM/your.yaml',  # MindFormers YAML config; required by MindFormerModel
        'tokenizer_path': 'THUDM/chatglm-6b',  # path to the tokenizer directory (example value)
        # NOTE(review): MindFormerModel.__init__ declares no `model_kwargs` parameter, so
        # this entry looks like it is absorbed by **kwargs and ignored — confirm.
        'model_kwargs': {
            'device_map': 'npu',
        },
        # Forwarded to AutoTokenizer.from_pretrained; see
        # huggingface.co/docs/transformers/v4.50.0/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase
        'tokenizer_kwargs': {
            'padding_side': 'right',
        },
        # Sampling options merged into the keyword arguments of the network's generate() call.
        'generation_kwargs': {
            'temperature': 0.5,
            'top_k': 10,
            'top_p': 0.95,
            'do_sample': True,
            'seed': None,
            'repetition_penalty': 1.03,
        },
        # Multi-card / multi-node launch settings consumed by the infer task.
        'run_cfg': {'num_gpus': 1, 'num_procs': 1},
        'max_out_len': 100,  # maximum number of generated tokens
        'batch_size': 2,  # number of prompts per inference batch
        'max_seq_len': 2048,
        'batch_padding': True,
    },
]

ais_bench/benchmark/models/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,4 +14,5 @@
1414
from ais_bench.benchmark.models.api_models.triton_api import TritonCustomAPIStream # noqa: F401
1515
from ais_bench.benchmark.models.api_models.tgi_api import TGICustomAPIStream # noqa: F401
1616
from ais_bench.benchmark.models.api_models.vllm_custom_api_chat import VllmMultiturnAPIChatStream # noqa: F401
17-
from ais_bench.benchmark.models.local_models.vllm_offline_vl import VLLMOfflineVLModel
17+
from ais_bench.benchmark.models.local_models.vllm_offline_vl import VLLMOfflineVLModel
18+
from ais_bench.benchmark.models.local_models.mindformers_model import MindFormerModel
Lines changed: 302 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,302 @@
1+
import os, sys
2+
from typing import Dict, List, Optional, Union
3+
4+
import numpy as np
5+
import torch
6+
import transformers
7+
8+
from ais_bench.benchmark.models.local_models.base import BaseModel
9+
from ais_bench.benchmark.models import APITemplateParser
10+
from ais_bench.benchmark.registry import MODELS
11+
12+
from mindspore import Tensor, Model
13+
from mindformers import MindFormerConfig, build_context
14+
from mindformers.models import build_network
15+
from mindformers.core.parallel_config import build_parallel_config
16+
from mindformers.utils.load_checkpoint_utils import get_load_path_after_hf_convert
17+
from mindformers.trainer.utils import transform_and_load_checkpoint
18+
19+
20+
21+
class MultiTokenEOSCriteria(transformers.StoppingCriteria):
    """Stopping criterion that triggers once every sample has emitted a stop string.

    The stop string may span several tokens, so each call decodes the trailing
    tokens of every sample and looks for the string in the decoded text.
    """

    def __init__(
        self,
        sequence: str,
        tokenizer: transformers.PreTrainedTokenizer,
        batch_size: int,
    ):
        """Record the stop string and prepare one "finished" flag per sample.

        Args:
            sequence: The stop string to look for.
            tokenizer: Tokenizer used to encode the stop string and to decode
                the generated ids.
            batch_size: Number of samples tracked by this criterion.
        """
        # One flag per sample; a flag never goes back to False once set.
        self.done_tracker = [False for _ in range(batch_size)]
        self.sequence = sequence
        self.sequence_ids = tokenizer.encode(sequence, add_special_tokens=False)
        # Number of trailing tokens that are decoded on every call.
        self.sequence_id_len = len(self.sequence_ids)
        self.tokenizer = tokenizer

    def __call__(self, input_ids, scores, **kwargs) -> bool:
        """Return True when all tracked samples contain the stop string.

        Only the last `sequence_id_len` tokens of each row are decoded and
        inspected; rows already marked as finished are not re-checked.
        """
        tail_ids = input_ids[:, -self.sequence_id_len:]
        tail_texts = self.tokenizer.batch_decode(tail_ids)
        for row in range(len(self.done_tracker)):
            if not self.done_tracker[row]:
                self.done_tracker[row] = self.sequence in tail_texts[row]
        return all(self.done_tracker)
46+
47+
48+
def drop_error_generation_kwargs(generation_kwargs: dict) -> dict:
    """Strip benchmark-only keys from generation kwargs, in place.

    The keys 'is_synthetic', 'batch_size' and 'do_performance' are removed when
    present; all other entries are left untouched.

    Args:
        generation_kwargs: Keyword arguments destined for the model's generate call.

    Returns:
        The same dict object that was passed in, after removal of the keys.
    """
    for unsupported in ('is_synthetic', 'batch_size', 'do_performance'):
        generation_kwargs.pop(unsupported, None)
    return generation_kwargs
53+
54+
55+
@MODELS.register_module()
class MindFormerModel(BaseModel):
    """Benchmark model wrapper that runs generation through a MindFormers network.

    The network is built from a MindFormers YAML config with `build_network`,
    weights are loaded from a checkpoint path, and prompts are tokenized with a
    Hugging Face `AutoTokenizer`.

    Args:
        path: Model directory. Passed to the base class, used as
            `config.model.pretrained_model_dir`, and used as the tokenizer
            location when `tokenizer_path` is not given.
        checkpoint: Checkpoint path. When given it overrides
            `load_checkpoint` from the YAML config.
        yaml_cfg_file: MindFormers YAML config file. Required.
        batch_size: Batch dimension of the dummy input used when the checkpoint
            is loaded outside the "predict" run mode.
        max_seq_len: Maximum prompt length in tokens; also written to
            `config.model.model_config.seq_length`.
        tokenizer_path: Tokenizer directory; falls back to `path`.
        tokenizer_kwargs: Extra keyword arguments for
            `AutoTokenizer.from_pretrained`. Defaults to no extras.
        tokenizer_only: When True, only the tokenizer is loaded.
        generation_kwargs: Keyword arguments merged into every `generate` call;
            they take precedence over per-call keyword arguments.
        meta_template: Forwarded to the base class.
        extract_pred_after_decode: When True, the prompt is removed from the
            decoded text by character count instead of by token count.
        batch_padding: Stored on the instance; not read elsewhere in this class.
        pad_token_id: Explicit pad token id that overrides the tokenizer's.
        mode: 'none' truncates through the tokenizer; 'mid' keeps the head and
            tail of an over-long prompt and drops the middle.
        use_fastchat_template: When True, each prompt is wrapped in fastchat's
            'vicuna' conversation template.
        end_str: When set, decoded text is cut at its first occurrence.
        **kwargs: Accepted for config compatibility and ignored.

    Raises:
        ValueError: If `mode` is not 'none'/'mid' or `yaml_cfg_file` is missing.
    """

    def __init__(self,
                 path: str,
                 checkpoint: Optional[str] = None,
                 yaml_cfg_file: Optional[str] = None,
                 batch_size: int = 1,
                 max_seq_len: int = 2048,
                 tokenizer_path: Optional[str] = None,
                 tokenizer_kwargs: Optional[dict] = None,
                 tokenizer_only: bool = False,
                 generation_kwargs: Optional[dict] = None,
                 meta_template: Optional[Dict] = None,
                 extract_pred_after_decode: bool = False,
                 batch_padding: bool = False,
                 pad_token_id: Optional[int] = None,
                 mode: str = 'none',
                 use_fastchat_template: bool = False,
                 end_str: Optional[str] = None,
                 **kwargs):
        super().__init__(path=path,
                         max_seq_len=max_seq_len,
                         tokenizer_only=tokenizer_only,
                         meta_template=meta_template)
        self.batch_size = batch_size
        self.pad_token_id = pad_token_id
        self.pretrained_model_path = path
        if mode not in ['none', 'mid']:
            raise ValueError(f"mode must be 'none' or 'mid', but got {mode}")
        self.mode = mode
        if not yaml_cfg_file:
            raise ValueError('`yaml_cfg_file` is required for MindFormerModel')
        self.config = MindFormerConfig(yaml_cfg_file)
        self.checkpoint = checkpoint
        # `None` defaults replace the former shared mutable `dict()` defaults.
        self._load_tokenizer(
            path=path,
            tokenizer_path=tokenizer_path,
            tokenizer_kwargs={} if tokenizer_kwargs is None else tokenizer_kwargs)
        self.batch_padding = batch_padding
        self.extract_pred_after_decode = extract_pred_after_decode
        if not tokenizer_only:
            self._load_model(self.config, self.batch_size, self.max_seq_len)
        self.generation_kwargs = ({} if generation_kwargs is None
                                  else generation_kwargs)
        self.use_fastchat_template = use_fastchat_template
        self.end_str = end_str

    def _load_tokenizer(self, path: str, tokenizer_path: Optional[str],
                        tokenizer_kwargs: Optional[dict]):
        """Load the tokenizer and make sure it has a pad token id.

        The pad token id is resolved in this order: the explicit
        `self.pad_token_id`, the tokenizer's own value, the model's
        `GenerationConfig`, and finally the tokenizer's EOS token id.

        Args:
            path: Model directory; tokenizer fallback and `GenerationConfig` source.
            tokenizer_path: Preferred tokenizer directory, if any.
            tokenizer_kwargs: Overrides for the default tokenizer arguments.

        Raises:
            ValueError: If no pad token id can be determined.
        """
        from transformers import AutoTokenizer, GenerationConfig

        load_kwargs = dict(padding_side='left',
                           truncation_side='left',
                           trust_remote_code=True)
        load_kwargs.update(tokenizer_kwargs or {})

        load_path = tokenizer_path if tokenizer_path else path
        self.tokenizer = AutoTokenizer.from_pretrained(load_path, **load_kwargs)

        pad_token_id = self.pad_token_id

        # An explicitly configured pad token id always wins.
        if pad_token_id is not None:
            if self.tokenizer.pad_token_id is None:
                self.logger.debug(f'Using {pad_token_id} as pad_token_id')
            elif self.tokenizer.pad_token_id != pad_token_id:
                self.logger.warning(
                    f'pad_token_id is not consistent. Using {pad_token_id} as pad_token_id')
            self.tokenizer.pad_token_id = pad_token_id
            return
        if self.tokenizer.pad_token_id is not None:
            return
        self.logger.warning('pad_token_id is not set for the tokenizer.')

        # Best effort: a model directory without a generation config is acceptable.
        try:
            generation_config = GenerationConfig.from_pretrained(path)
        except Exception:
            generation_config = None

        if generation_config and generation_config.pad_token_id is not None:
            self.logger.warning(
                f'Using {generation_config.pad_token_id} as pad_token_id.')
            self.tokenizer.pad_token_id = generation_config.pad_token_id
            return
        if self.tokenizer.eos_token_id is not None:
            self.logger.warning(
                f'Using eos_token_id {self.tokenizer.eos_token_id} as pad_token_id.')
            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
            return
        raise ValueError('pad_token_id is not set for this tokenizer. Please set `pad_token_id={PAD_TOKEN_ID}` in model_cfg.')

    def _set_config_from_yaml(self):
        """Apply constructor arguments to the YAML config and build the runtime context.

        An explicit `checkpoint` overrides the YAML's `load_checkpoint`; when
        neither is set, `self.path` is used as the checkpoint location.
        """
        # NOTE(review): `self.path` and `self.max_seq_len` are presumably set by
        # BaseModel.__init__ — confirm.
        if self.checkpoint is not None:
            self.config.load_checkpoint = self.checkpoint
        elif self.config.load_checkpoint is None:
            self.config.load_checkpoint = self.path
        self.config.model.pretrained_model_dir = self.pretrained_model_path
        self.config.model.model_config.seq_length = self.max_seq_len
        build_context(self.config)
        build_parallel_config(self.config)

    def _load_model(self, config, batch_size, max_seq_len):
        """Build the MindFormers network and load its weights.

        Args:
            config: The MindFormers config to build from.
            batch_size: Batch dimension of the dummy input used for checkpoint
                loading outside the "predict" run mode.
            max_seq_len: Sequence dimension of that dummy input.

        Raises:
            ValueError: If building the network or loading the checkpoint
                raises a ValueError.
        """
        self._set_config_from_yaml()
        try:
            self.model = build_network(
                config.model,
                default_args={
                    "parallel_config": config.parallel_config,
                    "moe_config": config.moe_config
                })
            self.logger.info("..........Network Built Successfully..........")
            self.model.set_train(False)
            config.load_checkpoint = get_load_path_after_hf_convert(config, self.model)
            self.logger.info(f"load checkpoint path : {config.load_checkpoint}")
            run_mode = config.get("run_mode", None)
            if run_mode == "predict":
                self.model.load_weights(config.load_checkpoint)
            else:
                # Other run modes load through transform_and_load_checkpoint,
                # which is given a dummy all-ones input of the configured shape.
                model = Model(self.model)
                input_ids = Tensor(np.ones((batch_size, max_seq_len), dtype=np.int32))
                infer_data = self.model.prepare_inputs_for_predict_layout(input_ids)
                transform_and_load_checkpoint(config, model, self.model, infer_data, do_eval=True)

            self.logger.info("..........Checkpoint Load Successfully..........")
        except ValueError as e:
            raise ValueError('Failed to load MindFormers model, please check configuration') from e

    def generate(self,
                 inputs: List[str],
                 max_out_len: int,
                 min_out_len: Optional[int] = None,
                 stopping_criteria: Optional[List[str]] = None,
                 **kwargs) -> List[str]:
        """Generate results given a list of inputs.

        Args:
            inputs (List[str]): A list of strings.
            max_out_len (int): The maximum length of the output.
            min_out_len (Optional[int]): The minimum length of the output.
            stopping_criteria (Optional[List[str]]): Strings that end
                generation; the decoded text is also cut at each of them.
            **kwargs: Extra generation arguments; entries in
                `self.generation_kwargs` override them.

        Returns:
            List[str]: A list of generated strings.

        Raises:
            ModuleNotFoundError: If `use_fastchat_template` is set but fastchat
                is not installed.
            ValueError: If the model returns a dict without 'sequences'.
        """
        # `None` replaces the former shared mutable `[]` default.
        stopping_criteria = list(stopping_criteria) if stopping_criteria else []

        generation_kwargs = kwargs.copy()
        generation_kwargs.update(self.generation_kwargs)

        messages = list(inputs)
        batch_size = len(messages)
        prompt_char_lens = None

        if self.extract_pred_after_decode:
            # Measured before any chat template is applied to the prompts.
            prompt_char_lens = [len(text) for text in messages]

        if self.use_fastchat_template:
            try:
                from fastchat.model import get_conversation_template
            except ModuleNotFoundError as err:
                raise ModuleNotFoundError(
                    'Fastchat is not implemented. You can use '
                    "'pip install \"fschat[model_worker,webui]\"' "
                    'to implement fastchat.') from err
            for idx, text in enumerate(messages):
                conv = get_conversation_template('vicuna')
                conv.append_message(conv.roles[0], text)
                conv.append_message(conv.roles[1], None)
                messages[idx] = conv.get_prompt()

        if self.mode == 'mid':
            assert len(messages) == 1, "mode 'mid' only supports a single input"
            tokens = self.tokenizer(messages, padding=False, truncation=False, return_tensors='np')
            input_ids = tokens['input_ids']
            if input_ids.shape[-1] > self.max_seq_len:
                # Keep the head and the tail of the prompt and drop the middle.
                input_ids = np.concatenate(
                    [input_ids[:, : self.max_seq_len // 2],
                     input_ids[:, - self.max_seq_len // 2:]],
                    axis=-1)
            tokens = {'input_ids': input_ids}
        else:
            tokenize_kwargs = dict(
                padding=True,
                truncation=True,
                max_length=self.max_seq_len,
                return_tensors='np'
            )
            tokens = self.tokenizer(messages, **tokenize_kwargs)

        input_ids = tokens['input_ids']
        if len(messages) > 1:
            # Per-sample prompt length is the number of attended tokens.
            # NOTE(review): slicing the outputs by this length assumes generate()
            # returns each sequence without the input padding — confirm.
            attention_mask = tokens.get('attention_mask')
            prompt_token_lens = (
                attention_mask.sum(axis=1).astype(int).tolist()
                if attention_mask is not None else
                [input_ids.shape[1]] * batch_size
            )
        else:
            prompt_token_lens = [len(ids) for ids in input_ids]

        input_ids_tensor = Tensor(input_ids)

        if min_out_len is not None:
            generation_kwargs['min_new_tokens'] = min_out_len
        generation_kwargs['max_new_tokens'] = max_out_len
        generation_kwargs.setdefault('top_k', 1)
        generation_kwargs.setdefault('return_dict_in_generate', False)

        # Only the caller-supplied stop strings are used to cut the decoded text.
        origin_stopping_criteria = list(stopping_criteria)
        if stopping_criteria:
            if self.tokenizer.eos_token is not None:
                stopping_criteria = stopping_criteria + [
                    self.tokenizer.eos_token
                ]
            # NOTE(review): confirm the MindFormers generate() honors a Hugging
            # Face StoppingCriteriaList passed as `stopping_criteria`.
            stopping_list = transformers.StoppingCriteriaList([
                MultiTokenEOSCriteria(sequence, self.tokenizer,
                                      input_ids_tensor.shape[0])
                for sequence in stopping_criteria
            ])
            generation_kwargs['stopping_criteria'] = stopping_list

        generation_kwargs = drop_error_generation_kwargs(generation_kwargs)

        outputs = self.model.generate(input_ids=input_ids_tensor,
                                      **generation_kwargs)

        if isinstance(outputs, dict):
            # Look the key up without a fallback so that a missing key is
            # reported here instead of failing later while iterating the dict.
            outputs = outputs.get('sequences')
            if outputs is None:
                raise ValueError("Model output dictionary is missing 'sequences' key.")

        sequences = [seq.tolist() for seq in outputs]

        if not self.extract_pred_after_decode:
            # Drop the prompt tokens so that only the completion is decoded.
            sequences = [
                seq[prompt_len:]
                for seq, prompt_len in zip(sequences, prompt_token_lens)
            ]

        decodeds = [
            self.tokenizer.decode(seq, skip_special_tokens=True)
            for seq in sequences
        ]

        if self.extract_pred_after_decode and prompt_char_lens is not None:
            decodeds = [
                text[length:]
                for text, length in zip(decodeds, prompt_char_lens)
            ]

        if self.end_str:
            decodeds = [text.split(self.end_str)[0] for text in decodeds]
        for token in origin_stopping_criteria:
            decodeds = [text.split(token)[0] for text in decodeds]
        return decodeds

ais_bench/benchmark/tasks/openicl_infer.py

Lines changed: 31 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ def __init__(self, cfg: ConfigDict):
4343
self.node_rank = run_cfg.get('node_rank', 0)
4444
self.master_addr = run_cfg.get('master_addr', "localhost")
4545
self.logger.debug(f"Local infer task config: {run_cfg}")
46+
self.abbr = self.model_cfg.get('abbr', '')
4647

4748
def get_command(self, cfg_path, template):
4849
"""Get the command template for the task.
@@ -61,12 +62,38 @@ def get_command(self, cfg_path, template):
6162
for key in backend_keys)
6263
if self.num_gpus > 1 and not use_backend and self.nnodes == 1:
6364
port = random.randint(12000, 32000)
64-
command = (f'torchrun --master_port={port} '
65-
f'--nproc_per_node {self.num_procs} '
66-
f'{script_path} {cfg_path}')
65+
if self.abbr == 'mindformer-model':
66+
command = (
67+
f"msrun "
68+
f"--worker_num={self.num_gpus} "
69+
f"--local_worker_num={self.num_gpus} "
70+
f"--master_port={port} "
71+
f"--log_dir='output/msrun_log' "
72+
f"--join=True "
73+
f"--cluster_time_out=7200 "
74+
f'{script_path} {cfg_path}'
75+
)
76+
else :
77+
command = (f'torchrun --master_port={port} '
78+
f'--nproc_per_node {self.num_procs} '
79+
f'{script_path} {cfg_path}')
6780
elif self.nnodes > 1:
6881
port = 12345
69-
command = (f'torchrun --master_port={port} '
82+
if self.abbr == "mindformer-model" :
83+
command = (
84+
f"msrun "
85+
f"--worker_num={self.nnodes*self.num_procs} "
86+
f"--local_worker_num={self.num_procs} "
87+
f"--master_port={port} "
88+
f"--master_addr={self.master_addr} "
89+
f"--node_rank={self.node_rank} "
90+
f"--log_dir='output/msrun_log' "
91+
f"--join=True "
92+
f"--cluster_time_out=7200 "
93+
f'{script_path} {cfg_path}'
94+
)
95+
else :
96+
command = (f'torchrun --master_port={port} '
7097
f'--nproc_per_node {self.num_procs} '
7198
f'--nnodes {self.nnodes} '
7299
f'--node_rank {self.node_rank} '

0 commit comments

Comments
 (0)