Skip to content

Commit 9377abd

Browse files
author
李宜杰
committed
local eval add mindformers model
1 parent 2d794cf commit 9377abd

5 files changed

Lines changed: 370 additions & 9 deletions

File tree

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from ais_bench.benchmark.models import MindFormerModel
2+
3+
# Example configuration for evaluating a local MindFormers model.
# All paths below are placeholders and must be replaced with real locations.
models = [
    {
        'attr': 'local',  # local or service
        'type': MindFormerModel,  # model wrapper class that performs the inference
        # NOTE: the infer task selects the msrun launcher by comparing the
        # abbr against the literal 'mindformer-model'.
        'abbr': 'mindformer-model',
        'path': 'THUDM/chatglm-6b',  # path to model dir, current value is just an example
        'checkpoint': 'THUDM/your_checkpoint',  # path to checkpoint file, current value is just an example
        'yaml_cfg_file': 'THUDM/your.yaml',  # MindFormers YAML config file, current value is just an example
        'tokenizer_path': 'THUDM/chatglm-6b',  # path to tokenizer dir, current value is just an example
        # Model parameters; originally documented against
        # huggingface.co/docs/transformers/v4.50.0/en/model_doc/auto#transformers.AutoModel.from_pretrained
        # NOTE(review): MindFormerModel.__init__ has no `model_kwargs`
        # parameter, so this entry is absorbed by its **kwargs -- confirm
        # whether it is still needed.
        'model_kwargs': {
            'device_map': 'npu',
        },
        # Tokenizer parameters, see
        # huggingface.co/docs/transformers/v4.50.0/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase
        'tokenizer_kwargs': {
            'padding_side': 'right',
        },
        # Generation (sampling) parameters, see
        # huggingface.co/docs/transformers/main_classes/test_generation
        'generation_kwargs': {
            'temperature': 0.5,
            'top_k': 10,
            'top_p': 0.95,
            'do_sample': True,
            'seed': None,
            'repetition_penalty': 1.03,
        },
        # Multi-device / multi-node launch settings read by the infer task.
        'run_cfg': {'num_gpus': 1, 'num_procs': 1},
        'max_out_len': 100,  # maximum number of output tokens
        'batch_size': 2,  # batch size of each inference call
        'max_seq_len': 2048,
        'batch_padding': True,
    }
]

ais_bench/benchmark/models/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,5 @@
1414
from ais_bench.benchmark.models.api_models.triton_api import TritonCustomAPIStream # noqa: F401
1515
from ais_bench.benchmark.models.api_models.tgi_api import TGICustomAPIStream # noqa: F401
1616
from ais_bench.benchmark.models.api_models.vllm_custom_api_chat import VllmMultiturnAPIChatStream # noqa: F401
17-
from ais_bench.benchmark.models.local_models.vllm_offline_vl import VLLMOfflineVLModel
17+
from ais_bench.benchmark.models.local_models.vllm_offline_vl import VLLMOfflineVLModel
18+
from ais_bench.benchmark.models.local_models.mindformers_model import MindFormerModel
Lines changed: 298 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,298 @@
1+
import os, sys
2+
from typing import Dict, List, Optional, Union
3+
4+
import numpy as np
5+
import torch
6+
import transformers
7+
8+
from ais_bench.benchmark.models.base import BaseModel
9+
from ais_bench.benchmark.models.base_api import APITemplateParser
10+
from ais_bench.benchmark.registry import MODELS
11+
from ais_bench.benchmark.utils.logging import get_logger
12+
from ais_bench.benchmark.utils.prompt import PromptList
13+
14+
from mindspore import Tensor, Model
15+
from mindformers import MindFormerConfig, build_context
16+
from mindformers.models import build_network
17+
from mindformers.core.parallel_config import build_parallel_config
18+
from mindformers.utils.load_checkpoint_utils import get_load_path_after_hf_convert
19+
from mindformers.trainer.utils import transform_and_load_checkpoint
20+
21+
PromptType = Union[PromptList, str, dict]
22+
23+
24+
class MultiTokenEOSCriteria(transformers.StoppingCriteria):
    """Stopping criterion that fires once every batch row contains a stop string.

    The stop string may span several tokens, so the check is performed on the
    decoded text of the most recent tokens rather than on token ids.
    """

    def __init__(
        self,
        sequence: str,
        tokenizer: transformers.PreTrainedTokenizer,
        batch_size: int,
    ):
        """Remember the stop string and how many tokens it encodes to.

        Args:
            sequence: The stop string to look for.
            tokenizer: Tokenizer used to encode the stop string and to decode
                the trailing tokens of each row.
            batch_size: Number of rows tracked by this criterion.
        """
        self.tokenizer = tokenizer
        self.sequence = sequence
        self.sequence_ids = tokenizer.encode(sequence,
                                             add_special_tokens=False)
        self.sequence_id_len = len(self.sequence_ids)
        # One flag per row; a row stays "done" once the stop string was seen.
        self.done_tracker = [False] * batch_size

    def __call__(self, input_ids, scores, **kwargs) -> bool:
        """Return True when all tracked rows have produced the stop string."""
        # Only the last `sequence_id_len` tokens of each row are inspected.
        tail_ids = input_ids[:, -self.sequence_id_len:]
        tail_texts = self.tokenizer.batch_decode(tail_ids)
        for row, finished in enumerate(self.done_tracker):
            if not finished:
                self.done_tracker[row] = self.sequence in tail_texts[row]
        return all(self.done_tracker)
49+
50+
51+
def drop_error_generation_kwargs(generation_kwargs: dict) -> dict:
    """Strip the keys 'is_synthetic', 'batch_size' and 'do_performance'.

    The dictionary is modified in place and the same object is returned.

    Args:
        generation_kwargs: Keyword arguments destined for the generate call.

    Returns:
        dict: ``generation_kwargs`` itself, without the three keys above.
    """
    unwanted_keys = ('is_synthetic', 'batch_size', 'do_performance')
    for name in unwanted_keys:
        generation_kwargs.pop(name, None)
    return generation_kwargs
56+
57+
58+
@MODELS.register_module()
class MindFormerModel(BaseModel):
    """Local model wrapper that runs generation with a MindFormers network.

    The tokenizer is loaded with ``transformers.AutoTokenizer``. The network
    is built from a MindFormers YAML configuration with ``build_network`` and
    its weights are loaded from the configured checkpoint.

    Args:
        path: Pretrained model directory. Written to
            ``config.model.pretrained_model_dir`` and used to load the
            tokenizer when ``tokenizer_path`` is not given.
        checkpoint: Checkpoint location, written to
            ``config.load_checkpoint``.
        yaml_cfg_file: YAML file passed to ``MindFormerConfig``.
        batch_size: Batch dimension of the dummy input that is built when
            the checkpoint is loaded outside of ``run_mode == "predict"``.
        max_seq_len: Maximum prompt length in tokens. Also written to
            ``config.model.model_config.seq_length``.
        tokenizer_path: Tokenizer directory; falls back to ``path``.
        tokenizer_kwargs: Extra arguments for
            ``AutoTokenizer.from_pretrained``; they override the defaults
            ``padding_side='left'``, ``truncation_side='left'`` and
            ``trust_remote_code=True``.
        tokenizer_only: When True, the network is not built or loaded.
        generation_kwargs: Arguments merged into every ``generate`` call;
            they take precedence over per-call keyword arguments.
        meta_template: Forwarded to ``BaseModel``.
        extract_pred_after_decode: When True, the prompt is removed from the
            decoded text by character count instead of from the token ids.
        batch_padding: Stored on the instance; not consulted by this class.
        pad_token_id: Explicit pad token id that overrides the tokenizer's.
        mode: ``'none'`` or ``'mid'``. In ``'mid'`` mode a single over-long
            prompt keeps its head and tail tokens and drops the middle.
        use_fastchat_template: When True, prompts are wrapped with the
            fastchat ``'vicuna'`` conversation template.
        end_str: If set, each output is cut at its first occurrence.
        **kwargs: Additional configuration entries; accepted and ignored.
    """

    def __init__(self,
                 path: str,
                 checkpoint: Optional[str] = None,
                 yaml_cfg_file: Optional[str] = None,
                 batch_size: int = 1,
                 max_seq_len: int = 2048,
                 tokenizer_path: Optional[str] = None,
                 tokenizer_kwargs: Optional[dict] = None,
                 tokenizer_only: bool = False,
                 generation_kwargs: Optional[dict] = None,
                 meta_template: Optional[Dict] = None,
                 extract_pred_after_decode: bool = False,
                 batch_padding: bool = False,
                 pad_token_id: Optional[int] = None,
                 mode: str = 'none',
                 use_fastchat_template: bool = False,
                 end_str: Optional[str] = None,
                 **kwargs):
        super().__init__(path=path,
                         max_seq_len=max_seq_len,
                         tokenizer_only=tokenizer_only,
                         meta_template=meta_template)
        self.logger = get_logger()
        self.batch_size = batch_size
        self.pad_token_id = pad_token_id
        self.pretrained_model_path = path
        assert mode in ['none', 'mid']
        self.mode = mode
        self.config = MindFormerConfig(yaml_cfg_file)
        self.checkpoint = checkpoint
        # `None` replaces the former mutable `dict()` defaults so that no
        # dictionary object is shared between instances.
        self._load_tokenizer(path=path,
                             tokenizer_path=tokenizer_path,
                             tokenizer_kwargs=tokenizer_kwargs or {})
        self.batch_padding = batch_padding
        self.extract_pred_after_decode = extract_pred_after_decode
        if not tokenizer_only:
            self._load_model(self.config, self.batch_size, self.max_seq_len)
        self.generation_kwargs = (dict(generation_kwargs)
                                  if generation_kwargs else {})
        self.use_fastchat_template = use_fastchat_template
        self.end_str = end_str

    def _load_tokenizer(self, path: str, tokenizer_path: Optional[str],
                        tokenizer_kwargs: dict):
        """Load the tokenizer and make sure it has a ``pad_token_id``.

        The pad token id is resolved in this order: the explicit
        ``self.pad_token_id``, the tokenizer's own value, the value from the
        ``GenerationConfig`` found at ``path``, and finally the tokenizer's
        ``eos_token_id``.

        Args:
            path: Model directory; tokenizer fallback and the location from
                which ``GenerationConfig`` is read.
            tokenizer_path: Tokenizer directory, or None to use ``path``.
            tokenizer_kwargs: Overrides for the default tokenizer arguments.

        Raises:
            ValueError: If no pad token id can be determined.
        """
        from transformers import AutoTokenizer, GenerationConfig

        load_kwargs = dict(padding_side='left',
                           truncation_side='left',
                           trust_remote_code=True)
        load_kwargs.update(tokenizer_kwargs)

        load_path = tokenizer_path if tokenizer_path else path
        self.tokenizer = AutoTokenizer.from_pretrained(load_path,
                                                       **load_kwargs)

        pad_token_id = self.pad_token_id

        # A patch for some models without pad_token_id
        if pad_token_id is not None:
            if self.tokenizer.pad_token_id is None:
                self.logger.debug(f'Using {pad_token_id} as pad_token_id')
            elif self.tokenizer.pad_token_id != pad_token_id:
                self.logger.warning(f'pad_token_id is not consistent. Using {pad_token_id} as pad_token_id')
            self.tokenizer.pad_token_id = pad_token_id
            return
        if self.tokenizer.pad_token_id is not None:
            return
        self.logger.warning('pad_token_id is not set for the tokenizer.')

        # Best effort: a missing or unreadable generation config only means
        # that this fallback source is unavailable.
        try:
            generation_config = GenerationConfig.from_pretrained(path)
        except Exception:
            generation_config = None

        if generation_config and generation_config.pad_token_id is not None:
            self.logger.warning(f'Using {generation_config.pad_token_id} as pad_token_id.')
            self.tokenizer.pad_token_id = generation_config.pad_token_id
            return
        if self.tokenizer.eos_token_id is not None:
            self.logger.warning(f'Using eos_token_id {self.tokenizer.eos_token_id} as pad_token_id.')
            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
            return
        raise ValueError('pad_token_id is not set for this tokenizer. Please set `pad_token_id={PAD_TOKEN_ID}` in model_cfg.')

    def _set_config_from_yaml(self):
        """Apply the constructor arguments to the YAML config and build the
        MindFormers context and parallel configuration from it."""
        self.config.load_checkpoint = self.checkpoint
        self.config.model.pretrained_model_dir = self.pretrained_model_path
        self.config.model.model_config.seq_length = self.max_seq_len
        build_context(self.config)
        build_parallel_config(self.config)

    def _load_model(self, config, batch_size, max_seq_len):
        """Build the network described by ``config`` and load its weights.

        Args:
            config: The MindFormers configuration object.
            batch_size: Batch dimension of the dummy input used for loading
                outside of predict mode.
            max_seq_len: Sequence dimension of that dummy input.

        Raises:
            ValueError: If building the network or loading the checkpoint
                raises a ``ValueError``; the original error is chained.
        """
        self._set_config_from_yaml()
        try:
            self.model = build_network(
                config.model,
                default_args={
                    "parallel_config": config.parallel_config,
                    "moe_config": config.moe_config
                })
            self.logger.info("..........Network Built Successfully..........")
            self.model.set_train(False)
            config.load_checkpoint = get_load_path_after_hf_convert(config, self.model)
            # The path must be passed through a %s placeholder; without one
            # the logging module reports a formatting error instead of
            # printing the path.
            self.logger.info("load checkpoint path : %s", config.load_checkpoint)
            run_mode = config.get("run_mode", None)
            if run_mode == "predict":
                self.model.load_weights(config.load_checkpoint)
            else:
                model = Model(self.model)
                # Dummy all-ones input handed to the layout preparation that
                # precedes checkpoint transformation and loading.
                input_ids = Tensor(np.ones((batch_size, max_seq_len), dtype=np.int32))
                infer_data = self.model.prepare_inputs_for_predict_layout(input_ids)
                transform_and_load_checkpoint(config, model, self.model, infer_data, do_eval=True)

            self.logger.info("..........Checkpoint Load Successfully..........")
        except ValueError as err:
            raise ValueError('Failed to load MindFormers model, please check configuration') from err

    def generate(self,
                 inputs: List[str],
                 max_out_len: int,
                 min_out_len: Optional[int] = None,
                 stopping_criteria: Optional[List[str]] = None,
                 **kwargs) -> List[str]:
        """Generate results given a list of inputs.

        Args:
            inputs (List[str]): A list of strings.
            max_out_len (int): The maximum length of the output.
            min_out_len (Optional[int]): The minimum length of the output.
            stopping_criteria (Optional[List[str]]): Strings that end
                generation. Each output is also cut at the first occurrence
                of any of them. ``None`` means no stop strings.
            **kwargs: Extra generation arguments; entries of
                ``self.generation_kwargs`` override them.

        Returns:
            List[str]: A list of generated strings.
        """
        generation_kwargs = kwargs.copy()
        generation_kwargs.update(self.generation_kwargs)

        stop_strings = list(stopping_criteria) if stopping_criteria else []
        messages = list(inputs)
        batch_size = len(messages)
        prompt_char_lens = None

        if self.extract_pred_after_decode:
            # Character lengths are taken before any chat template is applied.
            prompt_char_lens = [len(text) for text in messages]

        if self.use_fastchat_template:
            try:
                from fastchat.model import get_conversation_template
            except ModuleNotFoundError as err:
                raise ModuleNotFoundError(
                    'Fastchat is not implemented. You can use '
                    "'pip install \"fschat[model_worker,webui]\"' "
                    'to implement fastchat.') from err
            for idx, text in enumerate(messages):
                conv = get_conversation_template('vicuna')
                conv.append_message(conv.roles[0], text)
                conv.append_message(conv.roles[1], None)
                messages[idx] = conv.get_prompt()

        if self.mode == 'mid':
            assert len(messages) == 1
            tokens = self.tokenizer(messages, padding=False, truncation=False, return_tensors='np')
            input_ids = tokens['input_ids']
            if input_ids.shape[-1] > self.max_seq_len:
                # Keep the head and the tail of the prompt, drop the middle.
                # `-self.max_seq_len // 2` floors towards negative infinity,
                # so head and tail add up to `max_seq_len` tokens even when
                # `max_seq_len` is odd.
                head = input_ids[:, :self.max_seq_len // 2]
                tail = input_ids[:, -self.max_seq_len // 2:]
                input_ids = np.concatenate([head, tail], axis=-1)
            tokens = {'input_ids': input_ids}
        else:
            tokens = self.tokenizer(messages,
                                    padding=True,
                                    truncation=True,
                                    max_length=self.max_seq_len,
                                    return_tensors='np')

        input_ids = tokens['input_ids']
        if len(messages) > 1:
            # Per-row prompt length is the number of attended tokens; when
            # the tokenizer returns no mask, the padded width is used.
            attention_mask = tokens.get('attention_mask')
            if attention_mask is not None:
                prompt_token_lens = attention_mask.sum(axis=1).astype(int).tolist()
            else:
                prompt_token_lens = [input_ids.shape[1]] * batch_size
        else:
            prompt_token_lens = [len(ids) for ids in input_ids]

        input_ids_tensor = Tensor(input_ids)

        if min_out_len is not None:
            generation_kwargs['min_new_tokens'] = min_out_len
        generation_kwargs['max_new_tokens'] = max_out_len
        generation_kwargs.setdefault('top_k', 1)
        generation_kwargs.setdefault('return_dict_in_generate', False)

        if stop_strings:
            # The EOS token is added for the stopping criteria only; the
            # final text is split on the caller's stop strings alone.
            criteria_strings = list(stop_strings)
            if self.tokenizer.eos_token is not None:
                criteria_strings.append(self.tokenizer.eos_token)
            generation_kwargs['stopping_criteria'] = transformers.StoppingCriteriaList([
                MultiTokenEOSCriteria(sequence, self.tokenizer,
                                      input_ids_tensor.shape[0])
                for sequence in criteria_strings
            ])

        generation_kwargs = drop_error_generation_kwargs(generation_kwargs)

        outputs = self.model.generate(input_ids=input_ids_tensor,
                                      **generation_kwargs)

        if isinstance(outputs, dict):
            outputs = outputs.get('sequences', outputs)

        sequences = [seq.tolist() for seq in outputs]

        if not self.extract_pred_after_decode:
            # Remove the prompt tokens before decoding.
            sequences = [
                seq[prompt_len:]
                for seq, prompt_len in zip(sequences, prompt_token_lens)
            ]

        decodeds = [
            self.tokenizer.decode(seq, skip_special_tokens=True)
            for seq in sequences
        ]

        if self.extract_pred_after_decode and prompt_char_lens is not None:
            # Remove the prompt from the decoded text by character count.
            decodeds = [
                text[length:]
                for text, length in zip(decodeds, prompt_char_lens)
            ]

        if self.end_str:
            decodeds = [text.split(self.end_str)[0] for text in decodeds]
        for token in stop_strings:
            decodeds = [text.split(token)[0] for text in decodeds]
        return decodeds

ais_bench/benchmark/tasks/openicl_infer.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def __init__(self, cfg: ConfigDict):
3838
super().__init__(cfg)
3939
run_cfg = self.model_cfg.get('run_cfg', {})
4040
self.num_gpus = run_cfg.get('num_gpus', 0)
41+
self.local_worker_num = run_cfg.get('local_worker_num',0)
4142
self.num_procs = run_cfg.get('num_procs', 1)
4243
self.nnodes = run_cfg.get('nnodes', 1)
4344
self.node_rank = run_cfg.get('node_rank', 0)
@@ -61,12 +62,38 @@ def get_command(self, cfg_path, template):
6162
for key in backend_keys)
6263
if self.num_gpus > 1 and not use_backend and self.nnodes == 1:
6364
port = random.randint(12000, 32000)
64-
command = (f'torchrun --master_port={port} '
65-
f'--nproc_per_node {self.num_procs} '
66-
f'{script_path} {cfg_path}')
65+
if self.abbr == 'mindformer-model':
66+
command = (
67+
f"msrun "
68+
f"--worker_num={self.num_gpus} "
69+
f"--local_worker_num={self.num_gpus} "
70+
f"--master_port={port} "
71+
f"--log_dir='output/msrun_log' "
72+
f"--join=True "
73+
f"--cluster_time_out=7200 "
74+
f'{script_path} {cfg_path}'
75+
)
76+
else :
77+
command = (f'torchrun --master_port={port} '
78+
f'--nproc_per_node {self.num_procs} '
79+
f'{script_path} {cfg_path}')
6780
elif self.nnodes > 1:
6881
port = 12345
69-
command = (f'torchrun --master_port={port} '
82+
if self.abbr == "mindformer-model" :
83+
command = (
84+
f"msrun "
85+
f"--worker_num={self.num_procs} "
86+
f"--local_worker_num={self.local_worker_num} "
87+
f"--master_port={port} "
88+
f"--master_addr={self.master_addr} "
89+
f"--node_rank={self.node_rank} "
90+
f"--log_dir='output/msrun_log' "
91+
f"--join=True "
92+
f"--cluster_time_out=7200 "
93+
f'{script_path} {cfg_path}'
94+
)
95+
else :
96+
command = (f'torchrun --master_port={port} '
7097
f'--nproc_per_node {self.num_procs} '
7198
f'--nnodes {self.nnodes} '
7299
f'--node_rank {self.node_rank} '

0 commit comments

Comments
 (0)