Problem description
When fine-tuning a model on Ascend hardware, the Ascend device is explicitly specified, yet when testing model inference the run still executes on the CPU, which triggers the error below.
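As a quick sanity check (a sketch; it assumes that get_context("device_target") reflects the value chosen via set_device), the configured device target can be printed before running anything else:

import mindspore as ms

ms.set_device('Ascend')
# Assumption: get_context reports the target selected by set_device above.
print(ms.get_context("device_target"))  # expected output: Ascend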
Model being fine-tuned
`deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B`
!modelscope download --model deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --local_dir ./Qwen-1.5B
Version information
- mindspore==2.7.0
- mindnlp==0.5.0
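For reproducibility, the environment can presumably be recreated with pip (a sketch; that both pinned versions are installable from the default package index is an assumption):

!pip install mindspore==2.7.0 mindnlp==0.5.0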
Source code
import mindspore as ms
# The NPU is only used when 'Ascend' is explicitly specified
ms.set_device('Ascend')
!npu-smi info
from mindnlp.transformers import AutoModelForCausalLM, AutoTokenizer
import mindspore as ms
model = AutoModelForCausalLM.from_pretrained("./Qwen-1.5B", ms_dtype=ms.bfloat16) # Note: to switch to a different base model later, change this path to where that model was downloaded
tokenizer = AutoTokenizer.from_pretrained("./Qwen-1.5B", ms_dtype=ms.bfloat16)
# The input text ⬇️
prompt = "告诉我你是谁"
# Model inference code ⬇️
inputs = tokenizer(prompt, return_tensors="ms")
print("inputs:",inputs)
outputs = model.generate(**inputs, temperature=0.5, max_new_tokens=128, do_sample=True)
result = tokenizer.decode(outputs[0])
print(result)
Error message
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
File /usr/local/python3.10.14/lib/python3.10/site-packages/transformers/utils/generic.py:1071, in check_model_inputs.<locals>.wrapper(self, *args, **kwargs)
1070 try:
-> 1071 outputs = func(self, *args, **kwargs_without_recordable)
1072 except TypeError:
File /usr/local/python3.10.14/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py:345, in Qwen2Model.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, cache_position, **kwargs)
344 if inputs_embeds is None:
--> 345 inputs_embeds = self.embed_tokens(input_ids)
347 if use_cache and past_key_values is None:
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/nn/modules/module.py:826, in Module._wrapped_call_impl(self, *args, **kwargs)
825 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
--> 826 return self._call_impl(*args, **kwargs)
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/nn/modules/module.py:840, in Module._call_impl(self, *args, **kwargs)
837 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
838 or _global_backward_pre_hooks or _global_backward_hooks
839 or _global_forward_hooks or _global_forward_pre_hooks):
--> 840 return forward_call(*args, **kwargs)
842 try:
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/nn/modules/sparse.py:192, in Embedding.forward(self, input)
191 def forward(self, input: Tensor) -> Tensor:
--> 192 return F.embedding(
193 input,
194 self.weight,
195 self.padding_idx,
196 self.max_norm,
197 self.norm_type,
198 self.scale_grad_by_freq,
199 self.sparse,
200 )
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/nn/functional.py:184, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
183 def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False):
--> 184 return execute('embedding', input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq)
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/executor.py:6, in execute(func_name, *args, **kwargs)
5 def execute(func_name, *args, **kwargs):
----> 6 out, device = dispatcher.dispatch(func_name, *args, **kwargs)
7 # if MS27:
8 # return out
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/dispatcher.py:57, in Dispatcher.dispatch(self, func_name, *args, **kwargs)
54 raise RuntimeError(
55 f"No implementation for function: {func_name} on {device_type}."
56 )
---> 57 return func(*args, **kwargs), device
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/_apis/cpu.py:405, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq)
404 def embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq):
--> 405 return cast(legacy.gather(weight, input, 0, 0), weight.dtype)
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/_op_prim/cpu/legacy.py:1770, in gather(*args)
1769 op = _get_cache_prim(Gather)(*args[-1:]).set_device('CPU')
-> 1770 return op(*args[:-1])
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindspore/ops/auto_generate/gen_ops_prim.py:6025, in Gather.__call__(self, input_params, input_indices, axis)
6024 def __call__(self, input_params, input_indices, axis):
-> 6025 return super().__call__(input_params, input_indices, axis, self.batch_dims)
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindspore/ops/primitive.py:413, in Primitive.__call__(self, *args)
412 return output
--> 413 return _run_op(self, self.name, args)
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindspore/ops/primitive.py:1022, in _run_op(obj, op_name, args)
1021 """Single op execution function supported by ge in PyNative mode."""
-> 1022 res = _pynative_executor.run_op_async(obj, op_name, args)
1023 # Add for jit context.
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindspore/common/api.py:1638, in _PyNativeExecutor.run_op_async(self, *args)
1629 """
1630 Run single op async.
1631
(...)
1636 StubNode, result of run op.
1637 """
-> 1638 return self._executor.run_op_async(*args)
TypeError:
----------------------------------------------------
- Kernel select failed:
----------------------------------------------------
Select CPU operator[Gather] fail! Unsupported data type!
The supported data types are input[UInt8 Int32 Int64 Int64], output[UInt8]; input[UInt16 Int32 Int64 Int64], output[UInt16]; input[UInt32 Int32 Int64 Int64], output[UInt32]; input[UInt64 Int32 Int64 Int64], output[UInt64]; input[Int8 Int32 Int64 Int64], output[Int8]; input[Int16 Int32 Int64 Int64], output[Int16]; input[Int32 Int32 Int64 Int64], output[Int32]; input[Int64 Int32 Int64 Int64], output[Int64]; input[Float16 Int32 Int64 Int64], output[Float16]; input[Float32 Int32 Int64 Int64], output[Float32]; input[Float64 Int32 Int64 Int64], output[Float64]; input[Bool Int32 Int64 Int64], output[Bool]; input[Complex64 Int32 Int64 Int64], output[Complex64]; input[Complex128 Int32 Int64 Int64], output[Complex128]; , but get input[BFloat16 Int64 Int64 Int64 ] and output[BFloat16 ]
node: @pynative_kernel_graph4000000013:CNode_10{[0]: ValueNode<Primitive> PrimFunc_Gather, [1]: @pynative_kernel_graph4000000013:param_Parameter_11, [2]: @pynative_kernel_graph4000000013:param_Parameter_12, [3]: ValueNode<Int64Imm> 0, [4]: ValueNode<Int64Imm> 0}
----------------------------------------------------
- C++ Call Stack: (For framework developers)
----------------------------------------------------
mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_device_context.cc:514 SetOperatorInfo
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
Cell In[12], line 6
4 inputs = tokenizer(prompt, return_tensors="ms")
5 print("inputs:",inputs)
----> 6 outputs = model.generate(**inputs, temperature=0.5, max_new_tokens=128, do_sample=True)
7 result = tokenizer.decode(outputs[0])
8 print(result)
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/utils/_contextlib.py:117, in context_decorator.<locals>.decorate_context(*args, **kwargs)
114 @functools.wraps(func)
115 def decorate_context(*args, **kwargs):
116 with ctx_factory():
--> 117 return func(*args, **kwargs)
File /usr/local/python3.10.14/lib/python3.10/site-packages/transformers/generation/utils.py:2564, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, use_model_defaults, custom_generate, **kwargs)
2561 model_kwargs["use_cache"] = generation_config.use_cache
2563 # 9. Call generation mode
-> 2564 result = decoding_method(
2565 self,
2566 input_ids,
2567 logits_processor=prepared_logits_processor,
2568 stopping_criteria=prepared_stopping_criteria,
2569 generation_config=generation_config,
2570 **generation_mode_kwargs,
2571 **model_kwargs,
2572 )
2574 # Convert to legacy cache format if requested
2575 if (
2576 generation_config.return_legacy_cache is True
2577 and hasattr(result, "past_key_values")
2578 and getattr(result.past_key_values, "to_legacy_cache") is not None
2579 ):
File /usr/local/python3.10.14/lib/python3.10/site-packages/transformers/generation/utils.py:2784, in GenerationMixin._sample(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)
2781 model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
2783 if is_prefill:
-> 2784 outputs = self(**model_inputs, return_dict=True)
2785 is_prefill = False
2786 else:
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/nn/modules/module.py:826, in Module._wrapped_call_impl(self, *args, **kwargs)
824 if self._compiled_call_impl is not None:
825 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
--> 826 return self._call_impl(*args, **kwargs)
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/nn/modules/module.py:840, in Module._call_impl(self, *args, **kwargs)
835 return forward_call(*args, **kwargs)
837 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
838 or _global_backward_pre_hooks or _global_backward_hooks
839 or _global_forward_hooks or _global_forward_pre_hooks):
--> 840 return forward_call(*args, **kwargs)
842 try:
843 result = None
File /usr/local/python3.10.14/lib/python3.10/site-packages/transformers/utils/generic.py:918, in can_return_tuple.<locals>.wrapper(self, *args, **kwargs)
916 if return_dict_passed is not None:
917 return_dict = return_dict_passed
--> 918 output = func(self, *args, **kwargs)
919 if not return_dict and not isinstance(output, tuple):
920 output = output.to_tuple()
File /usr/local/python3.10.14/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py:449, in Qwen2ForCausalLM.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, cache_position, logits_to_keep, **kwargs)
417 @can_return_tuple
418 @auto_docstring
419 def forward(
(...)
430 **kwargs: Unpack[TransformersKwargs],
431 ) -> CausalLMOutputWithPast:
432 r"""
433 Example:
434
(...)
447 "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
448 ```"""
--> 449 outputs: BaseModelOutputWithPast = self.model(
450 input_ids=input_ids,
451 attention_mask=attention_mask,
452 position_ids=position_ids,
453 past_key_values=past_key_values,
454 inputs_embeds=inputs_embeds,
455 use_cache=use_cache,
456 cache_position=cache_position,
457 **kwargs,
458 )
460 hidden_states = outputs.last_hidden_state
461 # Only compute necessary logits, and do not upcast them to float if we are not computing the loss
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/nn/modules/module.py:826, in Module._wrapped_call_impl(self, *args, **kwargs)
824 if self._compiled_call_impl is not None:
825 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
--> 826 return self._call_impl(*args, **kwargs)
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/nn/modules/module.py:840, in Module._call_impl(self, *args, **kwargs)
835 return forward_call(*args, **kwargs)
837 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
838 or _global_backward_pre_hooks or _global_backward_hooks
839 or _global_forward_hooks or _global_forward_pre_hooks):
--> 840 return forward_call(*args, **kwargs)
842 try:
843 result = None
File /usr/local/python3.10.14/lib/python3.10/site-packages/transformers/utils/generic.py:1073, in check_model_inputs.<locals>.wrapper(self, *args, **kwargs)
1071 outputs = func(self, *args, **kwargs_without_recordable)
1072 except TypeError:
-> 1073 raise original_exception
1074 raise TypeError(
1075 "Missing `**kwargs` in the signature of the `@check_model_inputs`-decorated function "
1076 f"({func.__qualname__})"
1077 )
1079 # Restore original forward methods
File /usr/local/python3.10.14/lib/python3.10/site-packages/transformers/utils/generic.py:1064, in check_model_inputs.<locals>.wrapper(self, *args, **kwargs)
1061 monkey_patched_layers.append((module, original_forward))
1063 try:
-> 1064 outputs = func(self, *args, **kwargs)
1065 except TypeError as original_exception:
1066 # If we get a TypeError, it's possible that the model is not receiving the recordable kwargs correctly.
1067 # Get a TypeError even after removing the recordable kwargs -> re-raise the original exception
1068 # Otherwise -> we're probably missing `**kwargs` in the decorated function
1069 kwargs_without_recordable = {k: v for k, v in kwargs.items() if k not in recordable_keys}
File /usr/local/python3.10.14/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py:345, in Qwen2Model.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, cache_position, **kwargs)
342 raise ValueError("You must specify exactly one of input_ids or inputs_embeds")
344 if inputs_embeds is None:
--> 345 inputs_embeds = self.embed_tokens(input_ids)
347 if use_cache and past_key_values is None:
348 past_key_values = DynamicCache(config=self.config)
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/nn/modules/module.py:826, in Module._wrapped_call_impl(self, *args, **kwargs)
824 if self._compiled_call_impl is not None:
825 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
--> 826 return self._call_impl(*args, **kwargs)
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/nn/modules/module.py:840, in Module._call_impl(self, *args, **kwargs)
835 return forward_call(*args, **kwargs)
837 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
838 or _global_backward_pre_hooks or _global_backward_hooks
839 or _global_forward_hooks or _global_forward_pre_hooks):
--> 840 return forward_call(*args, **kwargs)
842 try:
843 result = None
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/nn/modules/sparse.py:192, in Embedding.forward(self, input)
191 def forward(self, input: Tensor) -> Tensor:
--> 192 return F.embedding(
193 input,
194 self.weight,
195 self.padding_idx,
196 self.max_norm,
197 self.norm_type,
198 self.scale_grad_by_freq,
199 self.sparse,
200 )
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/nn/functional.py:184, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
183 def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False):
--> 184 return execute('embedding', input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq)
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/executor.py:6, in execute(func_name, *args, **kwargs)
5 def execute(func_name, *args, **kwargs):
----> 6 out, device = dispatcher.dispatch(func_name, *args, **kwargs)
7 # if MS27:
8 # return out
10 if not isinstance(out, (tuple, list)):
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/dispatcher.py:57, in Dispatcher.dispatch(self, func_name, *args, **kwargs)
53 if func is None:
54 raise RuntimeError(
55 f"No implementation for function: {func_name} on {device_type}."
56 )
---> 57 return func(*args, **kwargs), device
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/_apis/cpu.py:405, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq)
404 def embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq):
--> 405 return cast(legacy.gather(weight, input, 0, 0), weight.dtype)
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindtorch/_op_prim/cpu/legacy.py:1770, in gather(*args)
1768 def gather(*args):
1769 op = _get_cache_prim(Gather)(*args[-1:]).set_device('CPU')
-> 1770 return op(*args[:-1])
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindspore/ops/auto_generate/gen_ops_prim.py:6025, in Gather.__call__(self, input_params, input_indices, axis)
6024 def __call__(self, input_params, input_indices, axis):
-> 6025 return super().__call__(input_params, input_indices, axis, self.batch_dims)
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindspore/ops/primitive.py:413, in Primitive.__call__(self, *args)
411 if should_elim:
412 return output
--> 413 return _run_op(self, self.name, args)
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindspore/ops/primitive.py:1022, in _run_op(obj, op_name, args)
1020 def _run_op(obj, op_name, args):
1021 """Single op execution function supported by ge in PyNative mode."""
-> 1022 res = _pynative_executor.run_op_async(obj, op_name, args)
1023 # Add for jit context.
1024 if jit_context():
File /usr/local/python3.10.14/lib/python3.10/site-packages/mindspore/common/api.py:1638, in _PyNativeExecutor.run_op_async(self, *args)
1628 def run_op_async(self, *args):
1629 """
1630 Run single op async.
1631
(...)
1636 StubNode, result of run op.
1637 """
-> 1638 return self._executor.run_op_async(*args)
TypeError:
----------------------------------------------------
- Kernel select failed:
----------------------------------------------------
Select CPU operator[Gather] fail! Unsupported data type!
The supported data types are input[UInt8 Int32 Int64 Int64], output[UInt8]; input[UInt16 Int32 Int64 Int64], output[UInt16]; input[UInt32 Int32 Int64 Int64], output[UInt32]; input[UInt64 Int32 Int64 Int64], output[UInt64]; input[Int8 Int32 Int64 Int64], output[Int8]; input[Int16 Int32 Int64 Int64], output[Int16]; input[Int32 Int32 Int64 Int64], output[Int32]; input[Int64 Int32 Int64 Int64], output[Int64]; input[Float16 Int32 Int64 Int64], output[Float16]; input[Float32 Int32 Int64 Int64], output[Float32]; input[Float64 Int32 Int64 Int64], output[Float64]; input[Bool Int32 Int64 Int64], output[Bool]; input[Complex64 Int32 Int64 Int64], output[Complex64]; input[Complex128 Int32 Int64 Int64], output[Complex128]; , but get input[BFloat16 Int64 Int64 Int64 ] and output[BFloat16 ]
node: @pynative_kernel_graph4000000012:CNode_7{[0]: ValueNode<Primitive> PrimFunc_Gather, [1]: @pynative_kernel_graph4000000012:param_Parameter_8, [2]: @pynative_kernel_graph4000000012:param_Parameter_9, [3]: ValueNode<Int64Imm> 0, [4]: ValueNode<Int64Imm> 0}
----------------------------------------------------
- C++ Call Stack: (For framework developers)
----------------------------------------------------
mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_device_context.cc:514 SetOperatorInfo
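Reading the traceback bottom-up: the embedding lookup is dispatched by mindtorch to its CPU backend (mindtorch/_apis/cpu.py, where legacy.gather(...) is pinned with set_device('CPU')), and the CPU Gather kernel has no BFloat16 registration, so kernel selection fails even though Ascend was requested. A minimal sketch that should reproduce just the kernel failure in isolation, under the assumption that ops.gather on CPU uses the same kernel registrations as in the traceback:

import numpy as np
import mindspore as ms
from mindspore import ops, Tensor

ms.set_device('CPU')  # force the CPU backend, mirroring the fallback seen in the traceback
weight = Tensor(np.zeros((8, 4), dtype=np.float32), ms.bfloat16)  # bfloat16 table, like embed_tokens.weight
indices = Tensor(np.array([1, 2]), ms.int64)
out = ops.gather(weight, indices, 0)  # expected: the same "Select CPU operator[Gather] fail!" TypeError

If that is the root cause, one possible workaround (also an assumption, not verified on this setup) is to load the checkpoint with ms_dtype=ms.float16 instead of ms.bfloat16, since Float16 does appear in the CPU Gather kernel's supported-dtype list above.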