1 系统环境
硬件环境(Ascend/GPU/CPU): Ascend
MindSpore版本: mindspore=1.7.0
执行模式(PyNative/ Graph):不限
Python版本: Python=3.7
操作系统平台: 不限
2 报错信息
2.1 问题描述
使用 ops.Custom 注册基于 Hybrid DSL(@ms_hybrid)编写的 MindSpore 自定义算子时,抛出 TypeError。
2.2 报错信息
TypeError: In the function ascend_add written in the Hybrid DSL, function output_tensor expects two inputs, but get 1
2.3 脚本代码
import mindspore as ms
from mindspore import nn, ops
from mindspore.ops import ms_hybrid
import numpy as np
import time
# Element-wise add written in the MindSpore Hybrid DSL.
# This copy reproduces the reported failure: `dtype` is passed to
# `output_tensor` as a keyword argument, which leads to
# "TypeError: ... function output_tensor expects two inputs, but get 1".
# `output_tensor` and `grid` are not imported; they are used here as
# Hybrid DSL keywords.
# NOTE(review): `grid(a.shape)` presumably yields every index of `a` -- confirm
# against the Hybrid DSL specification.
@ms_hybrid
def ascend_add(a, b):
    # Erroneous on purpose: keyword form of the dtype argument.
    c = output_tensor(a.shape, dtype=a.dtype)
    for arg in grid(a.shape):
        c[arg] = a[arg] + b[arg]
    return c
def cpu_add(a, b):
    """Add two arrays element by element using explicit Python loops.

    Serves as the plain NumPy baseline that the script times against the
    Hybrid DSL operator. Only the first two axes are iterated.

    Args:
        a: NumPy array; determines the shape and dtype of the result.
        b: NumPy array read at the same ``[i, j]`` positions as ``a``.

    Returns:
        A new array with the shape and dtype of ``a`` holding ``a + b``.
    """
    c = np.zeros(a.shape, dtype=a.dtype)
    for i in range(a.shape[0]):
        for j in range(a.shape[1]):
            c[i, j] = a[i, j] + b[i, j]
    return c
# Two random 4x4 float32 operands shared by both implementations.
np_x = np.random.normal(0, 1, [4, 4]).astype(np.float32)
np_y = np.random.normal(0, 1, [4, 4]).astype(np.float32)

# Time 1000 calls of the Hybrid DSL operator.
# NOTE: the timer starts before the tensors and the ops.Custom wrapper are
# created, so the printed figure includes that setup as well as the calls.
start = time.time()
input_x = ms.Tensor(np_x)
input_y = ms.Tensor(np_y)
test_op_akg = ops.Custom(ascend_add)
for _ in range(1000):
    c = test_op_akg(input_x, input_y)
end = time.time()
print(end - start)

# Time 1000 calls of the pure NumPy loop baseline on the same data.
start = time.time()
for _ in range(1000):
    c = cpu_add(np_x, np_y)
end = time.time()
print(end - start)
3 根因分析
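查看 MindSpore Hybrid DSL 语法规范可知,output_tensor 的调用形式为 output_tensor(shape, dtype),shape 和 dtype 两个参数都需要按位置传入。
而脚本中的写法是 output_tensor(a.shape, dtype=a.dtype):dtype 是以关键字参数的形式传入的,解析时只识别到 1 个输入,
这与报错信息 "function output_tensor expects two inputs, but get 1" 一致。用法错误,导致报错。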
4 解决方案
把 output_tensor(a.shape, dtype=a.dtype) 改成 output_tensor(a.shape, a.dtype),即 dtype 改为按位置传参。修改后的完整脚本如下:
import mindspore as ms
from mindspore import nn, ops
from mindspore.ops import ms_hybrid
import numpy as np
import time
# Element-wise add written in the MindSpore Hybrid DSL (corrected version).
# Both arguments of `output_tensor` are passed positionally, as
# output_tensor(shape, dtype).
# `output_tensor` and `grid` are not imported; they are used here as
# Hybrid DSL keywords.
# NOTE(review): `grid(a.shape)` presumably yields every index of `a` -- confirm
# against the Hybrid DSL specification.
@ms_hybrid
def ascend_add(a, b):
    c = output_tensor(a.shape, a.dtype)
    for arg in grid(a.shape):
        c[arg] = a[arg] + b[arg]
    return c
def cpu_add(a, b):
    """Add two arrays element by element using explicit Python loops.

    Serves as the plain NumPy baseline that the script times against the
    Hybrid DSL operator. Only the first two axes are iterated.

    Args:
        a: NumPy array; determines the shape and dtype of the result.
        b: NumPy array read at the same ``[i, j]`` positions as ``a``.

    Returns:
        A new array with the shape and dtype of ``a`` holding ``a + b``.
    """
    c = np.zeros(a.shape, dtype=a.dtype)
    for i in range(a.shape[0]):
        for j in range(a.shape[1]):
            c[i, j] = a[i, j] + b[i, j]
    return c
# Two random 4x4 float32 operands shared by both implementations.
np_x = np.random.normal(0, 1, [4, 4]).astype(np.float32)
np_y = np.random.normal(0, 1, [4, 4]).astype(np.float32)

# Time 1000 calls of the Hybrid DSL operator.
# NOTE: the timer starts before the tensors and the ops.Custom wrapper are
# created, so the printed figure includes that setup as well as the calls.
start = time.time()
input_x = ms.Tensor(np_x)
input_y = ms.Tensor(np_y)
test_op_akg = ops.Custom(ascend_add)
for _ in range(1000):
    c = test_op_akg(input_x, input_y)
end = time.time()
print(end - start)

# Time 1000 calls of the pure NumPy loop baseline on the same data.
start = time.time()
for _ in range(1000):
    c = cpu_add(np_x, np_y)
end = time.time()
print(end - start)
执行结果:

