在NPU上的切片操作x=x[:,::-1,:,:]不生效的分析解决

1 系统环境

硬件环境(Ascend/GPU/CPU): Ascend&GPU
MindSpore版本: 1.9.0
执行模式(PyNative/ Graph): 不限
Python版本: 3.7.5
操作系统平台:Linux

2 报错信息

2.1 问题描述

NPU上的切片操作x=x[:,::-1,:,:]不生效,在GPU上生效

2.2 报错信息

GPU运行结果:

import os
import cv2
import sys
import numpy as np
import mindspore
from mindspore import context
from mindspore import nn
if __name__ == '__main__':
    # Reproduction script, GPU variant: reverse axis 1 of an image tensor.
    #
    # Ascend configuration, disabled for this run. Every line of the call is
    # commented out: leaving the continuation line active makes the script
    # fail with an IndentationError before anything executes.
    # device_id = int(os.getenv('DEVICE_ID', '0'))
    # context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend",
    #                     device_id=device_id, save_graphs=False)
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU", save_graphs=False)
    # Read the image, move its last axis (presumably channels) to the front
    # and cast to float32.
    x = cv2.imread('0100000000.jpg').transpose(2, 0, 1).astype(np.float32)
    # Prepend a batch axis.
    x = mindspore.Tensor(x)[None, :, :, :]
    # Operation under test: reverse axis 1 with a negative-stride slice.
    x = x[:, ::-1, :, :]
    print(x)

Tensor(shape=[1, 3, 1080, 1920], dtype=Float32, value=
[[[[ 4.00000000e+01, 4.10000000e+01, 4.10000000e+01 ... 1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
    [ 4.10000000e+01, 4.10000000e+01, 4.10000000e+01 ... 1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
    [ 4.90000000e+01, 4.90000000e+01, 4.90000000e+01 ... 1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
    ...
    [ 5.50000000e+01, 5.40000000e+01, 5.40000000e+01 ... 1.00000000e+01, 1.00000000e+01, 1.00000000e+01],
    [ 5.00000000e+01, 5.00000000e+01, 5.00000000e+01 ... 9.00000000e+00, 9.00000000e+00, 9.00000000e+00],
    [ 5.00000000e+01, 5.00000000e+01, 4.90000000e+01 ... 8.00000000e+00, 8.00000000e+00, 8.00000000e+00]],
    [[ 4.40000000e+01, 4.40000000e+01, 4.40000000e+01 ... 1.00000000e+01, 1.00000000e+01, 1.00000000e+01],
    [ 4.40000000e+01, 4.40000000e+01, 4.40000000e+01 ... 1.00000000e+01, 1.00000000e+01, 1.00000000e+01],
    [ 4.40000000e+01, 4.40000000e+01, 4.40000000e+01 ... 1.00000000e+01, 1.00000000e+01, 1.00000000e+01],
    ...
    [ 4.50000000e+01, 4.40000000e+01, 4.40000000e+01 ... 1.00000000e+01, 1.00000000e+01, 1.00000000e+01],
    [ 4.10000000e+01, 4.10000000e+01, 4.10000000e+01 ... 9.00000000e+00, 9.00000000e+00, 9.00000000e+00],
    [ 4.10000000e+01, 4.10000000e+01, 4.00000000e+01 ... 8.00000000e+00, 8.00000000e+00, 8.00000000e+00]],
    [[ 4.00000000e+01, 4.00000000e+01, 4.00000000e+01 ... 1.40000000e+01, 1.40000000e+01, 1.40000000e+01],
    [ 4.00000000e+01, 4.00000000e+01, 4.00000000e+01 ... 1.40000000e+01, 1.40000000e+01, 1.40000000e+01],
    [ 4.00000000e+01, 4.00000000e+01, 4.00000000e+01 ... 1.40000000e+01, 1.40000000e+01, 1.40000000e+01],
    ...
    [ 9.50000000e+01, 9.40000000e+01, 9.40000000e+01 ... 7.00000000e+01, 7.00000000e+01, 7.00000000e+01],
    [ 9.10000000e+01, 9.10000000e+01, 9.10000000e+01 ... 7.00000000e+00, 7.00000000e+00, 7.00000000e+00],
    [ 9.10000000e+01, 9.10000000e+01, 9.00000000e+01 ... 7.00000000e+00, 7.00000000e+00, 7.00000000e+00]]]])

ASCEND 910上运行结果:

2.3 脚本代码(代码格式,可上传附件)

import os
import cv2
import sys
import numpy as np
import mindspore
from mindspore import context
from mindspore import nn
if __name__ == '__main__':
    # Reproduction script, Ascend variant: reverse axis 1 of an image tensor.
    #
    # Use device 0 when DEVICE_ID is not exported. The default belongs inside
    # os.getenv(); passing 0 as the second argument of int() made it the
    # numeric base and raised TypeError whenever the variable was unset.
    device_id = int(os.getenv('DEVICE_ID', '0'))
    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend",
                        device_id=device_id, save_graphs=False)
    # GPU configuration, kept for comparison:
    # context.set_context(mode=context.GRAPH_MODE, device_target="GPU", save_graphs=False)
    # Read the image, move its last axis (presumably channels) to the front
    # and cast to float32.
    x = cv2.imread('0100000000.jpg').transpose(2, 0, 1).astype(np.float32)
    # Prepend a batch axis.
    x = mindspore.Tensor(x)[None, :, :, :]
    # Operation under test: reverse axis 1 with a negative-stride slice.
    x = x[:, ::-1, :, :]
    print(x)

Tensor(shape=[1, 3, 1080, 1920], dtype=Float32, value=
[[[[ 9.10000000e+01, 9.10000000e+01, 9.10000000e+01 ... 7.00000000e+00, 7.00000000e+00, 7.00000000e+00],
    [ 9.10000000e+01, 9.10000000e+01, 9.10000000e+01 ... 7.00000000e+00, 7.00000000e+00, 7.00000000e+00],
    [ 8.90000000e+01, 8.90000000e+01, 8.90000000e+01 ... 7.00000000e+00, 7.00000000e+00, 7.00000000e+00],
    ...
    [ 5.50000000e+01, 5.40000000e+01, 5.40000000e+01 ... 1.00000000e+01, 1.00000000e+01, 1.00000000e+01],
    [ 5.00000000e+01, 5.00000000e+01, 5.00000000e+01 ... 9.00000000e+00, 9.00000000e+00, 9.00000000e+00],
    [ 5.00000000e+01, 5.00000000e+01, 4.90000000e+01 ... 8.00000000e+00, 8.00000000e+00, 8.00000000e+00]],
    [[ 4.40000000e+01, 4.40000000e+01, 4.40000000e+01 ... 1.00000000e+01, 1.00000000e+01, 1.00000000e+01],
    [ 4.40000000e+01, 4.40000000e+01, 4.40000000e+01 ... 1.00000000e+01, 1.00000000e+01, 1.00000000e+01],
    [ 4.40000000e+01, 4.40000000e+01, 4.40000000e+01 ... 1.00000000e+01, 1.00000000e+01, 1.00000000e+01],
    ...
    [ 4.50000000e+01, 4.40000000e+01, 4.40000000e+01 ... 1.00000000e+01, 1.00000000e+01, 1.00000000e+01],
    [ 4.10000000e+01, 4.10000000e+01, 4.10000000e+01 ... 9.00000000e+00, 9.00000000e+00, 9.00000000e+00],
    [ 4.10000000e+01, 4.10000000e+01, 4.00000000e+01 ... 8.00000000e+00, 8.00000000e+00, 8.00000000e+00]],
    [[ 4.00000000e+01, 4.00000000e+01, 4.00000000e+01 ... 1.40000000e+01, 1.40000000e+01, 1.40000000e+01],
    [ 4.00000000e+01, 4.00000000e+01, 4.00000000e+01 ... 1.40000000e+01, 1.40000000e+01, 1.40000000e+01],
    [ 4.00000000e+01, 4.00000000e+01, 4.00000000e+01 ... 1.40000000e+01, 1.40000000e+01, 1.40000000e+01],
    ...
    [ 3.50000000e+01, 3.40000000e+01, 3.40000000e+01 ... 1.00000000e+01, 1.00000000e+01, 1.00000000e+01],
    [ 3.10000000e+01, 3.10000000e+01, 3.10000000e+01 ... 9.00000000e+00, 9.00000000e+00, 9.00000000e+00],
    [ 3.10000000e+01, 3.10000000e+01, 3.00000000e+01 ... 8.00000000e+00, 8.00000000e+00, 8.00000000e+00]]]])

3 根因分析

在进行切片操作时,框架会将其转换成 ScatterNdUpdate 算子进行计算,而该算子在 Ascend 上没有能够支持当前输入的规格。

4 解决方案

x=cv2.imread('0100000000.jpg').transpose(2,0,1).astype(np.float32)

可以在这一步将要切片的数据转换成 fp16 或 fp32 类型(例如 .astype(np.float16) 或 .astype(np.float32)),然后再进行切片。