MindSpore的Cell.insert_child_to_cell 添加层会出现参数名重复

1 系统环境

硬件环境(Ascend/GPU/CPU): 不限
MindSpore版本: 2.0.0
执行模式(PyNative/ Graph): 不限
Python版本: 3.7.5
操作系统平台: 不限

2 报错信息

2.1 问题描述

运行示例代码出现报错

2.2 报错信息

raise ValueError("The value {} , its name '{}' already exists. " ValueError: The value Parameter (name=0.conv1.weight, shape=(128, 64, 3, 3), dtype=Float32, requires_grad=True) , its name '0.conv1.weight' already exists. Please set a unique name for the parameter.

2.3 脚本代码(代码格式,可上传附件)

import mindspore
from mindspore import nn
import mindspore.ops as ops

class Residual(nn.Cell):
    """The Residual block of ResNet.

    Computes relu(bn2(conv2(relu(bn1(conv1(X))))) + shortcut(X)), where the
    shortcut is an optional 1x1 convolution used to match channel count and
    spatial stride between the two paths.
    """
    def __init__(self, input_channels, num_channels,
                    use_1x1conv=False, strides=1):
        super().__init__()
        # First 3x3 conv; may downsample spatially via `strides`.
        self.conv1 = nn.Conv2d(input_channels, num_channels,
                                kernel_size=3, padding=1, stride=strides, pad_mode='pad')
        # Second 3x3 conv keeps spatial size and channel count.
        self.conv2 = nn.Conv2d(num_channels, num_channels,
                                kernel_size=3, padding=1, pad_mode='pad')
        if use_1x1conv:
            # 1x1 conv on the shortcut path so X matches Y's shape.
            self.conv3 = nn.Conv2d(input_channels, num_channels,
                                    kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    # Fix: MindSpore calls `construct` when a Cell is invoked (unlike
    # PyTorch's `forward`); with the original `forward` name this method
    # was never executed by the framework.
    def construct(self, X):
        Y = ops.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        Y += X
        return ops.relu(Y)

def resnet_block(in_channels, out_channels, num_residuals,
                    first_block=False):
    """Stack `num_residuals` Residual cells into a SequentialCell.

    Unless `first_block` is True, the first Residual halves the spatial
    size (stride 2) and uses a 1x1 conv to adapt the channel count.
    """
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(in_channels, out_channels,
                                    use_1x1conv=True, strides=2))
            # Fix: removed a stray `nn.Conv2d(...)` call here whose result
            # was immediately discarded -- dead code with no effect.
        else:
            blk.append(Residual(out_channels, out_channels))
    return nn.SequentialCell(*blk)

# net = resnet_block(10,3, 3)

# Base stem: conv -> batchnorm -> relu.
net = nn.SequentialCell(
        nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, pad_mode='pad'),
        nn.BatchNorm2d(64),
        nn.ReLU())
# Each inserted SequentialCell auto-prefixes its parameters starting from
# "0.", so these two inserts yield clashing names such as "0.conv1.weight"
# (this is the bug being reported).
net.insert_child_to_cell("resnet_block1", resnet_block(64, 64, 2, first_block=True))
net.insert_child_to_cell("resnet_block2", resnet_block(64, 128, 2))

# Raises the ValueError above: trainable_params() hits duplicate parameter names.
trainer = nn.Adam(net.trainable_params(), learning_rate=0.001)

3 根因分析

当前框架容器类命名机制某些场景会失效,导致cell里Parameter重复,导致训练时报错。
理论上框架应该能避免这种情况,但是这种场景比较特殊。两次调用了insert_child_to_cell,且两次都插入了相同的网络Residual (resnet_block里面两次都最终插入了Residual 网络)
所以在第一次插入的时候自动生成了前缀,0.conv1.weight
然后第二次插入的时候仍然自动生成前缀依然是0.conv1.weight,这种情况下就参数名重复了。
如下是仅仅执行一次insert_child_to_cell的情况下打印参数列表。插入一次的情况下是不会报错的。再次insert_child_to_cell后就会报错了。

net.insert_child_to_cell("resnet_block1", resnet_block(64, 64, 2, first_block=True))  
print(net.trainable_params())  
  
  
[Parameter (name=0.weight, shape=(64, 3, 3, 3), dtype=Float32, requires_grad=True),  
Parameter (name=1.gamma, shape=(64,), dtype=Float32, requires_grad=True),  
Parameter (name=1.beta, shape=(64,), dtype=Float32, requires_grad=True),  
Parameter (name=0.conv1.weight, shape=(64, 64, 3, 3), dtype=Float32, requires_grad=True),  
Parameter (name=0.conv2.weight, shape=(64, 64, 3, 3), dtype=Float32, requires_grad=True),  
Parameter (name=0.bn1.gamma, shape=(64,), dtype=Float32, requires_grad=True),  
Parameter (name=0.bn1.beta, shape=(64,), dtype=Float32, requires_grad=True),  
Parameter (name=0.bn2.gamma, shape=(64,), dtype=Float32, requires_grad=True),  
Parameter (name=0.bn2.beta, shape=(64,), dtype=Float32, requires_grad=True),  
Parameter (name=1.conv1.weight, shape=(64, 64, 3, 3), dtype=Float32, requires_grad=True),  
Parameter (name=1.conv2.weight, shape=(64, 64, 3, 3), dtype=Float32, requires_grad=True),  
Parameter (name=1.bn1.gamma, shape=(64,), dtype=Float32, requires_grad=True),  
Parameter (name=1.bn1.beta, shape=(64,), dtype=Float32, requires_grad=True),  
Parameter (name=1.bn2.gamma, shape=(64,), dtype=Float32, requires_grad=True),  
Parameter (name=1.bn2.beta, shape=(64,), dtype=Float32, requires_grad=True)]

4 解决方案

通过CellList来管理,从而规避多个 Cell 间的同名 Parameter
把两次insert_child_to_cell合并成一次,插入一个CellList

cell_list = nn.CellList()  
cell_list.append(resnet_block(64, 64, 2, first_block=True))  
cell_list.append(resnet_block(64, 128, 2))  
net.insert_child_to_cell("resnet_blocks", cell_list)


    
import mindspore  
from mindspore import nn  
import mindspore.ops as ops  
    
class Residual(nn.Cell):
    """The Residual block of ResNet.

    Computes relu(bn2(conv2(relu(bn1(conv1(X))))) + shortcut(X)), where the
    shortcut is an optional 1x1 convolution used to match channel count and
    spatial stride between the two paths.
    """
    def __init__(self, input_channels, num_channels,
                    use_1x1conv=False, strides=1):
        super().__init__()
        # First 3x3 conv; may downsample spatially via `strides`.
        self.conv1 = nn.Conv2d(input_channels, num_channels,
                                kernel_size=3, padding=1, stride=strides, pad_mode='pad')
        # Second 3x3 conv keeps spatial size and channel count.
        self.conv2 = nn.Conv2d(num_channels, num_channels,
                                kernel_size=3, padding=1, pad_mode='pad')
        if use_1x1conv:
            # 1x1 conv on the shortcut path so X matches Y's shape.
            self.conv3 = nn.Conv2d(input_channels, num_channels,
                                    kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    # Fix: MindSpore calls `construct` when a Cell is invoked (unlike
    # PyTorch's `forward`); with the original `forward` name this method
    # was never executed by the framework.
    def construct(self, X):
        Y = ops.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        Y += X
        return ops.relu(Y)
    
def resnet_block(in_channels, out_channels, num_residuals,
                    first_block=False):
    """Stack `num_residuals` Residual cells into a SequentialCell.

    Unless `first_block` is True, the first Residual halves the spatial
    size (stride 2) and uses a 1x1 conv to adapt the channel count.
    """
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(in_channels, out_channels,
                                    use_1x1conv=True, strides=2))
            # Fix: removed a stray `nn.Conv2d(...)` call here whose result
            # was immediately discarded -- dead code with no effect.
        else:
            blk.append(Residual(out_channels, out_channels))
    return nn.SequentialCell(*blk)
    
# net = resnet_block(10,3, 3)  
    
# Base stem: conv -> batchnorm -> relu.
net = nn.SequentialCell(  
        nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, pad_mode='pad'),  
        nn.BatchNorm2d(64),  
        nn.ReLU())  
# Workaround: wrap both stages in one CellList and insert it once. The
# CellList prepends a per-element index to parameter names ("0.0.", "0.1."),
# so the names stay unique and the duplicate-name ValueError is avoided.
cell_list = nn.CellList()  
cell_list.append(resnet_block(64, 64, 2, first_block=True))  
cell_list.append(resnet_block(64, 128, 2))  
net.insert_child_to_cell("resnet_blocks", cell_list)  
print(net.trainable_params())  
# Now succeeds: all parameter names are unique.
trainer = nn.Adam(net.trainable_params(), learning_rate=0.001) 

最终打印的网络参数如下,可以看到 CellList 里面的参数带有 0.0、0.1、0.1.0 等逐级唯一的前缀,不再重复

[Parameter (name=0.weight, shape=(64, 3, 3, 3), dtype=Float32, requires_grad=True),  
Parameter (name=1.gamma, shape=(64,), dtype=Float32, requires_grad=True),  
Parameter (name=1.beta, shape=(64,), dtype=Float32, requires_grad=True),  
Parameter (name=0.0.conv1.weight, shape=(64, 64, 3, 3), dtype=Float32, requires_grad=True),   
Parameter (name=0.0.conv2.weight, shape=(64, 64, 3, 3), dtype=Float32, requires_grad=True),  
Parameter (name=0.0.bn1.gamma, shape=(64,), dtype=Float32, requires_grad=True),  
Parameter (name=0.0.bn1.beta, shape=(64,), dtype=Float32, requires_grad=True),   
Parameter (name=0.0.bn2.gamma, shape=(64,), dtype=Float32, requires_grad=True),  
Parameter (name=0.0.bn2.beta, shape=(64,), dtype=Float32, requires_grad=True),   
Parameter (name=0.1.conv1.weight, shape=(64, 64, 3, 3), dtype=Float32, requires_grad=True),  
Parameter (name=0.1.conv2.weight, shape=(64, 64, 3, 3), dtype=Float32, requires_grad=True),  
Parameter (name=0.1.bn1.gamma, shape=(64,), dtype=Float32, requires_grad=True),  
Parameter (name=0.1.bn1.beta, shape=(64,), dtype=Float32, requires_grad=True),  
Parameter (name=0.1.bn2.gamma, shape=(64,), dtype=Float32, requires_grad=True),  
Parameter (name=0.1.bn2.beta, shape=(64,), dtype=Float32, requires_grad=True),   
Parameter (name=0.1.0.conv1.weight, shape=(128, 64, 3, 3), dtype=Float32, requires_grad=True),  
Parameter (name=0.1.0.conv2.weight, shape=(128, 128, 3, 3), dtype=Float32, requires_grad=True),  
Parameter (name=0.1.0.conv3.weight, shape=(128, 64, 1, 1), dtype=Float32, requires_grad=True),  
Parameter (name=0.1.0.bn1.gamma, shape=(128,), dtype=Float32, requires_grad=True),  
Parameter (name=0.1.0.bn1.beta, shape=(128,), dtype=Float32, requires_grad=True),   
Parameter (name=0.1.0.bn2.gamma, shape=(128,), dtype=Float32, requires_grad=True),  
Parameter (name=0.1.0.bn2.beta, shape=(128,), dtype=Float32, requires_grad=True),  
Parameter (name=0.1.1.conv1.weight, shape=(128, 128, 3, 3), dtype=Float32, requires_grad=True),  
Parameter (name=0.1.1.conv2.weight, shape=(128, 128, 3, 3), dtype=Float32, requires_grad=True),   
Parameter (name=0.1.1.bn1.gamma, shape=(128,), dtype=Float32, requires_grad=True),   
Parameter (name=0.1.1.bn1.beta, shape=(128,), dtype=Float32, requires_grad=True),   
Parameter (name=0.1.1.bn2.gamma, shape=(128,), dtype=Float32, requires_grad=True),  
Parameter (name=0.1.1.bn2.beta, shape=(128,), dtype=Float32, requires_grad=True)]