1 System Environment
Hardware environment (Ascend/GPU/CPU): any
MindSpore version: 2.0.0
Execution mode (PyNative/Graph): any
Python version: 3.7.5
Operating system platform: any
2 Error Message
2.1 Problem Description
Running the sample code raises an error.
2.2 Error Message
raise ValueError("The value {} , its name '{}' already exists. "
ValueError: The value Parameter (name=0.conv1.weight, shape=(128, 64, 3, 3), dtype=Float32, requires_grad=True) , its name '0.conv1.weight' already exists. Please set a unique name for the parameter.
2.3 Script Code
import mindspore
from mindspore import nn
import mindspore.ops as ops

class Residual(nn.Cell):
    """The Residual block of ResNet."""
    def __init__(self, input_channels, num_channels,
                 use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels,
                               kernel_size=3, padding=1, stride=strides, pad_mode='pad')
        self.conv2 = nn.Conv2d(num_channels, num_channels,
                               kernel_size=3, padding=1, pad_mode='pad')
        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels, num_channels,
                                   kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def construct(self, X):  # MindSpore cells define construct, not forward
        Y = ops.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        Y += X
        return ops.relu(Y)

def resnet_block(in_channels, out_channels, num_residuals,
                 first_block=False):
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(in_channels, out_channels,
                                use_1x1conv=True, strides=2))
        else:
            blk.append(Residual(out_channels, out_channels))
    return nn.SequentialCell(*blk)

net = nn.SequentialCell(
    nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, pad_mode='pad'),
    nn.BatchNorm2d(64),
    nn.ReLU())
net.insert_child_to_cell("resnet_block1", resnet_block(64, 64, 2, first_block=True))
net.insert_child_to_cell("resnet_block2", resnet_block(64, 128, 2))
trainer = nn.Adam(net.trainable_params(), learning_rate=0.001)
3 Root Cause Analysis
The framework's automatic naming mechanism for container cells breaks down in certain scenarios, producing duplicate Parameter names inside a Cell and triggering an error at training time.
In theory the framework should prevent this, but the scenario here is unusual: insert_child_to_cell is called twice, and both calls insert the same kind of network, Residual (each resnet_block call ultimately inserts Residual cells).
The first insert therefore auto-generates the name prefix 0.conv1.weight; the second insert auto-generates the very same 0.conv1.weight, and the parameter names collide.
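To make the mechanism concrete, here is a minimal sketch of the same collision, reduced from the report's scenario (the child names "block1"/"block2" are illustrative, not from the original script):

from mindspore import nn

net = nn.SequentialCell(nn.ReLU())
# Each inserted SequentialCell auto-names its first child's weight "0.weight",
# so the two inserted blocks end up with identically named parameters.
net.insert_child_to_cell("block1", nn.SequentialCell(nn.Conv2d(3, 8, kernel_size=3)))
net.insert_child_to_cell("block2", nn.SequentialCell(nn.Conv2d(3, 8, kernel_size=3)))
# Collecting the parameters for an optimizer then hits the duplicate
# "0.weight" and should raise the same ValueError shown above.
trainer = nn.Adam(net.trainable_params(), learning_rate=0.001)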
Below is the parameter list printed when insert_child_to_cell has been executed only once. A single insert raises no error; calling insert_child_to_cell a second time triggers the error:
net.insert_child_to_cell("resnet_block1", resnet_block(64, 64, 2, first_block=True))
print(net.trainable_params())
[Parameter (name=0.weight, shape=(64, 3, 3, 3), dtype=Float32, requires_grad=True),
Parameter (name=1.gamma, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=1.beta, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=0.conv1.weight, shape=(64, 64, 3, 3), dtype=Float32, requires_grad=True),
Parameter (name=0.conv2.weight, shape=(64, 64, 3, 3), dtype=Float32, requires_grad=True),
Parameter (name=0.bn1.gamma, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=0.bn1.beta, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=0.bn2.gamma, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=0.bn2.beta, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=1.conv1.weight, shape=(64, 64, 3, 3), dtype=Float32, requires_grad=True),
Parameter (name=1.conv2.weight, shape=(64, 64, 3, 3), dtype=Float32, requires_grad=True),
Parameter (name=1.bn1.gamma, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=1.bn1.beta, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=1.bn2.gamma, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=1.bn2.beta, shape=(64,), dtype=Float32, requires_grad=True)]
4 Solution
Manage the blocks with a CellList, which avoids identically named Parameters across multiple Cells.
Merge the two insert_child_to_cell calls into a single call that inserts one CellList:
cell_list = nn.CellList()
cell_list.append(resnet_block(64, 64, 2, first_block=True))
cell_list.append(resnet_block(64, 128, 2))
net.insert_child_to_cell("resnet_blocks", cell_list)
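An alternative sketch, not part of the original fix: MindSpore's Cell.update_parameters_name can prepend a distinct prefix to one block's parameters before insertion, which also avoids the name clash (the prefix string here is an arbitrary choice):

# Assumed alternative: rename the second block's parameters up front.
block2 = resnet_block(64, 128, 2)
block2.update_parameters_name(prefix="resnet_block2.")
net.insert_child_to_cell("resnet_block2", block2)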
The complete corrected script:

import mindspore
from mindspore import nn
import mindspore.ops as ops

class Residual(nn.Cell):
    """The Residual block of ResNet."""
    def __init__(self, input_channels, num_channels,
                 use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels,
                               kernel_size=3, padding=1, stride=strides, pad_mode='pad')
        self.conv2 = nn.Conv2d(num_channels, num_channels,
                               kernel_size=3, padding=1, pad_mode='pad')
        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels, num_channels,
                                   kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def construct(self, X):  # MindSpore cells define construct, not forward
        Y = ops.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        Y += X
        return ops.relu(Y)

def resnet_block(in_channels, out_channels, num_residuals,
                 first_block=False):
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(in_channels, out_channels,
                                use_1x1conv=True, strides=2))
        else:
            blk.append(Residual(out_channels, out_channels))
    return nn.SequentialCell(*blk)

net = nn.SequentialCell(
    nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, pad_mode='pad'),
    nn.BatchNorm2d(64),
    nn.ReLU())

# One insert of a single CellList replaces the two separate inserts.
cell_list = nn.CellList()
cell_list.append(resnet_block(64, 64, 2, first_block=True))
cell_list.append(resnet_block(64, 128, 2))
net.insert_child_to_cell("resnet_blocks", cell_list)
print(net.trainable_params())
trainer = nn.Adam(net.trainable_params(), learning_rate=0.001)
The network parameters finally printed are shown below. Note that the parameters inside the CellList now carry prefixes such as 0.0 and 0.1.0, so every name is unique:
[Parameter (name=0.weight, shape=(64, 3, 3, 3), dtype=Float32, requires_grad=True),
Parameter (name=1.gamma, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=1.beta, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=0.0.conv1.weight, shape=(64, 64, 3, 3), dtype=Float32, requires_grad=True),
Parameter (name=0.0.conv2.weight, shape=(64, 64, 3, 3), dtype=Float32, requires_grad=True),
Parameter (name=0.0.bn1.gamma, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=0.0.bn1.beta, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=0.0.bn2.gamma, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=0.0.bn2.beta, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=0.1.conv1.weight, shape=(64, 64, 3, 3), dtype=Float32, requires_grad=True),
Parameter (name=0.1.conv2.weight, shape=(64, 64, 3, 3), dtype=Float32, requires_grad=True),
Parameter (name=0.1.bn1.gamma, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=0.1.bn1.beta, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=0.1.bn2.gamma, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=0.1.bn2.beta, shape=(64,), dtype=Float32, requires_grad=True),
Parameter (name=0.1.0.conv1.weight, shape=(128, 64, 3, 3), dtype=Float32, requires_grad=True),
Parameter (name=0.1.0.conv2.weight, shape=(128, 128, 3, 3), dtype=Float32, requires_grad=True),
Parameter (name=0.1.0.conv3.weight, shape=(128, 64, 1, 1), dtype=Float32, requires_grad=True),
Parameter (name=0.1.0.bn1.gamma, shape=(128,), dtype=Float32, requires_grad=True),
Parameter (name=0.1.0.bn1.beta, shape=(128,), dtype=Float32, requires_grad=True),
Parameter (name=0.1.0.bn2.gamma, shape=(128,), dtype=Float32, requires_grad=True),
Parameter (name=0.1.0.bn2.beta, shape=(128,), dtype=Float32, requires_grad=True),
Parameter (name=0.1.1.conv1.weight, shape=(128, 128, 3, 3), dtype=Float32, requires_grad=True),
Parameter (name=0.1.1.conv2.weight, shape=(128, 128, 3, 3), dtype=Float32, requires_grad=True),
Parameter (name=0.1.1.bn1.gamma, shape=(128,), dtype=Float32, requires_grad=True),
Parameter (name=0.1.1.bn1.beta, shape=(128,), dtype=Float32, requires_grad=True),
Parameter (name=0.1.1.bn2.gamma, shape=(128,), dtype=Float32, requires_grad=True),
Parameter (name=0.1.1.bn2.beta, shape=(128,), dtype=Float32, requires_grad=True)]
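As a quick sanity check (a suggestion added here, not from the original report), you can verify that all parameter names are unique before constructing the optimizer:

# Hypothetical check: every trainable parameter must have a unique name.
names = [p.name for p in net.trainable_params()]
assert len(names) == len(set(names)), "duplicate parameter names remain"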