1.系统环境
硬件环境(Ascend/GPU/CPU): Ascend/GPU/CPU
MindSpore版本: 1.1
执行模式(动态图):不限
Python版本: Python=3.7.5
操作系统平台: 不限
2 报错信息
2.1 问题描述
自定义数据集在第一轮可正常遍历,但第二轮时出现list index out of range
2.2 报错信息
ValueError: when loss_fn is not None, train_dataset should return two elements, but got 3
2.3 脚本代码
import mindspore.dataset as ds
import matplotlib.pyplot as plt
import mindspore.dataset.vision.py_transforms as py_trans
from mindspore.dataset.transforms.py_transforms import Compose
import mindspore.ops as ops
# from mindspore import Tensor
import numpy as np
from PIL import Image
import os
import random
from option import opt
class DatasetGenerator:
    """Random-access dataset yielding (hazy, clear, index) image tuples.

    NOTE(review): this is the buggy reproduction code from the report.
    The private cursor ``__index`` is reset only once (in ``__init__``)
    and grows on every ``__getitem__`` call, so during the second epoch
    ``get_seed()`` indexes past the end of ``__random_seed`` — the
    reported "list index out of range".
    """
    def __init__(self, path, train, format='.png'):
        # path:   dataset root containing train/ and test/ sub-trees
        # train:  True -> <path>/train, False -> <path>/test
        # format: stored but not otherwise used in this block
        # self.size = size
        self.format = format
        self.train = train
        if train:
            self.haze_imgs_dir = os.listdir(os.path.join(path, 'train', 'hazy'))
            self.haze_imgs = [os.path.join(path, 'train', 'hazy', img) for img in self.haze_imgs_dir]
            self.clear_dir = os.path.join(path, 'train', 'gt')
        else:
            self.haze_imgs_dir = os.listdir(os.path.join(path, 'test', 'hazy'))
            self.haze_imgs = [os.path.join(path, 'test', 'hazy', img) for img in self.haze_imgs_dir]
            self.clear_dir = os.path.join(path, 'test', 'gt')
        # print(self.haze_imgs_dir, self.clear_dir)
        np.random.seed(58)
        # One pre-drawn seed per sample, handed out by get_seed() so the
        # 'hazy' and 'gt' map() pipelines crop at identical positions.
        self.__random_seed = []
        for _ in range(len(self.haze_imgs)):
            self.__random_seed.append(random.randint(0, 1000000))
        # BUG (root cause): this reset runs exactly once, never per epoch.
        self.__index = -1

    def __getitem__(self, index):
        # BUG: the cursor grows without bound across epochs; in epoch 2 it
        # passes len(__random_seed) and get_seed() raises IndexError.
        self.__index += 1
        haze = Image.open(self.haze_imgs[index])
        # if isinstance(self.size,int):
        #     while haze.size[0]<self.size or haze.size[1]<self.size :
        #         index=random.randint(0,20000)
        #         haze=Image.open(self.haze_imgs[index])
        # Windows-style separator; assumes names like "<id>_haze_<n>.png" —
        # TODO confirm against the actual dataset layout.
        img = self.haze_imgs[index].split('\\')[-1]
        # img=self.haze_imgs[index].split('/')[-1]
        img_name = img.split('_')
        # img_name=img.split('\\')[-1].split('_')
        # print(img_name)
        clear_name=f"{img_name[0]}_gt_{img_name[2]}"
        # print(self.clear_dir, clear_name, os.path.join(self.clear_dir,clear_name))
        clear=Image.open(os.path.join(self.clear_dir,clear_name))
        # Centre-crop the clear image down to the hazy image's size.
        w, h = clear.size
        nw, nh = haze.size
        left = (w - nw)/2
        top = (h - nh)/2
        right = (w + nw)/2
        bottom = (h + nh)/2
        clear = clear.crop((left, top, right, bottom))
        return (haze, clear, index)

    def __len__(self):
        print(len(self.haze_imgs))
        return len(self.haze_imgs)

    def get_seed(self):
        # Seed for the sample most recently served by __getitem__.
        # Crashes here in epoch 2 because __index was never reset.
        seed = self.__random_seed[self.__index]
        return seed
def decode(img):
    """Wrap a decoded numpy image array back into a PIL Image.

    The GeneratorDataset pipeline hands map() operations numpy arrays;
    the PIL-based transforms that follow need an Image object.
    """
    pil_img = Image.fromarray(img)
    return pil_img
def set_random_seed(img_name, seed):
    """Seed the global `random` module and pass `img_name` through unchanged.

    Placed in the transform pipeline so that RandomCrop draws the same crop
    window for the 'hazy' and 'gt' columns of one sample.
    """
    random.seed(seed)
    return img_name
ds.config.set_seed(8)

# DATA_DIR = opt.data_url
# FIX: must be a raw string — in a normal literal '\U' starts an 8-hex-digit
# unicode escape and this line is a SyntaxError under Python 3.
DATA_DIR = r'C:\Users\kle\Desktop\NTIRE2021'

train_dataset_generator = DatasetGenerator(DATA_DIR, train=True)
train_dataset = ds.GeneratorDataset(train_dataset_generator, ["hazy", "gt", "img_name"], shuffle=True)
test_dataset_generator = DatasetGenerator(DATA_DIR, train=False)
test_dataset = ds.GeneratorDataset(test_dataset_generator, ["hazy", "gt", "img_name"], shuffle=False)

# Shared pipeline: decode -> re-seed the RNG with this sample's stored seed
# (so both columns crop identically) -> RandomCrop -> ToTensor.
transforms_list = [
    decode,
    (lambda img_name: set_random_seed(img_name, train_dataset_generator.get_seed())),
    py_trans.RandomCrop(opt.crop_size),
    py_trans.ToTensor(),
]
compose_trans = Compose(transforms_list)
train_dataset = train_dataset.map(operations=compose_trans, input_columns=["hazy"])
train_dataset = train_dataset.map(operations=compose_trans, input_columns=["gt"])
train_dataset = train_dataset.batch(opt.bs, drop_remainder=True)

if __name__ == '__main__':
    # Two epochs: the second one triggers the reported IndexError with the
    # buggy DatasetGenerator above.
    for i in range(2):
        print(i)
        for batch in train_dataset.create_dict_iterator():
            # print(batch)
            # hazy = Tensor(batch["hazy"], dtype=mindspore.float32)
            # clear = Tensor(batch["gt"], dtype=mindspore.float32)
            print(batch["hazy"].shape, batch["gt"].shape)
3 根因分析
脚本中通过魔术方法 `__getitem__(self, index)` 定义了随机访问的数据集类,其中 index 参数由框架作为样本索引传入,框架在每个迭代(epoch)开始时会自动从头访问,外部不需要对 index 做复位操作。
但是代码中又另外自定义了私有计数器 `__index`,它只在 `__init__` 中被复位一次,每轮迭代结束后不会自动复位,因此需要保证它在每轮迭代开始前回到初始位置,否则第二轮会出现数组越界访问。
4 解决方案
根据根因分析,问题在于 `__init__` 中的复位语句
self.__index = -1
只在构造对象时执行一次;第二轮迭代时 `__index` 会在上一轮的基础上继续累加,从而越界。解决方法是在 `__getitem__` 中按数据集长度对计数器取模回绕:
self.__index = (self.__index + 1) % len(self.haze_imgs)
这样每轮遍历结束后计数器自动回到起点,`get_seed()` 不会再出现 list index out of range。
完整代码如下:
import mindspore.dataset as ds
import matplotlib.pyplot as plt
import mindspore.dataset.vision.py_transforms as py_trans
from mindspore.dataset.transforms.py_transforms import Compose
import mindspore.ops as ops
# from mindspore import Tensor
import numpy as np
from PIL import Image
import os
import random
from option import opt
class DatasetGenerator:
    """Random-access dataset yielding (hazy, clear, index) image tuples.

    Fix for the reported ``list index out of range``: the private cursor
    ``__index`` consumed by ``get_seed()`` previously grew without bound
    across epochs. It is initialised to -1 once and now wraps modulo the
    dataset length inside ``__getitem__``, so every epoch restarts at 0.
    (The document's earlier ``self.__index += 0`` in ``__init__`` would
    itself raise AttributeError: the attribute does not exist yet.)
    """
    def __init__(self, path, train, format='.png'):
        # path:   dataset root containing train/ and test/ sub-trees
        # train:  True -> <path>/train, False -> <path>/test
        # format: stored for interface compatibility; unused here
        self.format = format
        self.train = train
        split = 'train' if train else 'test'
        self.haze_imgs_dir = os.listdir(os.path.join(path, split, 'hazy'))
        self.haze_imgs = [os.path.join(path, split, 'hazy', img) for img in self.haze_imgs_dir]
        self.clear_dir = os.path.join(path, split, 'gt')
        np.random.seed(58)
        # One pre-drawn seed per sample; get_seed() hands it to the transform
        # pipeline so the 'hazy' and 'gt' columns crop at identical positions.
        self.__random_seed = [random.randint(0, 1000000) for _ in range(len(self.haze_imgs))]
        # -1 so the first __getitem__ call advances the cursor to 0.
        self.__index = -1

    def __getitem__(self, index):
        # THE FIX: advance the cursor modulo the dataset length so it resets
        # at every epoch boundary instead of running past len(__random_seed).
        self.__index = (self.__index + 1) % len(self.haze_imgs)
        haze = Image.open(self.haze_imgs[index])
        # File names look like "<id>_haze_<n>.png"; the matching ground truth
        # is "<id>_gt_<n>.png". Windows-style separator assumed — TODO confirm
        # (os.path.basename would be portable).
        img = self.haze_imgs[index].split('\\')[-1]
        img_name = img.split('_')
        clear_name = f"{img_name[0]}_gt_{img_name[2]}"
        clear = Image.open(os.path.join(self.clear_dir, clear_name))
        # Centre-crop the clear image down to the hazy image's size.
        w, h = clear.size
        nw, nh = haze.size
        left = (w - nw) / 2
        top = (h - nh) / 2
        right = (w + nw) / 2
        bottom = (h + nh) / 2
        clear = clear.crop((left, top, right, bottom))
        return (haze, clear, index)

    def __len__(self):
        return len(self.haze_imgs)

    def get_seed(self):
        # Seed for the sample most recently served by __getitem__; always in
        # range thanks to the modulo wrap above.
        return self.__random_seed[self.__index]
def decode(img):
    """Rebuild a PIL Image from the numpy array the dataset pipeline emits."""
    return Image.fromarray(img)
def set_random_seed(img_name, seed):
    """Re-seed the global RNG with `seed`, returning `img_name` untouched.

    Inserted between decode and RandomCrop so both dataset columns of one
    sample are cropped at the same position.
    """
    random.seed(seed)
    return img_name
ds.config.set_seed(8)

# DATA_DIR = opt.data_url
# FIX: raw string required — '\U' inside a normal literal begins a unicode
# escape sequence and makes this line a SyntaxError under Python 3.
DATA_DIR = r'C:\Users\kle\Desktop\NTIRE2021'

train_dataset_generator = DatasetGenerator(DATA_DIR, train=True)
train_dataset = ds.GeneratorDataset(train_dataset_generator, ["hazy", "gt", "img_name"], shuffle=True)
test_dataset_generator = DatasetGenerator(DATA_DIR, train=False)
test_dataset = ds.GeneratorDataset(test_dataset_generator, ["hazy", "gt", "img_name"], shuffle=False)

# Shared pipeline: decode -> re-seed the RNG with this sample's stored seed
# (so both columns crop identically) -> RandomCrop -> ToTensor.
transforms_list = [
    decode,
    (lambda img_name: set_random_seed(img_name, train_dataset_generator.get_seed())),
    py_trans.RandomCrop(opt.crop_size),
    py_trans.ToTensor(),
]
compose_trans = Compose(transforms_list)
train_dataset = train_dataset.map(operations=compose_trans, input_columns=["hazy"])
train_dataset = train_dataset.map(operations=compose_trans, input_columns=["gt"])
train_dataset = train_dataset.batch(opt.bs, drop_remainder=True)

if __name__ == '__main__':
    # Two full epochs — runs cleanly once the __index cursor wraps per epoch.
    for i in range(2):
        print(i)
        for batch in train_dataset.create_dict_iterator():
            # print(batch)
            # hazy = Tensor(batch["hazy"], dtype=mindspore.float32)
            # clear = Tensor(batch["gt"], dtype=mindspore.float32)
            print(batch["hazy"].shape, batch["gt"].shape)