1 系统环境
硬件环境(Ascend/GPU/CPU): Ascend/GPU/CPU
MindSpore版本: mindspore=1.7.0
执行模式(动态图):不限
Python版本: Python=3.7.5
操作系统平台: linux
2 报错信息
2.1 问题描述
创建一个用于超分辨率的数据集,使用mindspore.dataset.GeneratorDataset接口以自定义方式加载数据集时报错。
2.2 脚本代码
from os import listdir
from os.path import join
import os
from PIL import Image
import random
import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.vision.py_transforms as py_vision
import mindspore.dataset.transforms.py_transforms as py_transforms
def is_image_file(filename):
    """Return True if *filename* ends with a supported image extension.

    Every extension carries its leading dot, so a name such as ``fooPNG``
    or ``myjpeg`` (no real extension) is not mistaken for an image.

    Args:
        filename: File name or path to test.

    Returns:
        bool: True for ``.png``/``.jpg``/``.jpeg`` and their upper-case forms.
    """
    # str.endswith accepts a tuple of suffixes: one call, no Python-level loop.
    return filename.endswith(('.png', '.jpg', '.jpeg', '.PNG', '.JPG', '.JPEG'))
def make_dataset(dir_path, max_dataset_size=float("inf")):
    """Return the paths of the image files found under *dir_path*.

    The directory tree is walked recursively and every file accepted by
    ``is_image_file`` is collected.

    Args:
        dir_path: Root directory to scan.
        max_dataset_size: Upper bound on the number of returned paths.
            The default (infinity) means "no limit".

    Returns:
        list: Image file paths, at most ``max_dataset_size`` of them.

    Raises:
        AssertionError: If *dir_path* is not an existing directory.
    """
    images = []
    assert os.path.isdir(dir_path), '%s is not a valid directory' % dir_path
    for root, _, fnames in sorted(os.walk(dir_path)):
        # Sort file names so that truncation by max_dataset_size keeps the
        # same files on every run, independent of the OS listing order.
        for fname in sorted(fnames):
            if is_image_file(fname):
                path = os.path.join(root, fname)
                images.append(path)
    # min() may hand back a float (e.g. 2.0); slice indices must be integers.
    limit = int(min(max_dataset_size, len(images)))
    return images[:limit]
class MakeDataset:
    """Random-access source of paired images for ``GeneratorDataset``.

    Images are read from two sub-directories of ``dataset_dir``:
    ``data`` (side A) and ``target`` (side B). Item ``i`` pairs the i-th
    path of each sorted listing.
    """

    def __init__(self, dataset_dir, max_dataset_size=float("inf")):
        """Collect and sort the image paths of both sub-directories.

        Args:
            dataset_dir: Directory containing the ``data`` and ``target``
                sub-directories.
            max_dataset_size: Maximum number of images taken from each side.
        """
        self.dir_A = os.path.join(dataset_dir, 'data')
        self.dir_B = os.path.join(dataset_dir, 'target')
        self.A_paths = sorted(make_dataset(self.dir_A, max_dataset_size))
        self.B_paths = sorted(make_dataset(self.dir_B, max_dataset_size))
        self.A_size = len(self.A_paths)
        self.B_size = len(self.B_paths)

    def __getitem__(self, index):
        """Return the pair ``(A_img, B_img)`` for *index* as NumPy arrays.

        Two arrays are returned, so the consuming ``GeneratorDataset`` must
        be given exactly two column names.
        """
        # Modulo wraps the index in case the two sides differ in length.
        A_path = self.A_paths[index % self.A_size]
        B_path = self.B_paths[index % self.B_size]
        A_img = np.array(Image.open(A_path).convert('RGB'))
        B_img = np.array(Image.open(B_path).convert('RGB'))
        # Debug output: shapes of the loaded pair.
        print(A_img.shape)
        print(B_img.shape)
        return A_img, B_img

    def __len__(self):
        """Return the number of samples (size of side A)."""
        return self.A_size
dataset = MakeDataset(dataset_dir="D:/instruction/image_folder_dataset_directory/")
# NOTE: "LR,HR" is ONE string, i.e. a single column name, while
# MakeDataset.__getitem__ returns two arrays. This mismatch is kept on
# purpose: it reproduces the RuntimeError shown in section 2.3.
DS = ds.GeneratorDataset(dataset, column_names=["LR,HR"])
for data in DS.create_dict_iterator():
    print(data["LR"].shape)
2.3 报错
RuntimeError: Exception thrown from PyFunc. Invalid python function, the 'source' of 'GeneratorDataset' should return same number of NumPy arrays as specified in column_names, the size of column_names is:1 and number of returned NumPy array is:2
3 根因分析
GeneratorDataset中使用到的数据返回字段应与指定的列名个数一致,但目前指定列名个数为1,返回列数为2。按照数据返回列数分析,应定义两个列名。
在上面代码中发现:
DS=ds.GeneratorDataset(dataset,column_names=["LR,HR"])
["LR,HR"]是只含一个字符串的列表,只表示一个列名(字段),因此需要将其拆分为两个列名,修改如下:
DS=ds.GeneratorDataset(dataset,column_names=["LR","HR"])
4 解决方案
使用环境:
硬件环境(Ascend/GPU/CPU): CPU
MindSpore版本: mindspore=1.8.0
执行模式(动态图):不限
Python版本: Python=3.7.5
操作系统平台: linux
完整代码:
from os import listdir
from os.path import join
import os
from PIL import Image
import random
import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.vision.py_transforms as py_vision
import mindspore.dataset.transforms.py_transforms as py_transforms
def is_image_file(filename):
    """Return True if *filename* ends with a supported image extension.

    Every extension carries its leading dot, so a name such as ``fooPNG``
    or ``myjpeg`` (no real extension) is not mistaken for an image.

    Args:
        filename: File name or path to test.

    Returns:
        bool: True for ``.png``/``.jpg``/``.jpeg`` and their upper-case forms.
    """
    # str.endswith accepts a tuple of suffixes: one call, no Python-level loop.
    return filename.endswith(('.png', '.jpg', '.jpeg', '.PNG', '.JPG', '.JPEG'))
def make_dataset(dir_path, max_dataset_size=float("inf")):
    """Return the paths of the image files found under *dir_path*.

    The directory tree is walked recursively and every file accepted by
    ``is_image_file`` is collected.

    Args:
        dir_path: Root directory to scan.
        max_dataset_size: Upper bound on the number of returned paths.
            The default (infinity) means "no limit".

    Returns:
        list: Image file paths, at most ``max_dataset_size`` of them.

    Raises:
        AssertionError: If *dir_path* is not an existing directory.
    """
    images = []
    assert os.path.isdir(dir_path), '%s is not a valid directory' % dir_path
    for root, _, fnames in sorted(os.walk(dir_path)):
        # Sort file names so that truncation by max_dataset_size keeps the
        # same files on every run, independent of the OS listing order.
        for fname in sorted(fnames):
            if is_image_file(fname):
                path = os.path.join(root, fname)
                images.append(path)
    # min() may hand back a float (e.g. 2.0); slice indices must be integers.
    limit = int(min(max_dataset_size, len(images)))
    return images[:limit]
class MakeDataset:
    """Random-access source of paired images for ``GeneratorDataset``.

    Images are read from two sub-directories of ``dataset_dir``:
    ``data`` (side A) and ``target`` (side B). Item ``i`` pairs the i-th
    path of each sorted listing.
    """

    def __init__(self, dataset_dir, max_dataset_size=float("inf")):
        """Collect and sort the image paths of both sub-directories.

        Args:
            dataset_dir: Directory containing the ``data`` and ``target``
                sub-directories.
            max_dataset_size: Maximum number of images taken from each side.
        """
        self.dir_A = os.path.join(dataset_dir, 'data')
        self.dir_B = os.path.join(dataset_dir, 'target')
        self.A_paths = sorted(make_dataset(self.dir_A, max_dataset_size))
        self.B_paths = sorted(make_dataset(self.dir_B, max_dataset_size))
        self.A_size = len(self.A_paths)
        self.B_size = len(self.B_paths)

    def __getitem__(self, index):
        """Return the pair ``(A_img, B_img)`` for *index* as NumPy arrays.

        Two arrays are returned, so the consuming ``GeneratorDataset`` must
        be given exactly two column names.
        """
        # Modulo wraps the index in case the two sides differ in length.
        A_path = self.A_paths[index % self.A_size]
        B_path = self.B_paths[index % self.B_size]
        A_img = np.array(Image.open(A_path).convert('RGB'))
        B_img = np.array(Image.open(B_path).convert('RGB'))
        # Debug output: shapes of the loaded pair.
        print(A_img.shape)
        print(B_img.shape)
        return A_img, B_img

    def __len__(self):
        """Return the number of samples (size of side A)."""
        return self.A_size
dataset = MakeDataset(dataset_dir="D:/instruction/image_folder_dataset_directory/")
# Two separate column names, one per array returned by
# MakeDataset.__getitem__.
DS = ds.GeneratorDataset(dataset, column_names=["LR", "HR"])
for data in DS.create_dict_iterator():
    print(data["LR"].shape)