MindSpore error RuntimeError: Invalid python function, the 'source' of 'GeneratorDataset' should return same number ...

1 System Environment

Hardware environment (Ascend/GPU/CPU): Ascend/GPU/CPU
MindSpore version: mindspore=1.7.0
Execution mode (dynamic graph): not restricted
Python version: Python=3.7.5
Operating system platform: Linux

2 Error Message

2.1 Problem Description

A dataset for super-resolution is created, and the mindspore.dataset.GeneratorDataset interface is used to load it in a user-defined way.

2.2 Script Code

from os import listdir  
from os.path import join  
import os  
from PIL import Image  
import random  
import numpy as np  
import mindspore.dataset as ds  
import mindspore.dataset.vision.py_transforms as py_vision  
import mindspore.dataset.transforms.py_transforms as py_transforms  
    
def is_image_file(filename):
    return any(filename.endswith(extension) for extension in ['.png', '.jpg', '.jpeg', '.PNG', '.JPG', '.JPEG'])

def make_dataset(dir_path, max_dataset_size=float("inf")):
    """Return image list in dir."""
    images = []
    assert os.path.isdir(dir_path), '%s is not a valid directory' % dir_path

    for root, _, fnames in sorted(os.walk(dir_path)):
        for fname in fnames:
            if is_image_file(fname):
                path = os.path.join(root, fname)
                images.append(path)

    return images[:min(max_dataset_size, len(images))]
    
class MakeDataset:
    def __init__(self, dataset_dir, max_dataset_size=float("inf")):
        self.dir_A = os.path.join(dataset_dir, 'data')
        self.dir_B = os.path.join(dataset_dir, 'target')
        self.A_paths = sorted(make_dataset(self.dir_A, max_dataset_size))
        self.B_paths = sorted(make_dataset(self.dir_B, max_dataset_size))
        self.A_size = len(self.A_paths)
        self.B_size = len(self.B_paths)

    def __getitem__(self, index):
        A_path = self.A_paths[index % self.A_size]
        B_path = self.B_paths[index % self.B_size]
        A_img = np.array(Image.open(A_path).convert('RGB'))
        B_img = np.array(Image.open(B_path).convert('RGB'))
        print(A_img.shape)
        print(B_img.shape)
        return A_img, B_img

    def __len__(self):
        return self.A_size
    
dataset = MakeDataset(dataset_dir="D:/instruction/image_folder_dataset_directory/")
DS = ds.GeneratorDataset(dataset, column_names=["LR,HR"])
for data in DS.create_dict_iterator():
    print(data["LR"].shape)

2.3 Error

RuntimeError: Exception thrown from PyFunc. Invalid python function, the 'source' of 'GeneratorDataset' should return same number of NumPy arrays as specified in column_names, the size of column_names is:1 and number of returned NumPy array is:2

3 Root Cause Analysis

The number of NumPy arrays returned by the source of GeneratorDataset must match the number of specified column names. Here only one column name is specified while two arrays are returned, so two column names should be defined.
In the code above we find:

DS = ds.GeneratorDataset(dataset, column_names=["LR,HR"])

["LR,HR"] is a single column name rather than two, so this is the part to modify:

DS = ds.GeneratorDataset(dataset, column_names=["LR", "HR"])
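To see the rule in isolation, here is a minimal sketch with a synthetic source (the class name, dataset size, and array shapes are made up for illustration): the source returns two NumPy arrays per item, so column_names must list exactly two names.

import numpy as np
import mindspore.dataset as ds

class TwoColumnSource:
    """Toy random-access source: every item returns two NumPy arrays."""
    def __init__(self, size=4):
        self.size = size

    def __getitem__(self, index):
        lr = np.zeros((32, 32, 3), dtype=np.uint8)    # stand-in for a low-resolution image
        hr = np.zeros((128, 128, 3), dtype=np.uint8)  # stand-in for a high-resolution image
        return lr, hr                                 # two arrays returned per item

    def __len__(self):
        return self.size

# Two arrays per item, so column_names must contain exactly two names.
toy = ds.GeneratorDataset(TwoColumnSource(), column_names=["LR", "HR"])
for item in toy.create_dict_iterator():
    print(item["LR"].shape, item["HR"].shape)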

4 Solution

Environment used:

Hardware environment (Ascend/GPU/CPU): CPU
MindSpore version: mindspore=1.8.0
Execution mode (dynamic graph): not restricted
Python version: Python=3.7.5
Operating system platform: Linux
Complete code:

from os import listdir  
from os.path import join  
import os  
from PIL import Image  
import random  
import numpy as np  
import mindspore.dataset as ds  
import mindspore.dataset.vision.py_transforms as py_vision  
import mindspore.dataset.transforms.py_transforms as py_transforms  
    
def is_image_file(filename):
    return any(filename.endswith(extension) for extension in ['.png', '.jpg', '.jpeg', '.PNG', '.JPG', '.JPEG'])

def make_dataset(dir_path, max_dataset_size=float("inf")):
    """Return image list in dir."""
    images = []
    assert os.path.isdir(dir_path), '%s is not a valid directory' % dir_path

    for root, _, fnames in sorted(os.walk(dir_path)):
        for fname in fnames:
            if is_image_file(fname):
                path = os.path.join(root, fname)
                images.append(path)

    return images[:min(max_dataset_size, len(images))]
    
class MakeDataset:
    def __init__(self, dataset_dir, max_dataset_size=float("inf")):
        self.dir_A = os.path.join(dataset_dir, 'data')
        self.dir_B = os.path.join(dataset_dir, 'target')
        self.A_paths = sorted(make_dataset(self.dir_A, max_dataset_size))
        self.B_paths = sorted(make_dataset(self.dir_B, max_dataset_size))
        self.A_size = len(self.A_paths)
        self.B_size = len(self.B_paths)

    def __getitem__(self, index):
        A_path = self.A_paths[index % self.A_size]
        B_path = self.B_paths[index % self.B_size]
        A_img = np.array(Image.open(A_path).convert('RGB'))
        B_img = np.array(Image.open(B_path).convert('RGB'))
        print(A_img.shape)
        print(B_img.shape)
        return A_img, B_img

    def __len__(self):
        return self.A_size
    
dataset = MakeDataset(dataset_dir="D:/instruction/image_folder_dataset_directory/")
DS = ds.GeneratorDataset(dataset, column_names=["LR", "HR"])
for data in DS.create_dict_iterator():
    print(data["LR"].shape)
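The py_vision and py_transforms modules imported above are never used in this script; once the dataset loads correctly, per-column transforms can be attached with map. The lines below are only an illustrative sketch that continues from the code above (the ToTensor step is an assumption, not part of the original fix):

DS = DS.map(operations=py_vision.ToTensor(), input_columns=["LR"])
DS = DS.map(operations=py_vision.ToTensor(), input_columns=["HR"])
for data in DS.create_dict_iterator():
    # ToTensor converts the HWC uint8 arrays to CHW float32 in [0, 1]
    print(data["LR"].shape, data["HR"].shape)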