模型训练:RuntimeError: Output_idx 0 of node @kernel_graph5:param_global_step node:0xaaab447b3220 output addr is not exist.

代码如下:

# Loss, optimizer, and a DDPM-style linear noise schedule (300 steps).
loss_fn = nn.MSELoss()
optimizer = nn.Adam(model.trainable_params(), 1e-8)

# Linear beta schedule from 1e-4 to 2e-2 over 300 diffusion steps.
B = Tensor(np.linspace(0.0001, 0.02, 300), dtype=mindspore.float32)

a = 1.0 - B                   # alphas
a_bar = ops.cumprod(a, 0)     # cumulative product of alphas (alpha-bar)
sqrt_a_bar = ops.sqrt(a_bar)
sqrt_one_minus_a_bar = ops.sqrt(1 - a_bar)

# Force single-precision execution for both the model and the loss cell.
model.to_float(mindspore.float32)
loss_fn.to_float(mindspore.float32)

# 1. Forward function: noise the image at step t, predict the noise,
#    and return (loss, prediction) so value_and_grad can treat the
#    prediction as an auxiliary output.
def forward_fn(img, t, c, c_mask):
    noisy_img, true_noise = q(img, t, sqrt_a_bar, sqrt_one_minus_a_bar)
    predicted_noise = model(noisy_img, t, c, c_mask)
    return loss_fn(predicted_noise, true_noise), predicted_noise

# 2. Build the gradient function. With has_aux=True it returns
#    ((loss, aux_outputs), grads); grad_position=None means gradients are
#    taken only w.r.t. the weights listed in optimizer.parameters.
grad_fn = mindspore.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)

# 3. One training step: compute loss and gradients, then apply the update.
def train_step(img, t, c, c_mask):
    """Run a single optimization step and return the scalar loss Tensor.

    BUG FIX: the original bound the whole return of grad_fn to
    `output` and then did `return loss`, but `loss` was never defined
    (NameError). With has_aux=True, grad_fn returns ((loss, aux), grads),
    so unpack it explicitly.
    """
    (loss, _), grads = grad_fn(img, t, c, c_mask)
    optimizer(grads)  # in-place parameter update
    return loss

def train(model, dataset):
    """Run one training epoch over `dataset`, printing loss every 100 batches.

    Args:
        model: the network cell to train (set to training mode here).
        dataset: a MindSpore dataset providing (img, label) tuples.
    """
    size = dataset.get_dataset_size()
    model.set_train()
    for batch, (img, label) in enumerate(dataset.create_tuple_iterator()):
        # One float32 conversion is enough: the original also called
        # ops.cast(img, ms.float32) right after, which was redundant and
        # mixed the `ms` / `mindspore` aliases inconsistently.
        img = Tensor(img, dtype=mindspore.float32)

        # Sample one random diffusion timestep per image in the batch.
        t_np = np.random.randint(0, 300, size=(img.shape[0],), dtype=np.int32)
        # NOTE(review): t is sampled as int32 but cast to float32 here;
        # timesteps are usually integer indices into the schedule — confirm
        # that q() and model() really expect a float Tensor.
        t = Tensor(t_np, dtype=mindspore.float32)

        # Gather the text-feature vector matching each label, then apply
        # classifier-free-guidance style context dropout (10%).
        c = ops.gather(text_features, label, 0)
        c, c_mask = get_context_mask(c, 0.1)

        loss = train_step(img, t, c, c_mask)

        if batch % 100 == 0:
            loss, current = loss.asnumpy(), batch
            print(f"loss: {loss:>7f} [{current:>3d}/{size:>3d}]")

在执行训练代码的时候报错

epochs = 1
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    # BUG FIX: the original wrote `ms.jit(train(model, train_dataset1))`,
    # which calls train() eagerly and then passes its return value (None)
    # to ms.jit — jit never traces anything useful, and wrapping the whole
    # Python-side dataset loop in graph compilation is what surfaces the
    # "param_global_step output addr is not exist" compile error. Call the
    # loop directly; if graph acceleration is wanted, decorate train_step
    # with @ms.jit instead.
    train(model, train_dataset1)
print("Done!")

报错如下:

[ERROR] KERNEL(22770,ffffa80f80b0,python):2025-09-29-17:13:16.939.851 [mindspore/ccsrc/kernel/kernel_info.cc:72] GetMutableOutputAddr] Index [0] out of range
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[54], line 6
      4 for t in range(epochs):
      5     print(f"Epoch {t+1}\n-------------------------------")
----> 6     ms.jit(train(model, train_dataset1))
      7 print("Done!")

Cell In[52], line 45, in train(model, dataset)
     42 c = ops.gather(text_features, label, 0)  # 匹配文本向量
     43 c, c_mask = get_context_mask(c, 0.1)
---> 45 loss = train_step(img, t, c, c_mask)
     47 if batch % 100 == 0:
     48     loss, current = loss.asnumpy(), batch

Cell In[52], line 30, in train_step(img, t, c, c_mask)
     28 output, inputs_gradient = grad_fn(img, t, c, c_mask)
     29 # (loss, _), grads = grad_fn(img, t, c, c_mask)
---> 30 optimizer(inputs_gradient)
     31 return loss

File ~/anaconda3/envs/MindSpore/lib/python3.10/site-packages/mindspore/nn/cell.py:1270, in Cell.__call__(self, *args, **kwargs)
   1267 if not (self.requires_grad or self._dynamic_shape_inputs or self.mixed_precision_type):
   1268     if not (self._forward_pre_hook or self._forward_hook or self._backward_pre_hook or self._backward_hook or
   1269             self._shard_fn or self._recompute_cell or (self.has_bprop and _pynative_executor.requires_grad())):
-> 1270         return self.construct(*args, **kwargs)
   1272     return self._run_construct(*args, **kwargs)
   1274 return self._complex_call(*args, **kwargs)

File ~/anaconda3/envs/MindSpore/lib/python3.10/site-packages/mindspore/common/api.py:1180, in jit.<locals>.wrap_func.<locals>.staging_specialize(*args, **kwargs)
   1177         setattr(func, "amp_strategy", get_curr_amp_strategy())
   1179 ms_function_executor = _JitExecutor(func, hash_obj, None, process_obj, jit_config, dynamic)
-> 1180 out = ms_function_executor(*args, **kwargs)
   1181 return out

File ~/anaconda3/envs/MindSpore/lib/python3.10/site-packages/mindspore/common/api.py:180, in _wrap_func.<locals>.wrapper(*arg, **kwargs)
    178 @wraps(fn)
    179 def wrapper(*arg, **kwargs):
--> 180     results = fn(*arg, **kwargs)
    181     return _convert_python_data(results)

File ~/anaconda3/envs/MindSpore/lib/python3.10/site-packages/mindspore/common/api.py:627, in _JitExecutor.__call__(self, *args, **kwargs)
    625 except Exception as err:
    626     _pynative_executor.clear_res()
--> 627     raise err
    629 if context.get_context("precompile_only") or os.getenv('MS_DEV_PRECOMPILE_ONLY') == '1':
    630     return None

File ~/anaconda3/envs/MindSpore/lib/python3.10/site-packages/mindspore/common/api.py:621, in _JitExecutor.__call__(self, *args, **kwargs)
    619 if context.get_context("mode") == context.PYNATIVE_MODE:
    620     _pynative_executor.set_jit_compile_status(True, phase)
--> 621     phase = self.compile(self.fn.__name__, *args_list, **kwargs)
    622     _pynative_executor.set_jit_compile_status(False, phase)
    623 else:

File ~/anaconda3/envs/MindSpore/lib/python3.10/site-packages/mindspore/common/api.py:738, in _JitExecutor.compile(self, method_name, *args, **kwargs)
    736     if isinstance(self.obj, ms.nn.Cell):
    737         self._graph_executor.set_weights_values(self.obj.parameters_dict())
--> 738     is_compile = self._graph_executor.compile(self.obj, compile_args, kwargs, phase)
    740 if not is_compile:
    741     raise RuntimeError("Executor compile failed.")

RuntimeError: Output_idx 0 of node @kernel_graph5:param_global_step node:0xaaab447b3220 output addr is not exist.

----------------------------------------------------
- C++ Call Stack: (For framework developers)
----------------------------------------------------
mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.cc:909 GetMutableOutputAddr

感觉是在构建计算图的时候找不到输出节点,但是没有定位到具体哪里出了问题。

使用的modelarts notebook实例规格为:
mindspore_2.6.0rc1-cann_8.1.rc1-py_3.10-euler_2.10.11-aarch64-snt9b

用户您好,欢迎使用MindSpore,已经收到您上述的问题,还请耐心等待下答复~

能贴下model的代码吗?

用户您好,由于较长时间未看到您答复,这里版主将进行采纳回答的结帖操作,如果还其他疑问请发新帖子提问,谢谢支持~

此话题已在最后回复的 60 分钟后被自动关闭。不再允许新回复。