How to handle the two variable dimensions batch_size and seq_length when running inference on a custom Transformer-based BERT model with MindSpore Lite

I trained a BERT model I implemented myself with MindSpore and now want to run inference with MindSpore Lite, but MindSpore Lite does not support dynamic shapes directly. I saw that a config file can specify dynamic dimensions, but that appears to only support inputs with 4 dimensions:

Dynamic input whose shape is not 4-dimensional is not supported, input shape: [const vector]{-1, 8, 128}

Using a dynamic shape at inference time (changing only the batch dimension) also fails:

int InferenceEngineForAscend::reshape_batch(int32_t batch_size) {
	std::vector<std::vector<int64_t>> new_shape;
	std::vector<mindspore::MSTensor> inputs = model->GetInputs();
	for (auto& single : inputs) {
		std::vector<int64_t> shape = single.Shape();
		shape[0] = batch_size;
		new_shape.push_back(shape);
	}

	if (model->Resize(inputs, new_shape) != mindspore::kSuccess) {
		std::cerr << "Failed to resize to batch size " << batch_size << std::endl;
		return 4;
	}

	return 0;
}

[ERROR] LITE(294917,ffff944e1020,mslite):2025-10-29-11:10:35.361.755 [mindspore-lite/src/extendrt/kernel/ascend/model/model_process.cc:1031] Resize] Not support dynamic input
[ERROR] LITE(294917,ffff944e1020,mslite):2025-10-29-11:10:35.361.806 [mindspore-lite/src/extendrt/kernel/ascend/src/custom_ascend_kernel.cc:274] OnNewInputShapes] Failed to Resize
[ERROR] LITE(294917,ffff944e1020,mslite):2025-10-29-11:10:35.361.825 [mindspore-lite/src/extendrt/kernel/ascend/src/custom_ascend_kernel.cc:220] Resize] Failed to resize inputs
[ERROR] LITE(294917,ffff944e1020,mslite):2025-10-29-11:10:35.361.837 [mindspore-lite/src/extendrt/session/single_op_session.cc:590] OnNewInputShapes] Failed to resize custom ascend kernel
Failed to resize to batch size 4

Hello, please add your environment version info and a link to the inference example you followed, so we can analyze the issue.

Hardware: Ascend 310P

The model, developed with MindSpore (`PositionalEncoding` and `init` are my own helpers; imports are omitted here):

class SeqEncoder(nn.Cell):
    def __init__(self, in_dim, d_model=64, dim_feedforward=256, n_head=8, num_layer=6, dropout=0.5):
        super().__init__()

        self.dmodel = d_model

        self.linear1 = nn.Dense(in_dim, d_model)
        self.linear2 = nn.Dense(d_model, d_model)
        self.identity = nn.Dense(in_dim, d_model)
        self.gate = nn.GELU()
        self.norm1 = nn.LayerNorm((d_model, ))

        layer = nn.TransformerEncoderLayer(d_model, n_head, dim_feedforward, dropout, ms.ops.gelu, batch_first=True)
        norm = nn.LayerNorm((d_model, ))

        self.embed = PositionalEncoding(d_model, dropout)
        self.encoder = nn.TransformerEncoder(layer, num_layer, norm)

        self.cls_token = init(Normal(sigma=1.0), (1, 1, d_model), ms.float32, "cls_token", True)  # learnable [CLS] token

    def construct(self, seq, seq_attention_mask):
        # Additive key-padding mask: column 0 is reserved for the CLS token;
        # padded positions are set to -1e9 so attention ignores them.
        mask = ms.ops.zeros((seq_attention_mask.shape[0], seq_attention_mask.shape[1] + 1), dtype=seq.dtype)
        mask[:, 1:][seq_attention_mask == 0] = -1e+9

        seq = self.linear1(seq)
        seq = self.norm1(self.gate(self.linear2(seq)) + seq)

        cls_token = ms.ops.tile(self.cls_token, (seq.shape[0], 1, 1))
        seq = ms.ops.concat((cls_token, seq), axis=1)
        seq = self.embed(seq)

        return self.encoder(seq, src_key_padding_mask=mask)



After training on the Ascend 310P, I exported a MindIR model with export, and cloud-side inference with MindSpore Lite succeeded using fixed dimensions (the input shapes of the test case used at export time). seq has shape (batch_size, seq_length, feature), where batch_size and seq_length are the dynamic dimensions. How should these be configured?

int main(int argc, const char** argv) {
	// usage: <model_path> <config_file> <device_id> <batch_size>
	std::string model_path = argv[1];
	std::string config_file = argv[2];
	int device_id = atoi(argv[3]);

	auto model = load_model(model_path, config_file, device_id);

	int batch_size = atoi(argv[4]);

	int seq_length = 64;
	int seq_feature = 5;

	if (batch_size != 5) {  // 5 is the batch size baked in at export time
		reshape_batch(model, batch_size);
	}

	std::shared_ptr<InvisightInputs> inputs = std::make_shared<InvisightInputs>();

	inputs->seqs = generateRandomVector<float>(batch_size * seq_length * seq_feature, 0.0, 1.0);
	inputs->seq_attention_mask = generate_mask<int32_t>(batch_size * seq_length);  // dtype must match the converted model's mask input
	inference(model, inputs);
	return 0;
}

std::shared_ptr<mindspore::CPUDeviceInfo> _create_cpu_info() {
	auto device_info = std::make_shared<mindspore::CPUDeviceInfo>();
	if (device_info == nullptr) {
		std::cerr << "New CPUDeviceInfo failed." << std::endl;
		return nullptr;
	}
	return device_info;
}

std::shared_ptr<mindspore::AscendDeviceInfo> _create_ascend_info(int device_id) {
	// for Ascend 310, 310P
	auto device_info = std::make_shared<mindspore::AscendDeviceInfo>();
	if (device_info == nullptr) {
		std::cerr << "New AscendDeviceInfo failed." << std::endl;
		return nullptr;
	}
	device_info->SetDeviceID(device_id);
	return device_info;
}

std::shared_ptr<mindspore::Model> load_model(std::string& model_path, std::string& config_file, int device_id) {
	auto context = std::make_shared<mindspore::Context>();
	if (context == nullptr) {
		std::cerr << "New context failed." << std::endl;
		return nullptr;
	}
	auto& device_list = context->MutableDeviceInfo();
	std::shared_ptr<mindspore::DeviceInfoContext> device_info = nullptr;

	if (device_id < 0) {
		device_info = _create_cpu_info();
	}
	else {
		device_info = _create_ascend_info(device_id);
	}
	if (device_info == nullptr) {
		std::cerr << "Create DeviceInfo failed." << std::endl;
		return nullptr;
	}
	// add the backend device info
	device_list.push_back(device_info);

	auto model = std::make_shared<mindspore::Model>();
	if (model == nullptr) {
		std::cerr << "New Model failed." << std::endl;
		return nullptr;
	}

	if (!config_file.empty()) {
		if (model->LoadConfig(config_file) != mindspore::kSuccess) {
			std::cerr << "Failed to load config file " << config_file << std::endl;
			return nullptr;
		}
	}

	auto build_ret = model->Build(model_path, mindspore::kMindIR, context);
	if (build_ret != mindspore::kSuccess) {
		std::cerr << "Build model failed." << std::endl;
		return nullptr;
	}

	return model;
}

int reshape_batch(std::shared_ptr<mindspore::Model> model, int32_t batch_size) {
	// Copy each input's current shape and overwrite dim 0 (the batch dimension).
	// Note: seq_length (dim 1) is left untouched; once the model is converted with
	// both dimensions dynamic, dim 1 could be resized the same way.
	std::vector<std::vector<int64_t>> new_shape;
	std::vector<mindspore::MSTensor> inputs = model->GetInputs();
	for (auto& single : inputs) {
		std::vector<int64_t> shape = single.Shape();
		shape[0] = batch_size;
		new_shape.push_back(shape);
	}

	if (model->Resize(inputs, new_shape) != mindspore::kSuccess) {
		std::cerr << "Failed to resize to batch size " << batch_size << std::endl;
		return 4;
	}

	return 0;
}


void inference(std::shared_ptr<mindspore::Model> &model, std::shared_ptr<InvisightInputs>& inputs_data) {
	auto inputs = model->GetInputs();
	//copy_data_to_tensor<float>(inputs_data->flows, inputs[0]);
	copy_data_to_tensor<float>(inputs_data->seqs, inputs[0]);
	copy_data_to_tensor<int32_t>(inputs_data->seq_attention_mask, inputs[1]);

	auto outputs = model->GetOutputs();

	if (model->Predict(inputs, &outputs) != mindspore::kSuccess) {
		std::cerr << "Predict failed." << std::endl;
		return;
	}

	// Print Output Tensor Data.
	for (auto& tensor : outputs) {
		std::cout << "tensor name is:" << tensor.Name() << " tensor size is:" << tensor.DataSize()
			<< " tensor elements num is:" << tensor.ElementNum() << std::endl;
		auto out_data = reinterpret_cast<const float*>(tensor.Data().get());
		std::cout << "output data is:";
		for (int i = 0; i < tensor.ElementNum() && i <= 50; i++) {
			std::cout << out_data[i] << " ";
		}
		std::cout << std::endl;
	}
}

Config file contents:

[ascend_context]
input_format=NCH
input_shape=input_1:[-1,8,128];input_2:[-1, 64, 5];input_3:[-1, 64]
dynamic_dims=[1~7],[8];[1~7],[8];[1~7],[8]

Please refer to this tutorial; it covers the dynamic shape configuration.

You didn't convert the model with MindSpore Lite's converter_lite tool?

No, I used the MindIR exported directly by MindSpore's export. So these options are conversion-time options, right? I'll try converting.

Yes.

MindSpore script -> export the MindIR model -> convert it with the MindSpore Lite converter_lite tool, using the config file above -> get the new MindIR produced by MindSpore Lite -> run inference on the new MindIR with your code above.

Try this flow; it should work.

For the model you developed, please paste the code that exports the MindIR model. The exported MindIR must itself support dynamic shape; only then can MindSpore Lite be configured as purely dynamic at conversion time. Otherwise, in some models, shape-related parameters get folded into constants and the dynamic shape feature cannot be used.

from mindspore import export, load_checkpoint
import numpy as np
from flowaims.models.cnn_res_model import FlowBaseCNNModelForVPClassifier
import mindspore as ms

model_path = r"D:\models\best_model.ckpt"

model = FlowBaseCNNModelForVPClassifier(num_classes=6,
                                        in_channel=8,
                                        out_feature=128,
                                        d_channel=128,
                                        in_dim=5,
                                        d_model=128,
                                        dim_feedforward=128,
                                        n_head=8,
                                        num_layer=3,
                                        dropout=0.5,
                                        fusion_dim=128,
                                        cls_hidden_dim=64,
                                        _in_length=128)

load_checkpoint(model_path, model)

# fixed-shape example inputs: the exported MindIR is static with these shapes
flow = ms.tensor(np.random.randn(5, 8, 128).astype(np.float32))
seq = ms.tensor(np.random.randn(5, 64, 5).astype(np.float32))
seq_attention_mask = ms.tensor(np.ones((5, 64)).astype(np.int64))


export(model, flow, seq, seq_attention_mask, file_name="flow_cnn_res_model", file_format="MINDIR")

Before calling export, you can define the inputs this way:

flow = ms.Tensor(shape=[ms.Symbol(), 8, 128], dtype=ms.float32)

Replace each dimension that needs to be dynamic with ms.Symbol(); this lets you export a dynamic-shape forward MindIR model.
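For your three inputs, a minimal sketch could look like this (treating both batch_size and seq_length as dynamic is an assumption on my part; reusing one Symbol object marks those dimensions as equal across inputs):

import mindspore as ms

batch = ms.Symbol()
seq_len = ms.Symbol()

flow = ms.Tensor(shape=[batch, 8, 128], dtype=ms.float32)
seq = ms.Tensor(shape=[batch, seq_len, 5], dtype=ms.float32)
seq_attention_mask = ms.Tensor(shape=[batch, seq_len], dtype=ms.int64)

# model is the loaded FlowBaseCNNModelForVPClassifier from your export script
ms.export(model, flow, seq, seq_attention_mask, file_name="flow_cnn_res_model", file_format="MINDIR")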

Then, following the pure-dynamic configuration among the MindSpore Lite common config options mentioned above, convert the forward MindIR into a MindSpore Lite MindIR and run inference with it.

[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.204.250 [mindspore-lite/tools/converter/import/convert_extend_ops/dense.cc:85] ConvertDensePass] "Dense got dynamic shape."
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.206.367 [mindspore-lite/tools/converter/import/convert_extend_ops/dense.cc:85] ConvertDensePass] "Dense got dynamic shape."
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.207.286 [mindspore-lite/tools/converter/import/convert_extend_ops/dense.cc:85] ConvertDensePass] "Dense got dynamic shape."
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.212.075 [mindspore-lite/tools/converter/import/convert_extend_ops/dense.cc:85] ConvertDensePass] "Dense got dynamic shape."
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.212.118 [mindspore-lite/tools/converter/import/convert_extend_ops/dense.cc:85] ConvertDensePass] "Dense got dynamic shape."
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.212.787 [mindspore-lite/tools/converter/import/convert_extend_ops/dense.cc:85] ConvertDensePass] "Dense got dynamic shape."
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.214.656 [mindspore-lite/tools/converter/import/convert_extend_ops/dense.cc:85] ConvertDensePass] "Dense got dynamic shape."
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.215.733 [mindspore-lite/tools/converter/import/convert_extend_ops/matmul_ext.cc:186] ConvertMatMulExtPass] "MatMulExt got dynamic shape."
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.217.381 [mindspore-lite/tools/converter/import/convert_extend_ops/matmul_ext.cc:186] ConvertMatMulExtPass] "MatMulExt got dynamic shape."
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.217.983 [mindspore-lite/tools/converter/import/convert_extend_ops/matmul_ext.cc:186] ConvertMatMulExtPass] "MatMulExt got dynamic shape."
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.218.528 [mindspore-lite/tools/converter/import/convert_extend_ops/matmul_ext.cc:186] ConvertMatMulExtPass] "MatMulExt got dynamic shape."
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.219.413 [mindspore-lite/tools/converter/import/convert_extend_ops/matmul_ext.cc:186] ConvertMatMulExtPass] "MatMulExt got dynamic shape."
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.219.641 [mindspore-lite/tools/converter/import/convert_extend_ops/matmul_ext.cc:186] ConvertMatMulExtPass] "MatMulExt got dynamic shape."
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.913.932 [mindspore-lite/tools/optimizer/graph/scalar_op_pass.cc:476] RemoveTensorToScalar] Cannot handle primitive RemoveExpandedDims after TensorToScalar, please check graph.
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.913.974 [mindspore-lite/tools/optimizer/graph/scalar_op_pass.cc:574] RunRemoveTensorToScalarPass] Failed to run remove TensorToScalar pass at cnode: Default/TensorToScalar-op0
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.926.037 [mindspore-lite/tools/converter/quantizer/quant_helper/ascend_distribute_fake_quant_transform.cc:407] NeedAscendDistributeFakeQuantTransform] primitive must not be null!
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.928.019 [mindspore-lite/tools/converter/quantizer/quant_helper/ascend_distribute_fake_quant_transform.cc:251] SetInputQuantParam] primitive must not be null!
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.928.056 [mindspore-lite/tools/converter/quantizer/quant_helper/ascend_distribute_fake_quant_transform.cc:464] DoSingleGraphAscendDistributeFakeQuantTransform] Fail to SetInputQuantParam
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.928.070 [mindspore-lite/tools/converter/quantizer/quant_helper/ascend_distribute_fake_quant_transform.cc:526] Transform] Do AscendDistributeFakeQuantTransform failed.
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.928.084 [mindspore-lite/tools/converter/adapter/acl/src/acl_pass_impl.cc:1278] Run] Do AscendDistributeFakeQuantTransform failed.
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.928.096 [mindspore-lite/tools/converter/adapter/acl/acl_pass.cc:42] Run] Acl pass impl run failed.
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.928.107 [mindspore-lite/tools/converter/anf_transform.cc:488] RunConvertPass] Acl pass failed.
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.928.121 [mindspore-lite/tools/converter/anf_transform.cc:681] RunPass] Run convert pass failed.
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.928.132 [mindspore-lite/tools/converter/anf_transform.cc:782] TransformFuncGraph] Proc online transform failed.
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.928.174 [mindspore-lite/tools/converter/anf_transform.cc:892] Transform] optimizer failed.
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.928.186 [mindspore-lite/tools/converter/converter_funcgraph.cc:557] Optimize] Transform anf graph failed.
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.929.414 [mindspore-lite/tools/converter/converter.cc:1203] HandleGraphCommon] Optimize func graph failed: -2 NULL pointer returned.
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.933.031 [mindspore-lite/tools/converter/converter.cc:1152] Convert] Handle graph failed: -2 NULL pointer returned.
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.933.055 [mindspore-lite/tools/converter/converter.cc:1345] RunConverter] Convert model failed
[ERROR] LITE(738498,ffffa95aa020,converter_lite):2025-10-30-14:31:06.933.074 [mindspore-lite/tools/converter/cxx_api/converter.cc:361] Convert] Convert model failed, ret=NULL pointer returned.
ERROR [mindspore-lite/tools/converter/converter_lite/main.cc:107] main] Convert failed. Ret: NULL pointer returned.

After switching to ms.Symbol(), the conversion fails with the errors above.

Could you paste the conversion command and config file?
Also, try making only batch_size dynamic and see whether the conversion succeeds. If a dynamic input dimension changes the network structure, that can also make the MindSpore Lite conversion fail.
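For the batch-only check, a sketch like this should do (seq_length pinned at 64 to match your original export):

flow = ms.Tensor(shape=[ms.Symbol(), 8, 128], dtype=ms.float32)
seq = ms.Tensor(shape=[ms.Symbol(), 64, 5], dtype=ms.float32)
seq_attention_mask = ms.Tensor(shape=[ms.Symbol(), 64], dtype=ms.int64)

and at conversion time only the batch dimension would stay -1, e.g. (input names assumed from your export script):

[acl_build_options]
input_shape="flow:-1,8,128;seq:-1,64,5;seq_attention_mask:-1,64"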

Setting only the batch dynamic gives the same errors. In principle, a change in batch_size should not affect this model's structure.

./converter_lite --fmk=MINDIR --modelFile=./flow_cnn_res_model.mindir --outputFile=test_ms_ir --configFile=./config.ini --device=Ascend

Config file:

[acl_build_options]
input_shape="flow:-1,8,128;seq:-1,-1,5;seq_attention_mask:-1,-1"

You can add this line at the top of the MindSpore export script:

ms.context.set_context(save_graphs=True, save_graphs_path="./graph")

This saves MindSpore's forward graph files under ./graph.

After running export, search for the code location that introduces the TensorToScalar-op0 operator and check whether anything shows up:

grep -r 'TensorToScalar-op0' ./graph

There is no match for TensorToScalar-op0 in those files.

If convenient, could you paste the model code? I'll try the conversion on my side.

(SeqEncoder is identical to the code posted above.)

class MyModel(nn.Cell):
    def __init__(self):
        super().__init__()
        self.encoder = SeqEncoder(5, 32, 128, 8, 3, 0.5)
        self.cls = nn.Dense(32, 6)  # MindSpore's Dense (nn.Linear is the PyTorch name)
        
    def construct(self, seq, seq_attention_mask):
        feature = self.encoder(seq, seq_attention_mask)[:, 0, :]
        return self.cls(feature)

MyModel is a throwaway classification model I wrote for this thread; it just uses the [CLS] token for classification.
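For completeness, a self-contained sketch of the export I am running (my real PositionalEncoding differs; the sinusoidal stand-in below is only there so the snippet runs on its own):

import numpy as np
import mindspore as ms
import mindspore.nn as nn

# stand-in for my own PositionalEncoding: same interface, illustrative body
class PositionalEncoding(nn.Cell):
    def __init__(self, d_model, dropout, max_len=4096):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        pos = np.arange(max_len)[:, None]
        div = np.exp(np.arange(0, d_model, 2) * (-np.log(10000.0) / d_model))
        pe = np.zeros((1, max_len, d_model), dtype=np.float32)
        pe[0, :, 0::2] = np.sin(pos * div)
        pe[0, :, 1::2] = np.cos(pos * div)
        self.pe = ms.Tensor(pe)

    def construct(self, x):
        # slicing by x.shape[1] becomes a dynamic operation under symbolic shapes
        return self.dropout(x + self.pe[:, :x.shape[1], :])

# SeqEncoder and MyModel exactly as posted above

model = MyModel()
model.set_train(False)

seq = ms.Tensor(shape=[ms.Symbol(), ms.Symbol(), 5], dtype=ms.float32)
seq_attention_mask = ms.Tensor(shape=[ms.Symbol(), ms.Symbol()], dtype=ms.int64)
ms.export(model, seq, seq_attention_mask, file_name="my_model", file_format="MINDIR")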