Introduction to LoRA
LoRA (Low-Rank Adaptation) is an efficient fine-tuning method in the parameter-efficient fine-tuning (PEFT) family. Its core idea is to freeze the original model weights and attach a low-rank bypass to modules such as the Query, Key, and Value (QKV) projections in the attention layers.
The bypass consists of two trainable low-rank matrices, A and B, which stand in for a direct update to the original large weight matrix. During fine-tuning only A and B are updated, which greatly reduces the number of trainable parameters and the compute and memory overhead, while retaining performance close to full-parameter fine-tuning.
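To make the idea concrete, here is a minimal, framework-agnostic sketch in plain numpy (illustrative only, not the MindSpore/PEFT implementation used later): the pretrained weight W stays frozen, and the bypass contributes an extra update B·A scaled by alpha / r. All names and sizes below are assumptions for illustration.
import numpy as np

d_in, d_out, r, alpha = 1536, 1536, 8, 32

W = np.random.randn(d_out, d_in) * 0.02  # frozen pretrained weight (never updated)
A = np.random.randn(r, d_in) * 0.01      # trainable low-rank down-projection
B = np.zeros((d_out, r))                 # trainable up-projection, zero-initialized so the initial update is zero

def lora_forward(x):
    # Original path plus the low-rank bypass, scaled by alpha / r
    return W @ x + (alpha / r) * (B @ (A @ x))

x = np.random.randn(d_in)
print(lora_forward(x).shape)  # (1536,)
Only A and B, roughly r * (d_in + d_out) values per adapted matrix, would receive gradients; W is left untouched.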
Environment Setup
# The experiment environment has mindspore==2.6.0 preinstalled; to switch MindSpore versions, change the MINDSPORE_VERSION variable below
pip uninstall mindspore -y
%env MINDSPORE_VERSION=2.6.0
pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/${MINDSPORE_VERSION}/MindSpore/unified/aarch64/mindspore-${MINDSPORE_VERSION}-cp39-cp39-linux_aarch64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple
Check the currently installed mindspore version
pip show mindspore
Output
Name: mindspore
Version: 2.6.0
Summary: MindSpore is a new open source deep learning training/inference framework that could be used for mobile, edge and cloud scenarios.
Home-page: https://www.mindspore.cn
Author: The MindSpore Authors
Author-email: contact@mindspore.cn
License: Apache 2.0
Location: /home/mindspore/miniconda/envs/jupyter/lib/python3.9/site-packages
Requires: asttokens, astunparse, dill, numpy, packaging, pillow, protobuf, psutil, safetensors, scipy
Required-by:
Install mindnlp 0.4.1
pip uninstall mindnlp -y
pip install https://xihe.mindspore.cn/coderepo/web/v1/file/MindSpore/mindnlp/main/media/mindnlp-0.4.1-py3-none-any.whl
Install the openMind Hub Client
pip install openmind_hub
Downloading and Processing the Dataset
Download the huanhuan.json dataset through the interface provided by openmind_hub:
import os
from mindnlp.transformers import AutoModelForCausalLM, AutoTokenizer
from mindnlp.engine import TrainingArguments, Trainer
from mindnlp.dataset import load_dataset
from mindnlp.transformers import GenerationConfig
from mindnlp.peft import LoraConfig, TaskType, get_peft_model
from mindnlp.engine.utils import PREFIX_CHECKPOINT_DIR
from mindnlp.configs import SAFE_WEIGHTS_NAME
from mindnlp.engine.callbacks import TrainerCallback, TrainerState, TrainerControl
from mindspore._c_expression import disable_multi_thread
disable_multi_thread()  # Disable multi-threading to improve fine-tuning performance
# Enable synchronous execution when an error occurs and you need to pinpoint the problem
# mindspore.set_context(pynative_synchronize=True)
from openmind_hub import om_hub_download
# Download the dataset from the Modelers community
om_hub_download(
    repo_id="MindSpore-Lab/huanhuan",
    repo_type="dataset",
    filename="huanhuan.json",
    local_dir="./",
)
# Load the dataset
dataset = load_dataset(path="json", data_files="./huanhuan.json")
# Instantiate the tokenizer
tokenizer = AutoTokenizer.from_pretrained("MindSpore-Lab/DeepSeek-R1-Distill-Qwen-1.5B-FP16", mirror="modelers", use_fast=False)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'
# Define the data preprocessing logic
def process_func(instruction, input, output):
    MAX_SEQ_LENGTH = 64  # maximum sequence length
    input_ids, attention_mask, labels = [], [], []
    # First build the user/assistant dialogue template
    # User: instruction + input
    # Assistant: output
    formatted_instruction = tokenizer(f"User: {instruction}{input}\n\n", add_special_tokens=False)
    formatted_response = tokenizer(f"Assistant: {output}", add_special_tokens=False)
    # Finally append the eos token; in the deepseek-r1-distill-qwen vocabulary,
    # eos_token and pad_token map to the same token
    # User: instruction + input \n\n Assistant: output + eos_token
    input_ids = formatted_instruction["input_ids"] + formatted_response["input_ids"] + [tokenizer.pad_token_id]
    attention_mask = formatted_instruction["attention_mask"] + formatted_response["attention_mask"] + [1]
    # Note: during fine-tuning only the Assistant's reply is supervised,
    # so the labels for the User prompt are set to -100
    labels = [-100] * len(formatted_instruction["input_ids"]) + formatted_response["input_ids"] + [tokenizer.pad_token_id]
    # Truncate if the sequence exceeds the maximum length
    if len(input_ids) > MAX_SEQ_LENGTH:
        input_ids = input_ids[:MAX_SEQ_LENGTH]
        attention_mask = attention_mask[:MAX_SEQ_LENGTH]
        labels = labels[:MAX_SEQ_LENGTH]
    # Pad if the sequence is shorter than the maximum length
    padding_length = MAX_SEQ_LENGTH - len(input_ids)
    input_ids = input_ids + [tokenizer.pad_token_id] * padding_length
    attention_mask = attention_mask + [0] * padding_length  # padded positions get attention_mask 0
    labels = labels + [-100] * padding_length  # padded positions get label -100
    return input_ids, attention_mask, labels
formatted_dataset = dataset.map(operations=[process_func],
                                input_columns=['instruction', 'input', 'output'],
                                output_columns=["input_ids", "attention_mask", "labels"])
# Inspect a preprocessed sample
for input_ids, attention_mask, labels in formatted_dataset.create_tuple_iterator():
    print(tokenizer.decode(input_ids))
    break
# To keep the demo short, truncate the dataset
truncated_dataset = formatted_dataset.take(3)
Output
User: 小姐,别的秀女都在求中选,唯有咱们小姐想被撂牌子,菩萨一定记得真真儿的——
Assistant: 嘘——都说许愿说破是不灵的。<|end▁of▁sentence|><|end▁of▁sentence|><|end▁of▁sentence|><|end▁of▁sentence|><|end▁of▁sentence|><|end▁of▁sentence|><|end▁of▁sentence|><|end▁of▁sentence|><|end▁of▁sentence|><|end▁of▁sentence|><|end▁of▁sentence|><|end▁of▁sentence|><|end▁of▁sentence|><|end▁of▁sentence|><|end▁of▁sentence|><|end▁of▁sentence|><|end▁of▁sentence|>
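As an optional sanity check (not part of the original flow), you can decode only the positions whose label is not -100 to confirm that the loss will be computed on the Assistant reply alone; this assumes the iterator yields MindSpore tensors, so asnumpy() is used to read the values.
# Optional check: decode only the supervised (non -100) label positions
for input_ids, attention_mask, labels in formatted_dataset.create_tuple_iterator():
    supervised = [int(t) for t in labels.asnumpy() if t != -100]
    print(tokenizer.decode(supervised))
    break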
Running the Fine-Tuning
# Instantiate the base model
model_id = "MindSpore-Lab/DeepSeek-R1-Distill-Qwen-1.5B-FP16"
base_model = AutoModelForCausalLM.from_pretrained(model_id, mirror="modelers")
base_model.generation_config = GenerationConfig.from_pretrained(model_id, mirror="modelers")
base_model.generation_config.pad_token_id = base_model.generation_config.eos_token_id
# LoRA configuration
config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    inference_mode=False,  # training mode
    r=8,  # LoRA rank
    lora_alpha=32,  # LoRA alpha; the low-rank update BA is scaled by lora_alpha / r
    lora_dropout=0.1  # dropout rate
)
# Instantiate the LoRA model
model = get_peft_model(base_model, config)
# Print the trainable parameters: only about 0.5% of the total parameter count is trained
model.print_trainable_parameters()
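The ~0.5% figure can be sanity-checked with a rough back-of-envelope calculation. The dimensions below are assumptions taken from the Qwen2-1.5B architecture (hidden size 1536, intermediate size 8960, 28 layers, grouped-query attention with a k/v output dimension of 256); each adapted Linear of shape (out, in) adds r * (in + out) LoRA parameters.
# Rough estimate of the LoRA parameter count (architecture sizes are assumptions)
r = 8
hidden, inter, kv_dim, layers = 1536, 8960, 256, 28
shapes = {
    "q_proj": (hidden, hidden), "k_proj": (kv_dim, hidden), "v_proj": (kv_dim, hidden),
    "o_proj": (hidden, hidden), "gate_proj": (inter, hidden), "up_proj": (inter, hidden),
    "down_proj": (hidden, inter),
}
lora_params = layers * sum(r * (i + o) for (o, i) in shapes.values())
print(f"~{lora_params / 1e6:.1f}M trainable LoRA parameters")  # roughly 9M, i.e. ~0.5% of 1.8B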
# Callback that saves the LoRA adapter weights every save_steps steps
class SavePeftModelCallback(TrainerCallback):
    def on_save(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ):
        # Directory for this checkpoint's LoRA adapter weights
        checkpoint_folder = os.path.join(
            args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{state.global_step}"
        )
        # Save the LoRA adapter weights
        peft_model_path = os.path.join(checkpoint_folder, "adapter_model")
        kwargs["model"].save_pretrained(peft_model_path, safe_serialization=True)
        # Remove the redundantly saved base-model model.safetensors to save space
        base_model_path = os.path.join(checkpoint_folder, SAFE_WEIGHTS_NAME)
        if os.path.exists(base_model_path):
            os.remove(base_model_path)
        return control
# Training hyperparameters
args = TrainingArguments(
    output_dir="./output/DeepSeek-R1-Distill-Qwen-1.5B",  # output directory
    per_device_train_batch_size=1,  # batch size
    logging_steps=1,  # log every N steps
    num_train_epochs=1,  # number of epochs
    save_steps=3,  # save weights every N steps
    learning_rate=1e-4,  # learning rate
)
# Define the Trainer
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=truncated_dataset,
    callbacks=[SavePeftModelCallback],
)
# Launch fine-tuning
trainer.train()
Fine-Tuning Results
...{'loss': 1.5475, 'learning_rate': 6.666666666666667e-05, 'epoch': 0.33}
{'loss': 1.3807, 'learning_rate': 3.3333333333333335e-05, 'epoch': 0.67}
{'loss': 1.0043, 'learning_rate': 0.0, 'epoch': 1.0}
{'train_runtime': 62.2028, 'train_samples_per_second': 0.048, 'train_steps_per_second': 0.048, 'train_loss': 1.3108176390329997, 'epoch': 1.0}
TrainOutput(global_step=3, training_loss=1.3108176390329997, metrics={'train_runtime': 62.2028, 'train_samples_per_second': 0.048, 'train_steps_per_second': 0.048, 'train_loss': 1.3108176390329997, 'epoch': 1.0})
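To see the effect of the fine-tuning, the saved adapter can be loaded back onto the base model. The sketch below is not part of the original tutorial: it assumes mindnlp.peft exposes a PeftModel class mirroring the Hugging Face PEFT API, that return_tensors="ms" yields MindSpore tensors, and that the checkpoint path matches what SavePeftModelCallback wrote with save_steps=3.
from mindnlp.peft import PeftModel  # assumed to exist, mirroring the HF PEFT API

# Path assumed from output_dir, save_steps=3 and SavePeftModelCallback above
adapter_path = "./output/DeepSeek-R1-Distill-Qwen-1.5B/checkpoint-3/adapter_model"
lora_model = PeftModel.from_pretrained(base_model, adapter_path)
lora_model.set_train(False)

# Prompt follows the same "User: ...\n\nAssistant: " template used during preprocessing
inputs = tokenizer("User: 你是谁?\n\nAssistant: ", return_tensors="ms")
outputs = lora_model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))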
Takeaways
I had originally planned to try running this on an Atlas 200I A2 development board, but I have been busy recently, so for now I went with the Xihe platform instead. Through this simple demo it is easy to get the hang of LoRA fine-tuning the DeepSeek-R1-Distill-Qwen-1.5B model on the MindSpore large-model platform, and the overall experience is quite approachable. Once I have more time, I plan to try running it on the older development board as well.