LLaMA Factory 实现 LLMs 指令监督微调 (SFT) 完整工作流程

LLaMA Factory 实现 LLMs 指令监督微调 (SFT) 完整工作流程 | 极客日志

# Shallow-clone LLaMA-Factory (--depth 1) and install it in editable mode
# with the "torch" and "metrics" extras.
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
cd LLaMA-Factory
pip install -e ".[torch,metrics]"
# Verify that the installation succeeded
llamafactory-cli version

[ { "messages": [ { "role": "system", "content": "系统提示词（选填）" }, { "role": "user", "content": "人类指令" }, { "role": "assistant", "content": "模型回答" } ] }]

import codecs
import os
import json
from datasets import Dataset  # requires the `datasets` package (pip install datasets)

# Placeholders: the input JSON file and the output directory for the dataset.
json_file = 'PATH_TO_JSON_FORMAT_FILE.json'
save_path = 'SAVE_PATH'

# Built-in open() with an explicit encoding replaces the legacy codecs.open().
with open(json_file, 'r', encoding='utf-8') as fp:
    data_json = json.load(fp)

# The JSON root is indexed by column name, so it must be an object holding
# "system" and "conversations" entries (a top-level JSON list would fail here).
# NOTE(review): presumably both entries are lists of equal length — confirm
# against the actual data file.
all_systems = data_json['system']
all_conversations = data_json['conversations']
my_dataset = Dataset.from_dict({"system": all_systems, "conversations": all_conversations})

print(f'Saving to path: [{save_path}]')
my_dataset.save_to_disk(save_path, max_shard_size="2048MB")

"My_New_Data_Set_1": {
  "file_name": "PATH_TO_DATA_FILE",
  "formatting": "sharegpt",
  "columns": {
    "messages": "messages"
  },
  "tags": {
    "role_tag": "role",
    "content_tag": "content",
    "user_tag": "user",
    "assistant_tag": "assistant",
    "system_tag": "system"
  }
}

from huggingface_hub import snapshot_download

# Hugging Face repo id of the base model. The same string is reused as the
# local target directory passed to snapshot_download.
model_name = "Qwen/Qwen2.5-1.5B-Instruct"

print(f"正在下载模型 {model_name} 到 {model_name} ...")

# Fetch a snapshot of the repository into the local directory.
snapshot_download(repo_id=model_name, local_dir=model_name)

total 3026368
-rw-r--r-- 1 root root 660 Nov 12 14:21 config.json
-rw-r--r-- 1 root root 242 Nov 12 14:21 generation_config.json
-rw-r--r-- 1 root root 11343 Nov 12 14:21 LICENSE
-rw-r--r-- 1 root root 1671839 Nov 12 14:21 merges.txt
-rw-r--r-- 1 root root 3087467144 Nov 12 14:21 model.safetensors
-rw-r--r-- 1 root root 4917 Nov 12 14:21 README.md
-rw-r--r-- 1 root root 7305 Nov 12 14:21 tokenizer_config.json
-rw-r--r-- 1 root root 7031645 Nov 12 14:21 tokenizer.json
-rw-r--r-- 1 root root 2776833 Nov 12 14:21 vocab.json

# LLaMA Factory training config: full-parameter SFT of Qwen2.5-1.5B-Instruct.
# Presumably saved as qwen2.5_1.5b_full_sft.yaml, the file name passed to
# `llamafactory-cli train` — confirm.

### model
# Local path of the base model.
model_name_or_path: /data/BaseModels/Qwen/Qwen2.5-1.5B-Instruct
trust_remote_code: true

### method
stage: sft
do_train: true
do_eval: true
# Full fine-tuning (no adapter).
finetuning_type: full
# DeepSpeed config file; the name suggests ZeRO stage 2 — confirm against the file.
deepspeed: examples/deepspeed/ds_z2_config.json

### dataset
# Must match the key under which the dataset is registered (My_New_Data_Set_1).
dataset: My_New_Data_Set_1
template: qwen
# Presumably the maximum sequence length in tokens — confirm in the LLaMA Factory docs.
cutoff_len: 2048
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4

### output
output_dir: saves/qwen2.5-1.5b/full_sft
logging_steps: 10
save_steps: 200
plot_loss: true
overwrite_output_dir: true

### train
# Batch size 1 per device with 12 gradient-accumulation steps.
per_device_train_batch_size: 1
gradient_accumulation_steps: 12
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
# bf16 is disabled here.
bf16: false
ddp_timeout: 180000000
resume_from_checkpoint: null

### eval
# Presumably the fraction of the dataset held out for evaluation — confirm.
val_size: 0.1
per_device_eval_batch_size: 1
# Evaluation is step-based, every 200 steps.
eval_strategy: steps
eval_steps: 200

# Single GPU (only GPU 0 is made visible):
FORCE_TORCHRUN=1 CUDA_VISIBLE_DEVICES=0 llamafactory-cli train qwen2.5_1.5b_full_sft.yaml

# Multiple GPUs on one machine (single node, NNODES=1):
FORCE_TORCHRUN=1 NNODES=1 NODE_RANK=0 MASTER_PORT=29500 llamafactory-cli train qwen2.5_1.5b_full_sft.yaml

# Multiple machines, multiple GPUs (assuming two machines):
# one command per machine; only NODE_RANK differs, and xx.xx.xx.xx is a
# placeholder for the master node address.
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=xx.xx.xx.xx MASTER_PORT=29500 llamafactory-cli train qwen2.5_1.5b_full_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=xx.xx.xx.xx MASTER_PORT=29500 llamafactory-cli train qwen2.5_1.5b_full_sft.yaml

# LLaMA Factory training config: LoRA SFT of Qwen2.5-32B-Instruct.

### model
# Local path of the base model; the export step must point at this same path.
model_name_or_path: /data/BaseModels/Qwen/Qwen2.5-32B-Instruct
trust_remote_code: true

### method
stage: sft
do_train: true
# LoRA adapter training with rank 16 and alpha 32.
finetuning_type: lora
lora_alpha: 32
lora_rank: 16
lora_target: all
# DeepSpeed config file; the name suggests ZeRO stage 2 — confirm against the file.
deepspeed: examples/deepspeed/ds_z2_config.json

### dataset
# Must match the key under which the dataset is registered (My_New_Data_Set_1).
dataset: My_New_Data_Set_1
template: qwen
# Presumably the maximum sequence length in tokens — confirm in the LLaMA Factory docs.
cutoff_len: 4096
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4

### output
# Directory that receives the trained adapter; reused as the adapter path when exporting.
output_dir: saves/qwen2.5-32b_instruct/lora_sft
logging_steps: 10
save_steps: 400
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none

### train
# Batch size 2 per device with 8 gradient-accumulation steps.
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
# bf16 is enabled here.
bf16: true
ddp_timeout: 180000000
resume_from_checkpoint: null

### eval
# Presumably the fraction of the dataset held out for evaluation — confirm.
val_size: 0.1
per_device_eval_batch_size: 1
# Evaluation is step-based, every 200 steps.
eval_strategy: steps
eval_steps: 200

# Export the base model combined with the trained LoRA adapter into
# saves/qwen2.5-32b_instruct/lora_sft/merge/.
# --model_name_or_path must be the same base model that was used for training
# (/data/BaseModels/Qwen/Qwen2.5-32B-Instruct in the LoRA training config);
# --adapter_name_or_path is that config's output_dir.
llamafactory-cli export \
  --model_name_or_path /data/BaseModels/Qwen/Qwen2.5-32B-Instruct \
  --adapter_name_or_path saves/qwen2.5-32b_instruct/lora_sft \
  --export_dir saves/qwen2.5-32b_instruct/lora_sft/merge/ \
  --template qwen \
  --finetuning_type lora \
  --export_size 2 \
  --export_legacy_format False \
  --export_device cpu

# LLaMA-Factory/examples/deepspeed
ds_z0_config.json
ds_z2_config.json
ds_z2_offload_config.json
ds_z3_config.json
ds_z3_offload_config.json

# Start the LLaMA Factory API server for the exported (merged) model,
# with API_PORT set to 8000 and only GPU 0 visible.
API_PORT=8000 CUDA_VISIBLE_DEVICES=0 llamafactory-cli api \
  --model_name_or_path saves/qwen2.5-32b_instruct/lora_sft/merge/ \
  --template qwen

def call_llamafactory_api(prompt, timeout=None):
    """Send one user prompt to the local LLaMA Factory chat-completions endpoint.

    Args:
        prompt: Text placed in the single ``user`` message of the request.
        timeout: Optional timeout in seconds forwarded to ``requests.post``.
            The default ``None`` keeps the previous behavior of waiting
            indefinitely.

    Returns:
        A list with the ``content`` string of every returned choice, or
        ``None`` when the request raises a ``requests`` exception or the
        server answers with a non-200 status code.

    Raises:
        KeyError: If a returned choice lacks ``message`` or ``content``.
    """
    url = 'http://localhost:8000/v1/chat/completions'
    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json'
    }
    payload = {
        "model": "qwen",
        "messages": [{"role": "user", "content": prompt}],
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.9,
        "n": 1,
        "max_tokens": 200,
        "stream": False
    }
    try:
        # json= lets requests serialize the payload itself, so the function
        # no longer depends on a separately imported json module.
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        if response.status_code != 200:
            # Report the failure instead of returning None without a trace.
            print(f"请求失败，状态码：{response.status_code}")
            return None
        rsp_json = response.json()
        return [chs['message']['content'] for chs in rsp_json.get('choices', [])]
    except requests.exceptions.RequestException as e:
        print(f"请求发生异常：{e}")
    return None

# Run llama.cpp's convert_hf_to_gguf.py on the merged model directory and
# write the result to a single .gguf file (--outfile).
python3 tools/llama.cpp/convert_hf_to_gguf.py \
  /data/LLamaFactory/saves/qwen2.5-32b_instruct/lora_sft/merge/ \
  --outtype auto \
  --outfile /data/LLamaFactory/saves/qwen2.5-32b_instruct_lora_sft.gguf

# Ollama Modelfile: build the model from the converted GGUF file.
# NOTE(review): the GGUF path is relative, so presumably this file must sit
# next to the .gguf file — confirm.
FROM qwen2.5-32b_instruct_lora_sft.gguf
# Presumably the context window size in tokens — confirm in the Ollama docs.
PARAMETER num_ctx 4096

# Create an Ollama model from the Modelfile in the current directory.
ollama create my-llm-model -f Modelfile
# my-llm-model is the name given to the model

# Start the Ollama server with OLLAMA_HOST set to 0.0.0.0:6006, the port used
# by the client URL http://localhost:6006.
# NOTE(review): OLLAMA_ORIGINS=* presumably accepts requests from any origin
# — restrict it outside of trusted networks.
OLLAMA_MODELS=/usr/share/ollama/.ollama/models \
  OLLAMA_HOST=0.0.0.0:6006 \
  OLLAMA_ORIGINS=* \
  ollama serve

# Chat-completions endpoint of the local Ollama server and the model name to query.
OLLAMA_URL = "http://localhost:6006/v1/chat/completions"
MODEL_NAME = "my-llm-model"


def call_ollama(prompt):
    """Send one user prompt to the Ollama chat-completions endpoint.

    Args:
        prompt: Text placed in the single ``user`` message of the request.

    Returns:
        A list with the ``content`` of every returned choice whose message
        role is ``assistant``, or ``None`` if the request fails, the server
        returns an error status, or the response cannot be parsed.
    """
    data = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": prompt}],
        "stream": False
    }
    try:
        response = requests.post(OLLAMA_URL, json=data)
        response.raise_for_status()
        rsp = response.json()
        return [
            chs['message']['content']
            for chs in rsp['choices']
            if chs['message']['role'] == 'assistant'
        ]
    except Exception as e:
        # The broad catch is kept on purpose (best-effort helper that never
        # raises), but the error is now reported instead of being dropped.
        print(f"请求发生异常：{e}")
        return None

LLaMA Factory 实现 LLMs 指令监督微调 (SFT) 完整工作流程

简介

Fine-Tuning

LLaMA Factory

更多推荐文章

相关免费在线工具

SFT Pipeline for LLMs

Data Preparation

Model Training

Full Fine-Tuning（全量微调）

PEFT（参数高效微调）

DeepSpeed 配置

Evaluation & Deployment

LLaMA Factory API

Ollama API

Conclusion

更多推荐文章

相关免费在线工具

LLaMA Factory 实现 LLMs 指令监督微调 (SFT) 完整工作流程

简介

Fine-Tuning

LLaMA Factory

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

SFT Pipeline for LLMs

Data Preparation

Model Training

Full Fine-Tuning（全量微调）

PEFT（参数高效微调）

DeepSpeed 配置

Evaluation & Deployment

LLaMA Factory API

Ollama API

Conclusion

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具