f"0 - Failed to decode JSON: {json_str} - {assistant_content}"
True
break
"<functioncall> "
"role"
"assistant"
"content"
else
"role"
"assistant"
"content"
elif
"FUNCTION RESPONSE:"
18
if
"ASSISTANT:"
in
"ASSISTANT:"
try
except
as
print
f"1 - Failed to decode JSON: {function_content}"
True
break
"role"
"user"
"content"
"role"
"assistant"
"content"
else
try
except
as
print
f"2 - Failed to decode JSON: {function_response}"
True
break
"role"
"user"
"content"
elif
"ASSISTANT:"
"ASSISTANT:"
1
if
"<functioncall>"
in
"<functioncall>"
""
try
except
as
print
f"3 - Failed to decode JSON: {json_str} - {assistant_content}"
True
break
"<functioncall> "
"role"
"assistant"
"content"
if
continue
False
False
del
'system'
del
'chat'
return
"text"
map
True
(2)安装 mlx-lm 包
pip install mlx-lm
这个库为微调 LLM 提供了一个友好的用户交互方式,省去了许多麻烦,并实现更好的效果。
(3)创建 LoRA 配置
通过配置 LoRA 来微调 Llama3 8B 模型。更改一些关键参数以优化性能:
使用 fp16 代替 QLoRA,以避免量化和反量化可能带来的性能下降。
将 lora_layers 设置为 32,并使用全线性层,以获得与全微调相媲美的结果。
以下是 lora_config.yaml 文件的示例:
# The path to the local model directory or Hugging Face repo.
model: "meta-llama/Meta-Llama-3-8B-Instruct"

# Whether or not to train (boolean)
train: true

# Directory with {train, valid, test}.jsonl files
data: "data"

# The PRNG seed
seed: 0

# Number of layers to fine-tune
lora_layers: 32

# Minibatch size.
batch_size: 1

# Iterations to train for.
iters: 6000

# Number of validation batches, -1 uses the entire validation set.
val_batches: 25

# Adam learning rate.
learning_rate: 1e-6

# Number of training steps between loss reporting.
steps_per_report: 10

# Number of training steps between validations.
steps_per_eval: 200

# Load path to resume training with the given adapter weights.
resume_adapter_file: null

# Save/load path for the trained adapter weights.
adapter_path: "adapters"

# Save the model every N iterations.
save_every: 1000

# Evaluate on the test set after training
test: false

# Number of test set batches, -1 uses the entire test set.
test_batches: 100

# Maximum sequence length.
max_seq_length: 8192

# Use gradient checkpointing to reduce memory use.
grad_checkpoint: true

# LoRA parameters can only be specified in a config file
lora_parameters:
  # The layer keys to apply LoRA to.
  # These will be applied for the last lora_layers
  keys: ['mlp.gate_proj', 'mlp.down_proj', 'self_attn.q_proj', 'mlp.up_proj', 'self_attn.o_proj', 'self_attn.v_proj', 'self_attn.k_proj']
  rank: 128
  alpha: 256
  scale: 10.0
  dropout: 0.05

# Schedule can only be specified in a config file, uncomment to use.
# lr_schedule:
#   name: cosine_decay
#   warmup: 100 # 0 for no warmup
#   warmup_init: 1e-7 # 0 if not specified
#   arguments: [1e-6, 1000, 1e-7] # passed to scheduler
# Run one instruction through `reft_model.generate` and print the decoded text.
# `prompt_no_input_template`, `tokenizer`, `device` and `reft_model` are not
# defined in this snippet and must already exist when it runs.
instruction = "A rectangular garden has a length of 25 feet and a width of 15 feet. If you want to build a fence around the entire garden, how many feet of fencing will you need?"

# tokenize and prepare the input
prompt = prompt_no_input_template % instruction
prompt = tokenizer(prompt, return_tensors="pt").to(device)

# Size of the last axis of `input_ids` minus one, i.e. the index of the final
# prompt position; it is the only location passed in `unit_locations` below.
base_unit_location = prompt["input_ids"].shape[-1] - 1  # last position

# `generate` returns a pair; only the second element is used here.
_, reft_response = reft_model.generate(
prompt, unit_locations={"sources->base": (None, [[[base_unit_location]]])},
intervene_on_prompt=True, max_new_tokens=512, do_sample=True,
eos_token_id=tokenizer.eos_token_id, early_stopping=True
)
print(tokenizer.decode(reft_response[0], skip_special_tokens=True))
# 1) Download a pretrained model
litgpt download --repo_id microsoft/phi-2

# 2) Chat with the model, loading it from the checkpoint directory given below
litgpt chat \
--checkpoint_dir checkpoints/microsoft/phi-2

# Example input at the interactive chat prompt (not a shell command):
>> Prompt: What do Llamas eat?
(3)微调模型
下面是在 phi-2 基础上进行微调的命令。
# 1) Download a pretrained model
litgpt download --repo_id microsoft/phi-2

# 2) Finetune the model
# Fetch the example dataset and save it locally as my_custom_dataset.json
curl -L https://huggingface.co/datasets/ksaw008/finance_alpaca/resolve/main/finance_alpaca.json -o my_custom_dataset.json

# Finetune from the downloaded checkpoint on that JSON file, with a validation
# split fraction of 0.1; output goes to out/custom-model
litgpt finetune \
--checkpoint_dir checkpoints/microsoft/phi-2 \
--data JSON \
--data.json_path my_custom_dataset.json \
--data.val_split_fraction 0.1 \
--out_dir out/custom-model

# 3) Chat with the model, using the `final` directory under the finetune output dir
litgpt chat \
--checkpoint_dir out/custom-model/final
除此之外,还可以基于自己的数据进行训练,详情请参考 GitHub。
(4)部署
通过下面的部署命令,启动模型服务。
# Locate the checkpoint of your finetuned or pretrained model and call the `serve` command
# (the path below is a placeholder; replace it with your own checkpoint directory):
litgpt serve --checkpoint_dir path/to/your/checkpoint/microsoft/phi-2

# Alternative: if you haven't finetuned, download any checkpoint to deploy it:
litgpt download --repo_id microsoft/phi-2
litgpt serve --checkpoint_dir checkpoints/microsoft/phi-2
通过 HTTP API 访问服务。
# Use the server (in a separate session)
import json

import requests

# POST the prompt as a JSON body to the local /predict endpoint.
# The misspelling "Exampel" is intentional: the prompt asks the model to fix typos.
response = requests.post(
    "http://127.0.0.1:8000/predict",
    json={"prompt": "Fix typos in the following sentence: Exampel input"},
)
# The reply is parsed as JSON and the value under its "output" key is printed.
print(response.json()["output"])
# set the base model
FROM llama3:8b

# set custom parameter values
PARAMETER temperature 1
PARAMETER num_keep 24
PARAMETER stop <|start_header_id|>
PARAMETER stop <|end_header_id|>
PARAMETER stop <|eot_id|>
# NOTE(review): this stop string has no closing "|>"; presumably it is meant as a
# prefix covering every reserved special token - confirm before changing it.
PARAMETER stop <|reserved_special_token

# set the model template
# Each special token (<|start_header_id|>, <|end_header_id|>, <|eot_id|>) must be
# written as one unbroken string, otherwise it is emitted as plain text pieces.
# The Llama 3 prompt format puts a blank line between a role header and the
# message text, so one is kept after every <|end_header_id|>.
TEMPLATE """
{{ if .System }}<|start_header_id|>system<|end_header_id|>

{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>

{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>

{{ .Response }}<|eot_id|>
"""

# set the system message
SYSTEM You are llama3 from Meta.

# set Chinese lora support
ADAPTER /root/.ollama/models/lora/ggml-adapter-model.bin