from unsloth.mlx import mlx_utils
from unsloth.mlx import lora as mlx_lora
from unsloth import is_bfloat16_supported
from transformers.utils import strtobool
from datasets import Dataset
import logging
import os
import argparse
# Every setting for this run lives on one namespace object, which is later
# handed to the model loader and the trainer. The groupings below are by
# apparent purpose only; how each field is actually consumed is decided by
# mlx_utils / mlx_lora and is not visible in this script.
args = argparse.Namespace(
    **{
        # Base model and how it is loaded. The dtype falls back to float16
        # when is_bfloat16_supported() reports False.
        "model_name": "unsloth/Llama-3.2-3B-Instruct",
        "max_seq_length": 2048,
        "dtype": "bfloat16" if is_bfloat16_supported() else "float16",
        "load_in_4bit": True,
        # LoRA adapter settings.
        "r": 16,
        "lora_alpha": 16,
        "lora_dropout": 0.1,
        "bias": "none",
        "use_gradient_checkpointing": "unsloth",
        "random_state": 3407,
        "use_rslora": False,
        "loftq_config": None,
        # Training-loop settings.
        "per_device_train_batch_size": 2,
        "gradient_accumulation_steps": 4,
        "warmup_steps": 5,
        "max_steps": 100,
        "learning_rate": 2e-4,
        "optim": "adamw_8bit",
        "weight_decay": 0.01,
        "lr_scheduler_type": "linear",
        "seed": 3407,
        # Logging and output locations.
        "output_dir": "outputs",
        "report_to": "tensorboard",
        "logging_steps": 1,
        "adapter_file": "adapters.safetensors",
        # Saving / export of the result.
        "save_model": True,
        "save_method": "merged_16bit",
        "save_gguf": False,
        "save_path": "model",
        "quantization": "q8_0",
    }
)
# Raise the level of the 'hf-to-gguf' logger so that only WARNING and more
# severe records from it are emitted.
_gguf_logger = logging.getLogger("hf-to-gguf")
_gguf_logger.setLevel(logging.WARNING)

# Load the base model; the loader returns the model, its tokenizer and a
# config object, all of which stay at module level for the later steps.
print("Loading pretrained model. This may take a while...")
model, tokenizer, config = mlx_utils.load_pretrained(
    args.model_name,
    dtype=args.dtype,
    load_in_4bit=args.load_in_4bit,
)
print("Model loaded")
# Prompt template with three positional `{}` slots. formatting_prompts_func
# fills them, in order, with the instruction, the input and the output of a
# record. The pieces below concatenate to a single four-line string.
alpaca_prompt = (
    "Below is an instruction that describes a task, paired with an input that "
    "provides further context. Write a response that appropriately completes "
    "the request.\n"
    "### Instruction: {}\n"
    "### Input: {}\n"
    "### Response: {}"
)

# End-of-sequence marker taken from the tokenizer; it is appended to every
# formatted training example.
EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    """Render a column-wise batch of records into training prompt strings.

    Args:
        examples: Mapping that provides the keys ``"instruction"``,
            ``"input"`` and ``"output"``, each holding a sequence with one
            entry per record. The three sequences are walked in lockstep, so
            iteration stops at the shortest one.

    Returns:
        A dict ``{"text": [...]}`` with one string per record: the module-level
        ``alpaca_prompt`` template filled with that record's instruction,
        input and output, followed by the module-level ``EOS_TOKEN``.

    Raises:
        KeyError: If one of the three expected keys is missing.
    """
    records = zip(examples["instruction"], examples["input"], examples["output"])
    # `context` instead of `input` so the builtin is not shadowed.
    return {
        "text": [
            alpaca_prompt.format(instruction, context, response) + EOS_TOKEN
            for instruction, context, response in records
        ]
    }
# Three hand-written demo records, one tuple per record in the order
# (instruction, input, output).
_demo_records = (
    (
        "Summarize the following text",
        "The quick brown fox jumps over the lazy dog.",
        "A fox quickly jumps over a dog.",
    ),
    (
        "Translate this to French",
        "Hello world",
        "Bonjour le monde",
    ),
    (
        "Explain this concept",
        "Machine learning is a subset of artificial intelligence",
        "Machine learning is an AI approach where systems learn patterns from data",
    ),
)

# Pivot the records into the column-oriented layout used downstream: one list
# per field, with position i across the three lists forming record i.
basic_data = {
    column: [record[position] for record in _demo_records]
    for position, column in enumerate(("instruction", "input", "output"))
}
# Wrap the column-oriented demo data in a Dataset object.
dataset = Dataset.from_dict(basic_data)
print("Dataset initialized")

# batched=True hands formatting_prompts_func whole columns at once; the "text"
# entry it returns is added to the dataset.
dataset = dataset.map(formatting_prompts_func, batched=True)
print("Data is formatted and ready!")

# Hold out roughly a third of the records for evaluation. The split is seeded
# with the run's configured seed so that which records land in train vs. test
# is the same on every run instead of changing between invocations.
datasets = dataset.train_test_split(test_size=0.33, seed=args.seed)
print(f"Training examples: {len(datasets['train'])}, Test examples: {len(datasets['test'])}")

# Hand the configuration, model, tokenizer and both splits to the trainer.
print("Starting training")
mlx_lora.train_model(args, model, tokenizer, datasets["train"], datasets["test"])
Trainable parameters: 0.143% (4.588M/3212.750M)
Starting training..., iters: 100
Iter 1: Val loss 2.323, Val took 1.660s
Iter 1: Train loss 2.401, Learning Rate 0.000e+00, It/sec 0.580, Tokens/sec 117.208, Trained Tokens 202, Peak mem 2.661 GB
Iter 2: Train loss 2.134, Learning Rate 0.000e+00, It/sec 0.493, Tokens/sec 119.230, Trained Tokens 444, Peak mem 2.810 GB