PythonAI算法
HuggingFace 大模型微调与在线推理基础教程
基于 HuggingFace 库的大模型微调与推理实战教程。内容涵盖环境搭建、Tokenizer 分词器操作、Dataset 数据处理、自定义模型构建、Trainer 训练配置及优化器设置。详细演示 ChatGLM3-6B 的 16 位推理部署与 Qwen1.5-7B 的 4 位量化加载方案,包含显存优化、指标计算函数编写及对抗训练示例。提供从数据准备到模型上线的完整代码逻辑。

基于 HuggingFace 库的大模型微调与推理实战教程。内容涵盖环境搭建、Tokenizer 分词器操作、Dataset 数据处理、自定义模型构建、Trainer 训练配置及优化器设置。详细演示 ChatGLM3-6B 的 16 位推理部署与 Qwen1.5-7B 的 4 位量化加载方案,包含显存优化、指标计算函数编写及对抗训练示例。提供从数据准备到模型上线的完整代码逻辑。

!pip install transformers datasets seqeval sacrebleu evaluate accelerate==0.19.0 sentencepiece loralib peft
import transformers
import torch
import datasets
import peft
print("transformers vision: %s" % (transformers.__version__))
print("torch vision: %s" % (torch.__version__))
print(f"Accelerate version: {accelerate.__version__}")
print("datasets version: %s" % (datasets.__version__))
print(f"PEFT version: {peft.__version__}")
import os
import gc
import ctypes
import torch
import random
import numpy as np

# Silence wandb and tokenizer fork warnings in notebook runs.
os.environ['WANDB_DISABLED'] = 'true'
os.environ["TOKENIZERS_PARALLELISM"] = "false"

def clean_memory():
    """Release cached memory: Python garbage, glibc arenas, and the CUDA cache.

    (Indentation restored; the original paste had lost it.)
    """
    gc.collect()
    # Ask glibc to hand freed arenas back to the OS — Linux-only call.
    ctypes.CDLL("libc.so.6").malloc_trim(0)
    # No-op when CUDA is unavailable.
    torch.cuda.empty_cache()
# Authenticate against the Hugging Face Hub and Weights & Biases.
# NOTE(review): 'your_token' / 'your_key' are placeholders — substitute real
# credentials (or use environment variables) before running.
from huggingface_hub import login
login(token='your_token')
import wandb
wandb.login(key='your_key')
wandb.init(project="trl_imdb_positive")
def seed_everything(seed):
    """Seed every RNG used in training so runs are reproducible.

    Args:
        seed: integer seed applied to `random`, hashing, NumPy, PyTorch and CUDA.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        # Bug fix: benchmark=True lets cuDNN autotune kernels, which defeats
        # the reproducibility this function exists for; it must be False when
        # deterministic=True.
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.enabled = True
class Args:
    """Hyper-parameters for the prompt-based BERT fine-tuning run.

    (Indentation restored; the original paste had lost it.)
    """
    model_path = "hfl/chinese-bert-wwm-ext"
    max_seq_len = 128
    ratio = 0.8  # train/dev split ratio
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_batch_size = 32
    dev_batch_size = 32
    weight_decay = 0.01
    epochs = 1
    learning_rate = 3e-5
    eval_step = 100  # evaluate on the dev set every N steps
    # Prompt template: the two [MASK] slots get filled with a 2-character
    # sentiment label by the MLM head.
    prompt = "情感是 [MASK][MASK]。"
    seed = 2024

args = Args()
seed_everything(args.seed)
# Tokenizer basics: tokenize, convert to ids, special tokens, encode/encode_plus.
from transformers import BertForMaskedLM, BertTokenizer, BertForSequenceClassification, BertConfig, AdamW
from transformers import pipeline
tokenizer = BertTokenizer.from_pretrained(args.model_path)
sentence = "It is a very beautiful book."
# Split into WordPiece tokens; no special tokens are added at this stage.
tokens = tokenizer.tokenize(sentence)
print(tokens)
# ['it', 'is', 'a', 'very', 'beautiful', 'book', '.']
tokenizer.convert_tokens_to_ids(tokens)
# [8233, 8310, 143, 11785, 13106, 9106, 119]
# encode() wraps the id sequence with [CLS] ... [SEP].
token_samples_c = tokenizer.encode(text=sentence, add_special_tokens=True)
print(token_samples_c)
# [101, 8233, 8310, 143, 11785, 13106, 9106, 119, 102]
print(tokenizer.all_special_ids, tokenizer.all_special_tokens)
# [100, 102, 0, 101, 103], ['[UNK]', '[SEP]', '[PAD]', '[CLS]', '[MASK]']
# encode_plus() also pads to max_length and returns attention_mask / token_type_ids.
token_samples_d = tokenizer.encode_plus(
text=sentence,
max_length=15,
return_tensors='pt',
add_special_tokens=True,
padding="max_length",
truncation="longest_first",
return_attention_mask=True,
return_token_type_ids=True
)
print(token_samples_d)
# {'input_ids': tensor([[...]]), 'token_type_ids': tensor([[...]]), 'attention_mask': tensor([[...]])}
# Map each label name to vocab ids, one id per character of the name.
# NOTE(review): `label2id` is not defined anywhere in this chunk — presumably a
# {label_name: id} mapping created earlier in the notebook; confirm before running.
vocab = tokenizer.vocab
label2ind = {}
for label_name in label2id:
zz = [vocab[label] for label in label_name]
label2ind[label_name] = zz
print(label2ind)
print(tokenizer.unk_token, tokenizer.convert_tokens_to_ids(tokenizer.unk_token))
print(tokenizer.sep_token, tokenizer.convert_tokens_to_ids(tokenizer.sep_token))
print(tokenizer.pad_token, tokenizer.convert_tokens_to_ids(tokenizer.pad_token))
print(tokenizer.cls_token, tokenizer.convert_tokens_to_ids(tokenizer.cls_token))
print(tokenizer.mask_token, tokenizer.convert_tokens_to_ids(tokenizer.mask_token))
print(tokenizer.all_special_ids)
print(tokenizer.all_special_tokens)
# Local data: download SQuAD-IT, extract, and load the JSON files.
!wget https://github.com/crux82/squad-it/raw/master/SQuAD_it-train.json.gz
!wget https://github.com/crux82/squad-it/raw/master/SQuAD_it-test.json.gz
!gzip -dkv SQuAD_it-*.json.gz
data_files = {"train": "SQuAD_it-train.json", "test": "SQuAD_it-test.json"}
# NOTE(review): `load_dataset` is only imported further down (after this call);
# in a fresh session the `from datasets import load_dataset` must run first.
squad_it_dataset = load_dataset("json", data_files=data_files, field="data")
# Remote data: load_dataset reads (and decompresses) URLs directly.
url = "https://github.com/crux82/squad-it/raw/master/"
data_files = {
"train": url + "SQuAD_it-train.json.gz",
"test": url + "SQuAD_it-test.json.gz",
}
squad_it_dataset = load_dataset("json", data_files=data_files, field="data")
!wget "https://archive.ics.uci.edu/ml/machine-learning-databases/00462/drugsCom_raw.zip"
!unzip drugsCom_raw.zip
from datasets import load_dataset
# TSV files load through the csv builder with a tab delimiter.
data_files = {"train": "drugsComTrain_raw.tsv", "test": "drugsComTest_raw.tsv"}
drug_dataset = load_dataset("csv", data_files=data_files, delimiter="\t")
# Inspect a shuffled 1000-row sample.
drug_sample = drug_dataset["train"].shuffle(seed=42).select(range(1000))
print(drug_sample[:3])
# Rename the unnamed pandas index column to a meaningful id.
drug_dataset = drug_dataset.rename_column(original_column_name="Unnamed: 0", new_column_name="patient_id")
# filter() drops rows; map() transforms them row by row.
def lowercase_condition(example):
return {"condition": example["condition"].lower()}
drug_dataset = drug_dataset.filter(lambda x: x["condition"] is not None)
drug_dataset = drug_dataset.map(lowercase_condition)
# Add a derived column: number of whitespace-separated words in the review.
def compute_review_length(example):
return {"review_length": len(example["review"].split())}
drug_dataset = drug_dataset.map(compute_review_length)
# sort() orders by a column; then keep only reviews longer than 30 words.
drug_dataset["train"].sort("review_length")[:3]
drug_dataset = drug_dataset.filter(lambda x: x["review_length"] > 30)
# Speed up map() with batching and multiprocessing.
import html
# Reconstructed: the argument values after "batched=" / "num_proc=" were lost
# in the original paste.
new_drug_dataset = drug_dataset.map(
    lambda x: {"review": [html.unescape(o) for o in x["review"]]},
    batched=True, num_proc=4,
)

def tokenize_and_split(examples):
    """Tokenize reviews, splitting texts longer than max_length into extra rows."""
    return tokenizer(
        examples["review"],
        truncation=True,
        max_length=128,
        return_overflowing_tokens=True,
    )

# Drop the original columns: overflow splitting changes the row count, so the
# old columns would no longer line up.
tokenized_dataset = drug_dataset.map(
    tokenize_and_split, batched=True, remove_columns=drug_dataset["train"].column_names
)
# 80/20 train/validation split; the original test set is kept untouched.
drug_dataset_clean = drug_dataset["train"].train_test_split(train_size=0.8, seed=42)
drug_dataset_clean["validation"] = drug_dataset_clean.pop("test")
drug_dataset_clean["test"] = drug_dataset["test"]
# Persist the cleaned dataset to disk and reload it.
drug_dataset_clean.save_to_disk("drug-reviews")
from datasets import load_from_disk
drug_dataset_reloaded = load_from_disk("drug-reviews")
# Hub interaction: list datasets, authenticate, create a dataset repo, push files.
from huggingface_hub import list_datasets
all_datasets = list_datasets()
from huggingface_hub import login
login(token='your_token')
from huggingface_hub import create_repo
# NOTE(review): recent huggingface_hub versions take `repo_id=` instead of
# `name=` — confirm against the installed version.
repo_url = create_repo(name="github-issues", repo_type="dataset")
from huggingface_hub import Repository
repo = Repository(local_dir="github-issues", clone_from=repo_url)
!cp issues-datasets-with-hf-doc-builder.jsonl github-issues/
# Track .jsonl files with git-lfs before pushing.
repo.lfs_track("*.jsonl")
repo.push_to_hub()
remote_dataset = load_dataset("lewtun/github-issues", split="train")
# Resolve a direct download URL for a single file inside a dataset repo.
from huggingface_hub import hf_hub_url
from datasets import load_dataset
data_files = hf_hub_url(
repo_id="lewtun/github-issues",
filename="datasets-issues-with-hf-doc-builder.jsonl",
repo_type="dataset",
)
issues_dataset = load_dataset("json", data_files=data_files, split="train")
# Encode texts into pooled embedding vectors and index them with FAISS.
def get_embeddings(text_list):
    """Tokenize `text_list`, run the encoder, and return [CLS]-pooled embeddings.

    (Indentation restored; the original paste had lost it.)

    NOTE(review): relies on `tokenizer`, `model`, `device` and `cls_pooling`
    defined elsewhere in the notebook — confirm they are in scope.
    """
    encoded_input = tokenizer(
        text_list, padding=True, truncation=True, return_tensors="pt"
    )
    encoded_input = {k: v.to(device) for k, v in encoded_input.items()}
    model_output = model(**encoded_input)
    return cls_pooling(model_output)

embedding = get_embeddings(comments_dataset["text"][0])
embeddings_dataset = comments_dataset.map(
    lambda x: {"embeddings": get_embeddings(x["text"]).detach().cpu().numpy()[0]}
)
# Build a FAISS index over the embedding column for nearest-neighbour search.
embeddings_dataset.add_faiss_index(column="embeddings")
def load_data(data, prompt, max_seq_len):
    """Build prompt-augmented (text, label) pairs.

    Inner spaces are removed from each text, the prompt is appended after a
    comma, and any sample whose result would exceed max_seq_len minus the two
    special tokens ([CLS]/[SEP]) is dropped.
    """
    kept = []
    for raw_text, label in data:
        merged = "".join(raw_text.split(" ")).strip() + "," + prompt
        # Reserve two positions for the special tokens added by the tokenizer.
        if len(merged) <= max_seq_len - 2:
            kept.append((merged, label))
    return kept
class Collate:
    """DataLoader collate_fn that tokenizes prompt texts and records [MASK] positions."""

    def __init__(self, tokenizer, max_seq_len):
        self.tokenizer = tokenizer
        self.max_seq_len = max_seq_len

    def collate_fn(self, batch):
        """Convert a list of (text, label) pairs into padded long tensors.

        Returns a dict whose keys match the training loop: input_ids,
        attention_mask, token_type_ids, label, mask_pos. (The key strings and
        the final return statement were lost in the original paste and have
        been reconstructed from the consumer code.)
        """
        input_ids_all = []
        token_type_ids_all = []
        attention_mask_all = []
        label_all = []
        mask_pos_all = []
        # Hoisted out of the loop: the [MASK] token id never changes.
        mask_id = self.tokenizer.convert_tokens_to_ids(self.tokenizer.mask_token)
        for text, label in batch:
            inputs = self.tokenizer.encode_plus(
                text=text,
                max_length=self.max_seq_len,
                padding="max_length",
                truncation="longest_first",
                return_attention_mask=True,
                return_token_type_ids=True
            )
            input_ids = inputs["input_ids"]
            # Positions of the [MASK] slots the MLM head must fill.
            mask_pos = [i for i, token_id in enumerate(input_ids) if token_id == mask_id]
            mask_pos_all.append(mask_pos)
            input_ids_all.append(input_ids)
            token_type_ids_all.append(inputs["token_type_ids"])
            attention_mask_all.append(inputs["attention_mask"])
            label_all.append(label)
        return {
            "input_ids": torch.tensor(input_ids_all, dtype=torch.long),
            "attention_mask": torch.tensor(attention_mask_all, dtype=torch.long),
            "token_type_ids": torch.tensor(token_type_ids_all, dtype=torch.long),
            "label": torch.tensor(label_all, dtype=torch.long),
            "mask_pos": torch.tensor(mask_pos_all, dtype=torch.long),
        }
# Build DataLoaders around the prompt-aware collate function.
# NOTE(review): `DataLoader`, `train_data` and `dev_data` are defined elsewhere
# in the notebook (DataLoader presumably comes from torch.utils.data).
collate = Collate(tokenizer, args.max_seq_len)
train_loader = DataLoader(train_data, batch_size=args.train_batch_size, shuffle=True, num_workers=2, collate_fn=collate.collate_fn)
total_step = len(train_loader) * args.epochs
args.total_step = total_step
dev_loader = DataLoader(dev_data, batch_size=args.dev_batch_size, shuffle=False, num_workers=2, collate_fn=collate.collate_fn)
test_loader = dev_loader
# Peek at one batch to sanity-check ids and [MASK] positions.
for step, batch_data in enumerate(train_loader):
label = batch_data["label"]
batch_size = label.size(0)
input_ids = batch_data["input_ids"]
mask_pos = batch_data["mask_pos"]
token_type_ids = batch_data["token_type_ids"]
attention_mask = batch_data["attention_mask"]
print(input_ids[:2])
print(mask_pos[:2])
break
# Convert pandas frames into HF Datasets ahead of cross-validation.
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from datasets import Dataset, DatasetDict
test = Dataset.from_pandas(test)
disaster_tweets_test = DatasetDict()
disaster_tweets_test['test'] = test
# Separate features from the target column.
data_y = train['labels']
data_x = train.drop(columns='labels')
def get_train_val_test(df_train, df_val):
    """Wrap train/validation DataFrames into one DatasetDict."""
    return DatasetDict({
        'train': Dataset.from_pandas(df_train),
        'validation': Dataset.from_pandas(df_val),
    })
from transformers import AutoTokenizer, ElectraTokenizer, ElectraForSequenceClassification, AdamW
import torch

max_length = 40
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)

def tokenize(batch):
    """Tokenize a batch of tweets with fixed-length padding/truncation."""
    return tokenizer(batch['text'], max_length=max_length, padding=True, truncation=True)

disaster_tweets_test_encoded = disaster_tweets_test.map(tokenize, batched=True, batch_size=None)

# Stratified K-fold so each fold preserves the label distribution.
Fold = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=seed_val)
for n, (train_index, val_index) in enumerate(Fold.split(data_x, data_y)):
    train_pf = data_x.iloc[train_index, :]
    train_pf['label'] = data_y.iloc[train_index]
    val_pf = data_x.iloc[val_index, :]
    val_pf['label'] = data_y.iloc[val_index]
    disaster_tweets = get_train_val_test(train_pf, val_pf)
    # Reconstructed: the value after "batch_size=" was lost in the paste;
    # batch_size=None encodes each split in one batch, matching the test-set
    # call above.
    disaster_tweets_encoded = disaster_tweets.map(tokenize, batched=True, batch_size=None)
class multilabel_dropout(nn.Module):
    """Multi-sample dropout head: average the classifier output over five
    dropout rates (0.1 … 0.5).

    Bug fix: the original created fresh `nn.Dropout` modules inside `forward`,
    so they were never registered and were permanently in training mode —
    dropout stayed active (and non-deterministic) even after `model.eval()`.
    The dropouts are now built once in __init__ so train/eval switching works.
    """

    def __init__(self, hidden_size, num_labels=2):
        super(multilabel_dropout, self).__init__()
        self.classifier = torch.nn.Linear(hidden_size, num_labels)
        self.dropouts = nn.ModuleList(
            [nn.Dropout(p) for p in np.linspace(0.1, 0.5, 5)]
        )

    def forward(self, out):
        # Average the five dropout-perturbed classifier outputs.
        return torch.mean(
            torch.stack([self.classifier(drop(out)) for drop in self.dropouts], dim=0),
            dim=0,
        )
class MeanPooling(nn.Module):
    """Mask-aware mean pooling over the token dimension of encoder hidden states."""

    def __init__(self):
        super(MeanPooling, self).__init__()

    def forward(self, last_hidden_state, attention_mask):
        # Broadcast the (batch, seq) mask across the hidden dimension.
        mask = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        summed = torch.sum(last_hidden_state * mask, 1)
        # Clamp so fully-masked rows divide by a tiny epsilon instead of zero.
        counts = torch.clamp(mask.sum(1), min=1e-9)
        return summed / counts
from transformers import AutoModelForSequenceClassification, BertForSequenceClassification, AutoModel, AutoConfig
from transformers.modeling_outputs import TokenClassifierOutput

num_labels = len(class_names)

class MyModel(nn.Module):
    """Pretrained backbone + mask-aware mean pooling + multi-sample-dropout head."""

    def __init__(self, model_name, num_labels):
        super(MyModel, self).__init__()
        self.num_labels = num_labels
        self.model = AutoModel.from_pretrained(model_name)
        self.config = AutoConfig.from_pretrained(model_name)
        self.drop = nn.Dropout(p=0.2)
        self.pooler = MeanPooling()
        self.fc = nn.Linear(self.config.hidden_size, num_labels)
        self.multi_drop = multilabel_dropout(self.config.hidden_size, self.num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, labels=None):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=False)
        out = self.pooler(outputs.last_hidden_state, attention_mask)
        logits = self.multi_drop(out)
        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            # Bug fix: the original read `labels.view(-)` — a syntax error;
            # flatten labels to 1-D for cross-entropy.
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        # Bug fix: the original dropped the `return`, so forward returned None.
        return TokenClassifierOutput(loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions)

model = MyModel(model_name=model_ckpt, num_labels=num_labels).to(device)
from transformers import AutoModelForSequenceClassification, BertForSequenceClassification, AutoModel, AutoConfig
from transformers.modeling_outputs import TokenClassifierOutput
num_labels = len(class_names)
# Ready-made sequence-classification heads as an alternative to the custom MyModel.
model = AutoModelForSequenceClassification.from_pretrained(model_ckpt, num_labels=num_labels).to(device)
model = BertForSequenceClassification.from_pretrained(model_ckpt, num_labels=2).to(device)
# Download model weights via git-lfs; GIT_LFS_SKIP_SMUDGE=1 clones metadata
# only, skipping the large weight files.
!sudo apt-get install git-lfs
!git lfs install
!git clone https://huggingface.co/THUDM/chatglm2-6b.git
!GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/THUDM/chatglm2-6b
# Download models through a Hugging Face mirror accelerator (useful inside
# mainland China).
!pip install -U huggingface_hub
!git clone https://github.com/LetheSec/HuggingFace-Download-Accelerator.git
%cd HuggingFace-Download-Accelerator
!python hf_download.py --model mistralai/Mixtral-8x7B-Instruct-v0.1 --save_dir ../hf_hub
from huggingface_hub import snapshot_download
snapshot_download(repo_id="baichuan-inc/Baichuan2-13B-Chat-4bits", local_dir="baichuan-inc/Baichuan2-13B-Chat-4bits")
# Upload a fine-tuned model to the Hugging Face Hub via the Python API.
save_path = "your_finetune_model_weight"
from huggingface_hub import login
login()
from huggingface_hub import HfApi
api = HfApi()
repo_id = "shujunge/chatglm2_6b_helloword"
api.create_repo(repo_id=repo_id)
api.upload_folder(folder_path=save_path, repo_id=repo_id, repo_type="model")
from transformers import BertForMaskedLM, BertTokenizer, BertForSequenceClassification, BertConfig, AdamW

def build_optimizer(self):
    """Create AdamW with weight decay on everything except biases and LayerNorm weights.

    (Indentation restored; the original paste had lost it. Meant to live on a
    trainer-like class that exposes `self.model` and `self.args`.)
    """
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': self.args.weight_decay},
        {'params': [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=self.args.learning_rate)
    return optimizer

# Alternative: Adafactor with its built-in schedule (lr derived internally when
# relative_step=True).
from transformers.optimization import Adafactor, AdafactorSchedule
optimizer = Adafactor(model.parameters(), scale_parameter=True, relative_step=True, warmup_init=True, lr=None)
lr_scheduler = AdafactorSchedule(optimizer)
trainer = Trainer(..., optimizers=(optimizer, lr_scheduler))
# Score validation predictions with the GLUE/MRPC metric from `evaluate`.
import evaluate
predictions = trainer.predict(tokenized_datasets["validation"])
print(predictions.predictions.shape, predictions.label_ids.shape)
# argmax over the logits gives the predicted class ids.
preds = np.argmax(predictions.predictions, axis=-1)
metric = evaluate.load("glue", "mrpc")
metric.compute(predictions=preds, references=predictions.label_ids)
# Validation metric function built with scikit-learn.
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(pred) -> dict:
    """Compute accuracy and weighted F1 from a Trainer prediction output."""
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'f1': f1_score(y_true, y_pred, average='weighted'),
    }
# Validation metric function built with the `evaluate` library (seqeval, NER).
import numpy as np
import evaluate

metric = evaluate.load("seqeval")

def compute_metrics(eval_preds):
    """Convert logits/labels to tag names (dropping -100 padding) and score with seqeval."""
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    # -100 marks ignored (sub-word / padding) positions.
    true_labels = [[label_names[l] for l in label if l != -100] for label in labels]
    true_predictions = [[label_names[p] for (p, l) in zip(prediction, label) if l != -100] for prediction, label in zip(predictions, labels)]
    all_metrics = metric.compute(predictions=true_predictions, references=true_labels)
    # Reconstructed: the dict keys were lost in the original paste; these are
    # seqeval's standard corpus-level metrics.
    return {
        "precision": all_metrics["overall_precision"],
        "recall": all_metrics["overall_recall"],
        "f1": all_metrics["overall_f1"],
        "accuracy": all_metrics["overall_accuracy"],
    }
class RegressionTrainer(Trainer):
    """Trainer variant that optimizes mean-squared error on scalar logits."""

    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        outputs = model(**inputs)
        logits = outputs.get('logits')
        # MSE between the squeezed scalar predictions and the targets.
        residual = logits.squeeze() - labels.squeeze()
        loss = torch.mean(torch.square(residual))
        if return_outputs:
            return loss, outputs
        return loss
class CustomTrainer(Trainer):
    """Trainer with explicit cross-entropy loss and FGM adversarial training."""

    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        outputs = model(**inputs)
        logits = outputs.get('logits')
        loss_fct = nn.CrossEntropyLoss()
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

    def training_step(self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, Any]]) -> torch.Tensor:
        """One optimization step plus an extra FGM adversarial pass on the embeddings."""
        model.train()
        inputs = self._prepare_inputs(inputs)
        fgm = FGM(model, epsilon=1, emb_name='word_embeddings.')
        with self.autocast_smart_context_manager():
            loss = self.compute_loss(model, inputs)
        loss = loss / self.args.gradient_accumulation_steps
        loss = self.scaler.scale(loss)
        loss.backward()
        # Perturb the embedding weights, accumulate adversarial gradients,
        # then restore the clean weights.
        fgm.attack()
        # Bug fix: the original read `loss_adv = .compute_loss(...)` — the
        # `self` receiver was missing.
        loss_adv = self.compute_loss(model, inputs)
        loss_adv.backward()
        fgm.restore()
        # Bug fix: the step is annotated `-> torch.Tensor` but the original
        # computed `loss_adv.detach()` and discarded it, returning None; the
        # Trainer contract expects the detached step loss back.
        return loss.detach()
class MyTrainer(Trainer):
    """Trainer that includes the current learning rate in every log record.

    (Indentation restored; the original paste had lost it.)
    """

    def log(self, logs):
        logs["learning_rate"] = self._get_learning_rate()
        super().log(logs)
# Trainer configuration: log once per epoch, keep only the best checkpoint, fp16.
logging_steps = len(disaster_tweets_encoded['train']) // batch_size
model_name = f"{model_ckpt}-finetuned-disaster"
training_args = TrainingArguments(
seed=seed_val,
report_to='none',
output_dir=model_name,
learning_rate=3e-5,
per_device_train_batch_size=batch_size,
per_device_eval_batch_size=batch_size,
weight_decay=.01,
disable_tqdm=False,
logging_steps=logging_steps,
log_level='error',
load_best_model_at_end=True,
save_total_limit=1,
fp16=True,
num_train_epochs=2,
save_strategy="epoch",
evaluation_strategy='epoch',
push_to_hub=False,
)
trainer = MyTrainer(
model=model,
args=training_args,
compute_metrics=compute_metrics,
train_dataset=disaster_tweets_encoded['train'],
eval_dataset=disaster_tweets_encoded['validation'],
tokenizer=tokenizer,
# NOTE(review): `early_stop` and `mlc` callbacks are defined elsewhere in the notebook.
callbacks=[early_stop, mlc],
)
trainer.train()
trainer.evaluate()
proba_prediction_test = trainer.predict(disaster_tweets_test_encoded['test'])
# Saving and reloading weights, plus a final test-set classification report.
# NOTE(review): the two `self.` lines below were pasted out of their class
# context — they only make sense inside a trainer-like class.
self.model = BertForMaskedLM.from_pretrained(args.model_path)
torch.save(self.model.state_dict(), "bert_prompt.pt")
trainer = Trainer(args)
model = BertForMaskedLM.from_pretrained(args.model_path)
model.load_state_dict(torch.load(ckpt_path))
model.to(args.device)
trainer.save_model("my_weight")
tokenizer.save_pretrained('my_weight')
trainer.push_to_hub(commit_message="Training complete", tags="summarization")
from sklearn.metrics import classification_report
results = trainer.predict(disaster_tweets_test_encoded['test'])['predictions']
report = classification_report(test['labels'], np.argmax(results, axis=-1).tolist(), target_names=class_names)
from transformers import TrainerCallback

class CustomCallback(TrainerCallback):
    """Callback that also evaluates on the *training* set at each epoch end,
    so train metrics get logged alongside eval metrics.

    (Indentation restored; the original paste had lost it.)
    """

    def __init__(self, trainer) -> None:
        super().__init__()
        self._trainer = trainer

    def on_epoch_end(self, args, state, control, **kwargs):
        if control.should_evaluate:
            # Copy first so the nested evaluate() call can't clobber the
            # control flags we return.
            control_copy = deepcopy(control)
            self._trainer.evaluate(eval_dataset=self._trainer.train_dataset, metric_key_prefix="train")
            return control_copy

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer
)
trainer.add_callback(CustomCallback(trainer))
train = trainer.train()
# ChatGLM3-6B: 16-bit inference deployment.
from IPython.display import clear_output
!pip install transformers>=4.37.0
!pip install -q peft
!pip install -q accelerate
!pip install -q bitsandbytes
import torch
import transformers
import peft
import accelerate
print("torch:", torch.__version__)
print("transformers:", transformers.__version__)
print("peft:", peft.__version__)
print("accelerate:", accelerate.__version__)
!pip show bitsandbytes
# Sanity-check that bitsandbytes located its CUDA kernels.
from bitsandbytes.cuda_setup.main import CUDASetup
setup = CUDASetup.get_instance()
if setup.initialized != True:
setup.run_cuda_setup()
lib = setup.lib
lib.cquantize_blockwise_fp16_nf4
from huggingface_hub import snapshot_download
snapshot_download(repo_id="THUDM/chatglm3-6b", local_dir='THUDM/chatglm3-6b')
import gc
import ctypes
import torch
import random
from transformers import AutoTokenizer, AutoConfig, AutoModel, BitsAndBytesConfig
def clean_memory():
gc.collect()
ctypes.CDLL("libc.so.6").malloc_trim(0)
torch.cuda.empty_cache()
from transformers import AutoModelForCausalLM, AutoTokenizer
model_name_or_path = '/kaggle/working/THUDM/chatglm3-6b'
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, torch_dtype=torch.float16, trust_remote_code=True)
# .half() casts the weights to fp16 before moving them to the GPU.
model = AutoModel.from_pretrained(model_name_or_path, trust_remote_code=True).half().cuda()
!nvidia-smi
%%time
response, history = model.chat(tokenizer, query='你好。你是谁,你能干什么', history=[])
print(response)
输出结果: 你好,我是 ChatGLM3-6B,是清华大学 KEG 实验室和智谱 AI 公司于 2023 年共同训练的语言模型。
# Qwen1.5-7B-Chat: 4-bit NF4 quantized loading and inference.
from IPython.display import clear_output
!pip install transformers>=4.37.0
!pip install -q peft
!pip install -q accelerate
!pip install -q bitsandbytes
import torch
import transformers
import peft
import accelerate
print("torch:", torch.__version__)
print("transformers:", transformers.__version__)
print("peft:", peft.__version__)
print("accelerate:", accelerate.__version__)
!pip show bitsandbytes
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
device = "cuda"
# NF4 4-bit weights with double quantization; compute runs in fp16.
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.float16,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
llm_int8_threshold=6.0,
llm_int8_has_fp16_weight=False,
)
model = AutoModelForCausalLM.from_pretrained(
"Qwen/Qwen1.5-7B-Chat",
load_in_4bit=True,
torch_dtype=torch.float16,
quantization_config=bnb_config,
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-7B-Chat")
%%time
prompt = "什么是大模型?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
# Render the chat template, generate, then strip the prompt tokens from each output.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
gen_kwargs = {
"max_new_tokens": 1024,
"num_beams": 1,
"do_sample": True,
"top_p": 0.8,
"temperature": 0.01,
"top_k": 50,
'repetition_penalty': 1
}
generated_ids = model.generate(model_inputs.input_ids, **gen_kwargs)
generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)
输出结果: 大模型通常指的是在人工智能领域中训练规模较大、参数较多的模型...
!nvidia-smi

微信公众号「极客日志」,在微信中扫描左侧二维码关注。展示文案:极客日志 zeeklog
使用加密算法(如AES、TripleDES、Rabbit或RC4)加密和解密文本明文。 在线工具,加密/解密文本在线工具,online
生成新的随机RSA私钥和公钥pem证书。 在线工具,RSA密钥对生成器在线工具,online
基于 Mermaid.js 实时预览流程图、时序图等图表,支持源码编辑与即时渲染。 在线工具,Mermaid 预览与可视化编辑在线工具,online
解析常见 curl 参数并生成 fetch、axios、PHP curl 或 Python requests 示例代码。 在线工具,curl 转代码在线工具,online
将字符串编码和解码为其 Base64 格式表示形式即可。 在线工具,Base64 字符串编码/解码在线工具,online
将字符串、文件或图像转换为其 Base64 表示形式。 在线工具,Base64 文件转换器在线工具,online