import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertModel, AdamW
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score
def train():
    """Run one training epoch over ``train_loader``.

    Relies on the module-level ``model``, ``optimizer``, ``train_loader`` and
    ``device`` objects being defined before the call. Each batch is expected
    to unpack into three tensors: ``(sent_id, mask, labels)``.

    Returns:
        tuple: ``(avg_loss, total_preds)`` where ``avg_loss`` is the mean of
        the per-batch losses and ``total_preds`` is a NumPy array holding the
        model outputs of every batch, concatenated along axis 0.
    """
    model.train()

    # Build the criterion once instead of re-creating it for every batch.
    loss_function = nn.CrossEntropyLoss()

    total_loss = 0.0
    total_preds = []

    for batch in train_loader:
        # Move every tensor of the batch to the target device.
        sent_id, mask, labels = [item.to(device) for item in batch]

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss computation.
        preds = model(sent_id, mask)
        loss = loss_function(preds, labels)
        total_loss += loss.item()

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        # Detach from the graph before storing so the graph can be freed.
        total_preds.append(preds.detach().cpu().numpy())

    # Mean of the per-batch losses.
    avg_loss = total_loss / len(train_loader)

    # Stack the per-batch outputs into a single array.
    total_preds = np.concatenate(total_preds, axis=0)

    return avg_loss, total_preds
评估函数:
def evaluate():
    """Compute the loss and model outputs over ``val_loader`` without training.

    Relies on the module-level ``model``, ``val_loader`` and ``device``
    objects being defined before the call. Each batch is expected to unpack
    into three tensors: ``(sent_id, mask, labels)``.

    Unlike training, no gradients are computed and the optimizer is never
    stepped, so the model parameters are left untouched by validation data.

    Returns:
        tuple: ``(avg_loss, total_preds)`` where ``avg_loss`` is the mean of
        the per-batch losses and ``total_preds`` is a NumPy array holding the
        model outputs of every batch, concatenated along axis 0.
    """
    model.eval()

    # Build the criterion once instead of re-creating it for every batch.
    loss_function = nn.CrossEntropyLoss()

    total_loss = 0.0
    total_preds = []

    # Evaluation must not update weights: disable autograd entirely and
    # skip the backward pass / optimizer step.
    with torch.no_grad():
        for batch in val_loader:
            # Move every tensor of the batch to the target device.
            sent_id, mask, labels = [item.to(device) for item in batch]

            # Forward pass and loss computation only.
            preds = model(sent_id, mask)
            loss = loss_function(preds, labels)
            total_loss += loss.item()

            total_preds.append(preds.detach().cpu().numpy())

    # Mean of the per-batch losses.
    avg_loss = total_loss / len(val_loader)

    # Stack the per-batch outputs into a single array.
    total_preds = np.concatenate(total_preds, axis=0)

    return avg_loss, total_preds
5. 主训练循环
我们现在将使用这些函数来训练模型:
# Main training loop: train and validate for a fixed number of epochs,
# checkpointing the weights whenever the validation loss improves.

# Start from +inf so the first epoch always counts as an improvement.
best_valid_loss = float('inf')

# Number of passes over the training data.
epochs = 5

# Per-epoch training and validation losses.
train_losses = []
valid_losses = []

for epoch in range(epochs):
    print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))

    # Train for one epoch, then measure the loss on the validation set.
    train_loss, _ = train()
    valid_loss, _ = evaluate()

    # Keep only the checkpoint with the lowest validation loss seen so far.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'saved_weights.pt')
        print(f'\nSaved new best model with loss: {valid_loss:.3f}')

    # Record the losses of this epoch.
    train_losses.append(train_loss)
    valid_losses.append(valid_loss)

    print(f'\nTraining Loss: {train_loss:.3f}')
    print(f'Validation Loss: {valid_loss:.3f}')
6. 模型推理与加载
至此,模型训练完成。您可以使用训练好的模型对任意数据或文本进行推理。首先加载保存的权重。
# Load the best checkpoint written by the training loop. map_location lets
# weights saved on one device (e.g. GPU) be restored onto the current one.
model.load_state_dict(torch.load('saved_weights.pt', map_location=device))
model.eval()

# Example prediction on a single sentence.
test_text = "This movie was absolutely fantastic and I loved every minute of it."
encoded_input = tokenizer(
    test_text,
    return_tensors='pt',
    max_length=250,
    padding=True,
    truncation=True,
)
input_ids = encoded_input['input_ids'].to(device)
attention_mask = encoded_input['attention_mask'].to(device)

# Inference only: no gradients are needed.
with torch.no_grad():
    output = model(input_ids, attention_mask)
    # Index of the highest-scoring class for the single input sentence.
    prediction = torch.argmax(output, dim=1).item()

# NOTE(review): assumes label 1 means positive sentiment — confirm against
# the label encoding used when the training data was prepared.
print(f"Predicted sentiment: {'Positive' if prediction == 1 else 'Negative'}")