train: ./images/visible/train
val: ./images/visible/val
nc: 10
names:
['dmjrb',
'ns',
'dyrb',
'ejgdl',
'zw',
'yyzd',
'ygfs',
'ycdw',
'dmjrb_ycdw',
'dyrb_ycdw']
""" 红外 - 可见光图像配准脚本
输入:一对红外图 + 可见光图(同一场景)
输出:配准后的红外图(与可见光对齐)
"""
import cv2
import numpy as np
import os
from skimage import exposure
def preprocess_images(ir_img, vis_img):
    """Prepare an infrared / visible image pair for feature matching.

    The infrared image is cast to uint8, contrast-enhanced with skimage's
    adaptive histogram equalization (``clip_limit=0.03``), multiplied by 255
    and cast back to uint8. The visible image is converted from BGR to
    grayscale when its shape has three dimensions; otherwise it is passed
    through untouched.

    :param ir_img: infrared image array
    :param vis_img: visible-light image array (BGR colour or single channel)
    :return: tuple ``(enhanced_ir, vis_gray)``
    """
    ir_as_u8 = ir_img.astype(np.uint8)
    equalized = exposure.equalize_adapthist(ir_as_u8, clip_limit=0.03)
    ir_enhanced = (equalized * 255).astype(np.uint8)

    # Only colour inputs need a conversion; single-channel inputs are reused.
    if len(vis_img.shape) != 3:
        return ir_enhanced, vis_img
    return ir_enhanced, cv2.cvtColor(vis_img, cv2.COLOR_BGR2GRAY)
def register_ir_to_visible(ir_path, vis_path, output_path=None):
    """Register an infrared image onto its visible-light counterpart.

    Steps: read both images, enhance them with ``preprocess_images``, detect
    SIFT keypoints, match descriptors with a FLANN KD-tree matcher, filter
    matches with a 0.7 ratio test, estimate a homography with RANSAC
    (reprojection threshold 5.0) and warp the infrared image to the visible
    image's size.

    Whenever registration cannot be performed (too few keypoints, too few
    good matches, or no homography) a warning is printed and the original,
    unwarped infrared image is returned; nothing is written to
    ``output_path`` in that case.

    :param ir_path: path of the infrared image
    :param vis_path: path of the visible-light image
    :param output_path: where to save the registered infrared image; when
        falsy the result is only returned
    :return: the registered infrared image (H x W of the visible image), or
        the original infrared image when registration was skipped
    :raises FileNotFoundError: if either image cannot be read
    """
    ir = cv2.imread(ir_path, cv2.IMREAD_GRAYSCALE)
    vis = cv2.imread(vis_path, cv2.IMREAD_COLOR)
    if ir is None or vis is None:
        raise FileNotFoundError(f"图像未找到:{ir_path} 或 {vis_path}")

    ir_proc, vis_gray = preprocess_images(ir, vis)

    sift = cv2.SIFT_create()
    kp1, des1 = sift.detectAndCompute(ir_proc, None)
    kp2, des2 = sift.detectAndCompute(vis_gray, None)
    # A homography needs at least four point correspondences.
    if des1 is None or des2 is None or len(kp1) < 4 or len(kp2) < 4:
        print("⚠️ 特征点不足,跳过配准,直接返回原图")
        return ir

    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)
    flann = cv2.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(des1, des2, k=2)

    # Ratio test. knnMatch may yield fewer than two neighbours for a query
    # descriptor, so entries are length-checked instead of blindly unpacked.
    good_matches = [
        pair[0]
        for pair in matches
        if len(pair) == 2 and pair[0].distance < 0.7 * pair[1].distance
    ]
    if len(good_matches) < 4:
        print("⚠️ 有效匹配点不足,跳过配准")
        return ir

    src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
    H, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    if H is None:
        print("⚠️ 单应性矩阵计算失败,返回原图")
        return ir

    # Warp into the visible image's frame so both images share one geometry.
    h, w = vis.shape[:2]
    ir_registered = cv2.warpPerspective(ir, H, (w, h))

    if output_path:
        # imwrite reports failure through its return value; do not claim
        # success when nothing was saved.
        if cv2.imwrite(output_path, ir_registered):
            print(f"✅ 配准完成:{output_path}")
        else:
            print(f"⚠️ 配准结果保存失败:{output_path}")
    return ir_registered
if __name__ == "__main__":
    # Demo: register one sample pair, then save and show a three-panel
    # comparison (raw infrared, visible, registered infrared).
    ir_file = "sample/ir_001.jpg"
    vis_file = "sample/vis_001.jpg"
    out_file = "sample/ir_registered_001.jpg"
    registered = register_ir_to_visible(ir_file, vis_file, out_file)

    import matplotlib.pyplot as plt

    vis = cv2.imread(vis_file)
    plt.figure(figsize=(12, 4))

    plt.subplot(1, 3, 1)
    plt.imshow(cv2.imread(ir_file), cmap='gray')
    plt.title("原始红外")

    plt.subplot(1, 3, 2)
    # Reverse the channel axis: OpenCV loads BGR, matplotlib expects RGB.
    plt.imshow(vis[:, :, ::-1])
    plt.title("可见光")

    plt.subplot(1, 3, 3)
    plt.imshow(registered, cmap='gray')
    plt.title("配准后红外")

    plt.tight_layout()
    plt.savefig("registration_comparison.png", dpi=200)
    plt.show()
""" 双模态 YOLO 模型:融合红外 + 可见光进行光伏缺陷检测
基于 YOLOv8 架构改造
"""
import torch
import torch.nn as nn
from ultralytics.nn.tasks import DetectionModel
from ultralytics import YOLO
from pathlib import Path
class DualModalYOLO(nn.Module):
    """Dual-modal YOLO model fusing infrared and visible-light features.

    Inputs:  infrared ``[B, 1, H, W]`` and visible ``[B, 3, H, W]`` (same H, W).
    Output:  whatever the last layer of the pretrained YOLOv8 model returns.

    Layout: layers ``[:6]`` of a pretrained YOLOv8 model are used as the
    visible backbone, a separately loaded copy of the same layers as the
    infrared backbone. Their final feature maps are concatenated, reduced
    back to the original channel count by a 1x1 convolution, and passed
    through the remaining layers ``[6:]`` of the pretrained model.

    NOTE(review): ``num_classes`` is only stored; the detection head is the
    one from the checkpoint and keeps its class count - confirm whether the
    head should be rebuilt for ``num_classes``.
    """

    def __init__(self, num_classes=10, model_size='s'):
        """
        :param num_classes: number of target classes (stored, see class note)
        :param model_size: YOLOv8 size suffix used to pick ``yolov8{size}.pt``
        """
        super().__init__()
        self.num_classes = num_classes
        self.model_size = model_size
        base_model = YOLO(f'yolov8{model_size}.pt').model
        self.backbone_vis = base_model.model[:6]
        self.backbone_ir = self._copy_backbone(model_size)
        # Lift the single infrared channel to the 3 channels the stem expects.
        self.ir_proj = nn.Conv2d(1, 3, kernel_size=1)
        # The fusion conv sees two concatenated feature maps, so its width
        # must follow the backbone output, not the image channel count.
        channels = self._out_channels(self.backbone_vis)
        self.fusion_conv = nn.Conv2d(2 * channels, channels, kernel_size=1)
        self.shared_layers = base_model.model[6:]
        # Same module object as shared_layers[-1]; kept as a named handle.
        self.detection_head = base_model.model[-1]

    @staticmethod
    def _out_channels(module):
        """Return ``out_channels`` of the last ``nn.Conv2d`` inside *module*.

        Assumes the module's output is produced by that convolution (only
        followed by channel-preserving ops) - TODO confirm for other slices.
        """
        convs = [m for m in module.modules() if isinstance(m, nn.Conv2d)]
        if not convs:
            raise ValueError("backbone contains no nn.Conv2d layer")
        return convs[-1].out_channels

    def _copy_backbone(self, model_size=None):
        """Load a second backbone with its own weights (not shared).

        :param model_size: YOLOv8 size suffix; defaults to the size this
            instance was built with so both branches have matching widths
        """
        if model_size is None:
            model_size = getattr(self, 'model_size', 's')
        base_model = YOLO(f'yolov8{model_size}.pt').model
        return nn.Sequential(*list(base_model.model[:6]))

    def forward(self, x_ir, x_vis):
        """
        :param x_ir: [B, 1, H, W] infrared image
        :param x_vis: [B, 3, H, W] visible image
        :return: output of the final shared layer
        """
        feat_ir = self.backbone_ir(self.ir_proj(x_ir))

        # Keep every layer output by index: later layers pull skip inputs
        # from earlier ones through their ``f`` attribute. Skips into the
        # backbone use the visible branch; only the last map is fused.
        outputs = []
        feat_vis = x_vis
        for layer in self.backbone_vis:
            feat_vis = layer(feat_vis)
            outputs.append(feat_vis)

        fused = self.fusion_conv(torch.cat([feat_ir, feat_vis], dim=1))
        outputs[-1] = fused

        out = fused
        for layer in self.shared_layers:
            src = getattr(layer, 'f', -1)
            if src != -1:
                # An int selects one saved output; a list gathers several
                # (-1 meaning the previous layer's output).
                if isinstance(src, int):
                    out = outputs[src]
                else:
                    out = [out if j == -1 else outputs[j] for j in src]
            out = layer(out)
            outputs.append(out)
        return out
from torch.utils.data import Dataset, DataLoader
import os
from PIL import Image
import numpy as np
class DualModalDataset(Dataset):
    """Paired infrared / visible dataset with YOLO-format labels.

    Expects ``image_dir/visible`` and ``image_dir/infrared`` to hold
    identically named ``.jpg`` files, and ``label_dir`` to hold one ``.txt``
    per image with lines of ``cls x y w h``. A missing label file means the
    image has no boxes.
    """

    def __init__(self, image_dir, label_dir, img_size=640):
        """
        :param image_dir: directory containing ``visible`` and ``infrared``
        :param label_dir: directory containing the label ``.txt`` files
        :param img_size: both images are resized to ``img_size x img_size``
        """
        self.img_size = img_size
        self.image_pairs = []
        vis_dir = os.path.join(image_dir, 'visible')
        ir_dir = os.path.join(image_dir, 'infrared')
        # Sorted so sample indices are stable across runs and platforms.
        for fname in sorted(os.listdir(vis_dir)):
            if not fname.endswith('.jpg'):
                continue
            # splitext only strips the extension; str.replace would also
            # rewrite a '.jpg' occurring inside the file name.
            stem = os.path.splitext(fname)[0]
            self.image_pairs.append({
                'vis': os.path.join(vis_dir, fname),
                'ir': os.path.join(ir_dir, fname),
                'label': os.path.join(label_dir, stem + '.txt'),
            })

    def __len__(self):
        return len(self.image_pairs)

    @staticmethod
    def _read_labels(label_path):
        """Parse one label file into ``(boxes, classes)`` Python lists."""
        boxes, classes = [], []
        if not os.path.exists(label_path):
            return boxes, classes
        with open(label_path) as fh:
            for line in fh:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                if len(fields) != 5:
                    raise ValueError(
                        f"{label_path}: expected 5 fields per line, got {len(fields)}"
                    )
                cls, x, y, w, h = map(float, fields)
                boxes.append([x, y, w, h])
                classes.append(int(cls))
        return boxes, classes

    def __getitem__(self, idx):
        """Return ``(ir, vis, boxes, classes)`` for sample *idx*.

        ``ir`` is ``[1, S, S]`` and ``vis`` is ``[3, S, S]`` (float, scaled
        to 0..1), ``boxes`` is ``[N, 4]`` float32 and ``classes`` is ``[N]``
        int64; N is 0 when the image has no labels.
        """
        pair = self.image_pairs[idx]
        size = (self.img_size, self.img_size)
        # Context managers close the underlying files once pixels are read.
        with Image.open(pair['vis']) as img:
            vis = img.convert('RGB').resize(size)
        with Image.open(pair['ir']) as img:
            ir = img.convert('L').resize(size)
        vis = torch.from_numpy(np.array(vis)).permute(2, 0, 1).float() / 255.0
        ir = torch.from_numpy(np.array(ir)).unsqueeze(0).float() / 255.0

        boxes, classes = self._read_labels(pair['label'])
        # Explicit shape/dtype keeps empty samples consistent with
        # non-empty ones instead of degrading to a float tensor of shape (0,).
        boxes_t = torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        classes_t = torch.tensor(classes, dtype=torch.long)
        return ir, vis, boxes_t, classes_t
def train_dual_modal():
    """Proof-of-concept training setup for the dual-modal model.

    Builds the dataset, a data loader, the model and an Adam optimizer, then
    prints guidance; no training loop is run here.
    """

    def _collate(batch):
        # Images share one size and can be stacked; each image has its own
        # number of boxes, so boxes/classes stay as per-image lists (the
        # default collate would fail trying to stack them).
        irs, viss, boxes, classes = zip(*batch)
        return torch.stack(irs), torch.stack(viss), list(boxes), list(classes)

    dataset = DualModalDataset(
        image_dir='pv_defect_dataset/images',
        label_dir='pv_defect_dataset/labels',
        img_size=640)
    dataloader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=_collate)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = DualModalYOLO(num_classes=10).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    print("💡 提示:完整训练需继承 YOLOv8 Trainer,此处为概念验证")
    print("📌 建议:先用单模态(可见光)训练 baseline,再扩展双模态")
if __name__ == "__main__":
    # Smoke test: build the model and push one random pair through it.
    model = DualModalYOLO(num_classes=10)
    dummy_ir = torch.randn(1, 1, 640, 640)
    dummy_vis = torch.randn(1, 3, 640, 640)
    output = model(dummy_ir, dummy_vis)

    def _shapes(out):
        # The head may return a bare tensor, a list of tensors, or a nested
        # (tensor, [tensors]) structure; walk it instead of assuming a flat
        # list of tensors.
        if isinstance(out, (list, tuple)):
            return [_shapes(item) for item in out]
        return out.shape

    print("✅ 双模态模型构建成功!输出形状:", _shapes(output))