计算机视觉基础理论与实战应用指南

计算机视觉基础理论与实战应用指南 | 极客日志

import cv2
import numpy as np

def read_image(image_path):
    """Load an image file through OpenCV.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The value produced by ``cv2.imread`` for ``image_path``, passed
        through unchanged. NOTE: per the OpenCV documentation ``imread``
        reports an unreadable file by returning ``None`` rather than
        raising, so callers should check the result.
    """
    return cv2.imread(image_path)

def save_image(image, output_path):
    """Write an image to disk through OpenCV.

    Args:
        image: Image array to save.
        output_path: Destination file path; per the OpenCV documentation
            the file format is chosen from the extension.

    Returns:
        The success flag returned by ``cv2.imwrite``. Earlier versions of
        this function discarded the flag (returning ``None``), which hid
        failed writes; callers that ignore the return value are unaffected.
    """
    return cv2.imwrite(output_path, image)

def resize_image(image, width, height):
    """Resize an image to an explicit pixel size.

    Args:
        image: Source image array.
        width: Target width.
        height: Target height.

    Returns:
        The result of ``cv2.resize`` for the requested size.
    """
    # cv2.resize takes the target size as a (width, height) pair.
    target_size = (width, height)
    return cv2.resize(image, target_size)

def adjust_brightness_contrast(image, alpha=1.0, beta=0.0):
    """Apply a linear gain/offset to an image via ``cv2.convertScaleAbs``.

    Args:
        image: Source image array.
        alpha: Scale factor forwarded to ``convertScaleAbs`` (contrast).
        beta: Offset forwarded to ``convertScaleAbs`` (brightness).

    Returns:
        The array returned by ``cv2.convertScaleAbs``.
    """
    return cv2.convertScaleAbs(image, alpha=alpha, beta=beta)

def crop_image(image, x, y, width, height):
    """Return the part of ``image`` covered by a rectangle.

    The rectangle is intersected with the image. A rectangle that starts
    left of or above the image is clipped at the border; previously a
    negative ``x`` or ``y`` was interpreted as a Python negative index and
    silently produced an empty or unrelated slice. Rectangles that extend
    past the right/bottom edge are clipped by normal slicing rules.

    Args:
        image: Array indexed as ``[row, column, ...]``.
        x: Column of the rectangle's left edge.
        y: Row of the rectangle's top edge.
        width: Rectangle width in columns.
        height: Rectangle height in rows.

    Returns:
        A slice of ``image`` (not a copy); empty when the rectangle does
        not overlap the image.
    """
    left = max(x, 0)
    top = max(y, 0)
    # Never let the end index drop below the start (or below zero), so a
    # rectangle lying entirely outside yields an empty slice instead of
    # wrapping around through negative indexing.
    right = max(x + width, left)
    bottom = max(y + height, top)
    return image[top:bottom, left:right]

def rotate_image(image, angle):
    """Rotate an image about its centre without changing the canvas size.

    Args:
        image: Source image array; its first two dimensions are read as
            (height, width).
        angle: Rotation angle passed to ``cv2.getRotationMatrix2D``.

    Returns:
        The warped image. The output size equals the input size, so content
        rotated outside the original frame is cut off.
    """
    height, width = image.shape[:2]
    pivot = (width // 2, height // 2)
    # Scale factor 1.0: rotate only, no zoom.
    rotation = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(image, rotation, (width, height))

import cv2
import numpy as np

def histogram_equalization(image):
    """Equalize the intensity histogram of an image.

    Args:
        image: Either a BGR colour image or an already-grayscale 2-D
            array. A 2-D input is used as-is; previously it was sent
            through the BGR-to-gray conversion, which rejects
            single-channel data.

    Returns:
        The single-channel result of ``cv2.equalizeHist``.
    """
    if image.ndim == 2:
        gray_image = image
    else:
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return cv2.equalizeHist(gray_image)

def mean_filter(image, kernel_size=3):
    """Smooth an image with a square box (mean) filter.

    Args:
        image: Source image array.
        kernel_size: Side length of the square averaging window.

    Returns:
        The result of ``cv2.blur``.
    """
    window = (kernel_size, kernel_size)
    return cv2.blur(image, window)

def gaussian_filter(image, kernel_size=3, sigma=0):
    """Smooth an image with a square Gaussian kernel.

    Args:
        image: Source image array.
        kernel_size: Side length of the square kernel.
        sigma: Standard deviation handed to ``cv2.GaussianBlur`` as its
            third argument.

    Returns:
        The result of ``cv2.GaussianBlur``.
    """
    window = (kernel_size, kernel_size)
    return cv2.GaussianBlur(image, window, sigma)

def median_filter(image, kernel_size=3):
    """Smooth an image with a median filter.

    Args:
        image: Source image array.
        kernel_size: Aperture size forwarded to ``cv2.medianBlur``.

    Returns:
        The result of ``cv2.medianBlur``.
    """
    return cv2.medianBlur(image, kernel_size)

import cv2
import numpy as np

def sobel_edge_detection(image):
    """Compute a Sobel gradient-magnitude edge map scaled to 0-255.

    Args:
        image: Either a BGR colour image or an already-grayscale 2-D
            array (used as-is).

    Returns:
        A ``uint8`` array holding the gradient magnitude, normalised so
        the strongest response maps to 255. An image with no gradient at
        all (e.g. a uniform colour) yields an all-zero map; previously the
        normalisation divided by zero in that case.
    """
    if image.ndim == 2:
        gray_image = image
    else:
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # 64-bit float output keeps negative gradients instead of clipping them.
    grad_x = cv2.Sobel(gray_image, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(gray_image, cv2.CV_64F, 0, 1, ksize=3)
    magnitude = np.sqrt(grad_x**2 + grad_y**2)

    peak = np.max(magnitude)
    if peak == 0:
        # Nothing to normalise: avoid 0/0 producing NaNs.
        return np.zeros(magnitude.shape, dtype=np.uint8)
    return np.uint8(magnitude / peak * 255)

def canny_edge_detection(image, threshold1=100, threshold2=200):
    """Detect edges with the Canny detector.

    Args:
        image: Either a BGR colour image or an already-grayscale 2-D
            array. A 2-D input is used as-is; previously it was sent
            through the BGR-to-gray conversion, which rejects
            single-channel data.
        threshold1: First hysteresis threshold passed to ``cv2.Canny``.
        threshold2: Second hysteresis threshold passed to ``cv2.Canny``.

    Returns:
        The edge map returned by ``cv2.Canny``.
    """
    if image.ndim == 2:
        gray_image = image
    else:
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return cv2.Canny(gray_image, threshold1, threshold2)

import cv2
import numpy as np

def extract_hog_features(image):
    """Compute HOG features with OpenCV's default ``HOGDescriptor``.

    Args:
        image: BGR colour image; it is converted to grayscale first.

    Returns:
        The feature array returned by ``HOGDescriptor.compute``.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Default-constructed descriptor: all HOG parameters are OpenCV defaults.
    descriptor = cv2.HOGDescriptor()
    return descriptor.compute(grayscale)

import cv2
import numpy as np

def extract_sift_features(image):
    """Detect SIFT keypoints and compute their descriptors.

    Args:
        image: BGR colour image; it is converted to grayscale first.

    Returns:
        The ``(keypoints, descriptors)`` pair from
        ``detectAndCompute``, called without a mask.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    detector = cv2.SIFT_create()
    keypoints, descriptors = detector.detectAndCompute(grayscale, None)
    return keypoints, descriptors

import cv2
import numpy as np

def extract_orb_features(image):
    """Detect ORB keypoints and compute their descriptors.

    Args:
        image: BGR colour image; it is converted to grayscale first.

    Returns:
        The ``(keypoints, descriptors)`` pair from
        ``detectAndCompute``, called without a mask.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    detector = cv2.ORB_create()
    keypoints, descriptors = detector.detectAndCompute(grayscale, None)
    return keypoints, descriptors

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

def train_resnet_model(data_dir, num_classes=2, batch_size=32, num_epochs=10, lr=0.001):
    """Fine-tune a pretrained ResNet-18 on an image-folder dataset.

    The dataset is read with ``ImageFolder`` from ``{data_dir}/train`` and
    ``{data_dir}/val``. The network's final fully connected layer is
    replaced by a new ``num_classes``-way linear layer and all parameters
    are trained with SGD (momentum 0.9) under a step learning-rate schedule
    (x0.1 every 7 epochs). Per-epoch loss and accuracy for both phases are
    printed to stdout.

    Args:
        data_dir: Directory containing the ``train`` and ``val`` subfolders.
        num_classes: Output size of the replacement classification layer.
        batch_size: Batch size for both data loaders.
        num_epochs: Number of passes over the training set.
        lr: Initial SGD learning rate.

    Returns:
        The trained model, in whatever mode the last phase ('val') left it,
        i.e. evaluation mode when ``num_epochs > 0``.
    """
    phases = ['train', 'val']

    # Preprocessing: random crop/flip augmentation for training, a
    # deterministic resize + centre crop for validation.
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    }

    # Load the data. Only the training set is shuffled; shuffling the
    # validation set has no effect on the reported metrics.
    image_datasets = {x: datasets.ImageFolder(f'{data_dir}/{x}', data_transforms[x]) for x in phases}
    dataloaders = {
        x: DataLoader(image_datasets[x], batch_size=batch_size, shuffle=(x == 'train'), num_workers=4)
        for x in phases
    }
    dataset_sizes = {x: len(image_datasets[x]) for x in phases}

    # Load the model and swap in a classification head of the right size.
    model = models.resnet18(pretrained=True)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)

    # Loss function, optimizer and learning-rate schedule.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    # Training loop: each epoch runs a training phase then a validation phase.
    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)
        for phase in phases:
            is_training = phase == 'train'
            if is_training:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0  # plain int, so it stays valid even with zero batches
            for inputs, labels in dataloaders[phase]:
                optimizer.zero_grad()
                # Gradients are tracked only while training.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    if is_training:
                        loss.backward()
                        optimizer.step()
                # criterion returns the batch mean; re-weight by batch size
                # so the epoch loss is a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            if is_training:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
    print('Training complete')
    return model

pip install opencv-python
pip install pillow
pip install torch torchvision
pip install tensorflow

import tkinter as tk
from tkinter import filedialog
from PIL import Image, ImageTk

class ImageInputFrame(tk.Frame):
    """Frame with an image preview and a button for picking an image file.

    After the user picks a file, a 400x300 preview is shown and the
    ``on_image_selected`` callback is invoked with the chosen file path.
    """

    def __init__(self, parent, on_image_selected):
        """Build the frame.

        Args:
            parent: Parent tkinter widget.
            on_image_selected: Callable taking the selected file path.
        """
        tk.Frame.__init__(self, parent)
        self.parent = parent
        self.on_image_selected = on_image_selected
        self.create_widgets()

    def create_widgets(self):
        """Create the preview label and the file-selection button."""
        self.image_label = tk.Label(self)
        self.image_label.pack(pady=10, padx=10, fill="both", expand=True)
        tk.Button(self, text="选择图像", command=self.select_image).pack(pady=10, padx=10)

    def select_image(self):
        """Ask for an image file, show its preview and notify the callback.

        Does nothing when the dialog is cancelled.
        """
        file_path = filedialog.askopenfilename(filetypes=[("Image Files", "*.png *.jpg *.jpeg *.bmp")])
        if not file_path:
            return

        # Close the file as soon as the resized copy exists. LANCZOS is the
        # filter formerly exposed as Image.ANTIALIAS, which Pillow 10 removed.
        with Image.open(file_path) as image:
            preview = image.resize((400, 300), Image.LANCZOS)

        photo = ImageTk.PhotoImage(preview)
        self.image_label.configure(image=photo)
        # Keep a reference on the widget so the photo is not garbage-collected
        # while it is displayed.
        self.image_label.image = photo
        self.on_image_selected(file_path)

import torch
from torchvision import transforms, models
from PIL import Image

def classify_image(image_path, model_path, class_names):
    """Classify one image with a ResNet-18 checkpoint.

    Args:
        image_path: Path of the image file to classify.
        model_path: Path of a saved ``state_dict`` for a ResNet-18 whose
            final layer has ``len(class_names)`` outputs.
        class_names: Sequence of class labels indexed by class id.

    Returns:
        The entry of ``class_names`` for the highest-scoring class.
    """
    data_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # Force three channels: grayscale or RGBA files would otherwise not
    # match the 3-channel Normalize above / the network's first layer.
    with Image.open(image_path) as image:
        batch = data_transforms(image.convert('RGB')).unsqueeze(0)

    model = models.resnet18()
    num_ftrs = model.fc.in_features
    model.fc = torch.nn.Linear(num_ftrs, len(class_names))
    # The model lives on the CPU, so map the checkpoint there; this also
    # lets a checkpoint saved from a GPU load on a CPU-only machine.
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.eval()

    with torch.no_grad():
        outputs = model(batch)
        _, preds = torch.max(outputs, 1)
    return class_names[preds[0].item()]

import cv2
import numpy as np
import torch
from torchvision import transforms, models
from PIL import Image

def detect_objects(image_path, model_path, class_names):
    """Run a Faster R-CNN checkpoint on an image and draw its detections.

    Args:
        image_path: Path of the image file to analyse.
        model_path: Path of a saved ``state_dict`` for a
            ``fasterrcnn_resnet50_fpn`` whose box predictor has
            ``len(class_names)`` outputs.
        class_names: Sequence of labels indexed by the model's class ids.

    Returns:
        The original-resolution BGR image with a green box and a
        ``"label: score"`` caption drawn for every detection scoring
        above 0.5.

    Raises:
        OSError: If the image cannot be read.
    """
    input_size = 416  # side length the image is resized to before inference
    score_threshold = 0.5

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising on failure.
        raise OSError(f"Could not read image: {image_path}")
    orig_height, orig_width = image.shape[:2]

    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_pil = Image.fromarray(image_rgb)

    # NOTE(review): torchvision detection models normalise their input
    # internally; the explicit Normalize is kept because the checkpoint may
    # have been trained with it - confirm against the training pipeline.
    data_transforms = transforms.Compose([
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    image_tensor = data_transforms(image_pil)
    image_tensor = image_tensor.unsqueeze(0)

    model = models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, len(class_names))
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.eval()

    with torch.no_grad():
        outputs = model(image_tensor)

    boxes = outputs[0]['boxes'].cpu().numpy()
    scores = outputs[0]['scores'].cpu().numpy()
    labels = outputs[0]['labels'].cpu().numpy()

    # Boxes are expressed in the coordinates of the resized input tensor;
    # map (x1, y1, x2, y2) back to the original image before drawing on it.
    scale_x = orig_width / input_size
    scale_y = orig_height / input_size
    boxes = boxes * np.array([scale_x, scale_y, scale_x, scale_y])

    for box, score, label_id in zip(boxes, scores, labels):
        if score > score_threshold:
            x1, y1, x2, y2 = (int(v) for v in box)
            label = class_names[int(label_id)]
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(image, f"{label}: {score:.2f}", (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
    return image

import tkinter as tk
from tkinter import ttk, messagebox, filedialog
from PIL import Image, ImageTk
from image_input_frame import ImageInputFrame
from result_frame import ResultFrame
from cv_functions import classify_image, detect_objects

class CVApp:
    """Main window: image picker, task selector and result display.

    Selecting an image in the input frame triggers ``process_image``, which
    runs either classification or object detection depending on the radio
    button state.
    """

    def __init__(self, root):
        """Configure the root window and build the widgets.

        Args:
            root: The application's ``tk.Tk`` root window.
        """
        self.root = root
        self.root.title("计算机视觉应用")
        self.class_names = ['猫', '狗']
        self.model_path = 'model.pth'
        self.create_widgets()

    def create_widgets(self):
        """Create the input frame, task radio buttons and output widgets."""
        self.image_input_frame = ImageInputFrame(self.root, self.process_image)
        self.image_input_frame.pack(pady=10, padx=10, fill="both", expand=True)

        function_frame = tk.LabelFrame(self.root, text="功能选择")
        function_frame.pack(pady=10, padx=10, fill="x")
        self.function_var = tk.StringVar()
        self.function_var.set("图像分类")
        # One radio button per task; the button text doubles as its value.
        for column, task in enumerate(("图像分类", "目标检测")):
            tk.Radiobutton(function_frame, text=task, variable=self.function_var, value=task).grid(row=0, column=column, padx=5, pady=5)

        self.result_frame = ResultFrame(self.root)
        self.result_frame.pack(pady=10, padx=10, fill="both", expand=True)

        self.output_image_label = tk.Label(self.root)
        self.output_image_label.pack(pady=10, padx=10, fill="both", expand=True)

    def process_image(self, image_path):
        """Run the selected task on an image and show the outcome.

        Classification results go to the result frame; detection results
        are shown as a 400x300 annotated image. Any exception is reported
        in an error dialog instead of propagating into the tkinter event
        loop.

        Args:
            image_path: Path of the image chosen by the user.
        """
        function = self.function_var.get()
        try:
            if function == "图像分类":
                result = classify_image(image_path, self.model_path, self.class_names)
                self.result_frame.display_result(result)
            elif function == "目标检测":
                result_image = detect_objects(image_path, self.model_path, self.class_names)
                # OpenCV images are BGR; PIL expects RGB.
                result_image = cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB)
                result_image_pil = Image.fromarray(result_image)
                # LANCZOS is the filter formerly exposed as Image.ANTIALIAS,
                # which Pillow 10 removed.
                result_image_pil = result_image_pil.resize((400, 300), Image.LANCZOS)
                photo = ImageTk.PhotoImage(result_image_pil)
                self.output_image_label.configure(image=photo)
                # Keep a reference so the photo is not garbage-collected.
                self.output_image_label.image = photo
            else:
                raise ValueError("未知功能")
        except Exception as e:
            # UI boundary: surface every failure to the user.
            messagebox.showerror("错误", f"处理失败：{str(e)}")

# Script entry point: build the root window, attach the application to it
# and hand control to the tkinter event loop (blocks until the window closes).
if __name__ == "__main__":
    root = tk.Tk()
    app = CVApp(root)
    root.mainloop()

计算机视觉基础理论与实战应用指南

计算机视觉的基础与应用

学习目标

重点内容概览

一、计算机视觉基础

1.1 基本概念

1.1.1 核心价值

1.1.2 应用场景

1.2 面临的挑战

二、图像处理技术

2.1 图像预处理

2.1.1 常见格式

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

2.1.2 基础操作

2.2 图像增强

2.2.1 直方图均衡化

2.2.2 图像平滑

2.3 图像滤波

2.3.1 边缘检测

三、特征提取方法

3.1 HOG 特征

3.2 SIFT 特征

3.3 ORB 特征

四、常用模型与架构

4.1 传统机器学习模型

4.2 深度学习模型

4.3 模型训练实战

五、实战项目：计算机视觉应用开发

5.1 需求分析

5.2 系统架构

5.3 系统实现

5.3.1 环境搭建

5.3.2 图像输入模块

5.3.3 图像分类功能

5.3.4 目标检测功能

5.3.5 结果可视化与主程序

5.4 运行与测试

六、总结

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具