【算法工程师】—— Python 数据分析

Ne0inhk

21 Mar 2026 — 62 min read

Python 数据分析

Numpy

特点：

ndarray：N维数组对象，快速高效
向量化操作：避免循环，提高性能
广播机制：不同形状数组的运算
丰富的数学函数：线性代数、傅里叶变换等

数组创建与初始化

函数/方法	作用	参数示例
`np.array()`	从列表/元组创建数组	`np.array([1,2,3])`
`np.zeros()`	创建全0数组	`np.zeros((3,3))`
`np.ones()`	创建全1数组	`np.ones((2,4))`
`np.full()`	创建指定值数组	`np.full((3,3), 5)`
`np.arange()`	创建等差数组	`np.arange(0, 10, 2)`
`np.linspace()`	创建等间隔数组	`np.linspace(0, 1, 5)`
`np.random.rand()`	随机数组(0-1)	`np.random.rand(3,3)`
`np.random.randn()`	标准正态分布	`np.random.randn(100)`
`np.random.randint()`	随机整数数组	`np.random.randint(0,10,(3,3))`
`np.eye()`	单位矩阵	`np.eye(3)`
`np.diag()`	对角矩阵	`np.diag([1,2,3])`
`np.empty()`	未初始化数组	`np.empty((3,3))`

import numpy as np

# 2.1 Basic array creation.
# The snippet was collapsed onto one line, which turned everything after the
# first '#' into a comment; the statements are restored one per line.

# From Python lists
arr1 = np.array([1, 2, 3, 4, 5])
arr2d = np.array([[1, 2, 3], [4, 5, 6]])

# Constant-filled arrays
zeros = np.zeros((3, 3))       # 3x3 array of zeros
ones = np.ones((2, 4))         # 2x4 array of ones
full = np.full((3, 3), 255)    # 3x3 array filled with 255

# Sequences
range_arr = np.arange(0, 10, 2)        # [0, 2, 4, 6, 8]
linspace_arr = np.linspace(0, 1, 5)    # [0., 0.25, 0.5, 0.75, 1.]

# Random arrays
random_arr = np.random.rand(3, 3)               # uniform samples in [0, 1)
normal_arr = np.random.randn(100)               # 100 standard-normal samples
int_random = np.random.randint(0, 256, (3, 3))  # integers in [0, 255]

# Matrices
identity = np.eye(3)            # 3x3 identity matrix
diagonal = np.diag([1, 2, 3])   # diagonal matrix

print("基础数组创建示例完成")
print(f"数组形状 zeros: {zeros.shape}, 类型: {zeros.dtype}")

数组属性与信息

属性/方法	作用	示例
`.shape`	数组形状	`arr.shape`
`.ndim`	数组维度	`arr.ndim`
`.size`	元素总数	`arr.size`
`.dtype`	数据类型	`arr.dtype`
`.itemsize`	元素字节数	`arr.itemsize`
`.nbytes`	总字节数	`arr.nbytes`
`np.shape()`	获取形状	`np.shape(arr)`
`np.ndim()`	获取维度	`np.ndim(arr)`
`np.size()`	获取元素数	`np.size(arr)`
`np.dtype()`	构造数据类型对象（获取数组类型请用 `arr.dtype`；`np.dtype(arr)` 会报错）	`np.dtype('float32')`

# 3.1 Array attributes
arr = np.random.rand(3, 4, 5)
print("数组属性:")
print(f"形状 shape: {arr.shape}")                 # (3, 4, 5)
print(f"维度 ndim: {arr.ndim}")                   # 3
print(f"元素总数 size: {arr.size}")               # 60
print(f"数据类型 dtype: {arr.dtype}")             # float64
print(f"元素字节数 itemsize: {arr.itemsize}")     # 8 bytes
print(f"总字节数 nbytes: {arr.nbytes}")           # 480 bytes

# 3.2 Type conversion
# The source array is created as int32 so that the conversion below really is
# integer -> float (the original built it as float32 despite the name).
int_arr = np.array([1, 2, 3], dtype=np.int32)
float_arr = int_arr.astype(np.float64)   # astype returns a converted copy
uint8_arr = np.array([0, 128, 255], dtype=np.uint8)
print(f"类型转换: {int_arr.dtype} -> {float_arr.dtype}")
print(f"uint8范围: [{uint8_arr.min()}, {uint8_arr.max()}]")

数组索引与切片

操作	语法	说明
基本索引	`arr[index]`	单个元素
切片	`arr[start:end:step]`	切片操作
多维索引	`arr[row, col]`	多维数组索引
布尔索引	`arr[mask]`	使用布尔数组
花式索引	`arr[[indices]]`	使用整数数组
`:`	`arr[:]`	所有元素
`...`	`arr[..., 0]`	省略号索引

# 4.1 Build a 4x6 test array holding 0..23
arr = np.arange(24).reshape(4, 6)
print("原始数组:")
print(arr)

# 4.2 Basic indexing
print(f"\n单个元素 arr[2, 3]: {arr[2,3]}")   # 15
print(f"整行 arr[1]: {arr[1]}")              # row with index 1
print(f"整列 arr[:, 2]: {arr[:, 2]}")        # column with index 2

# 4.3 Slicing
print(f"\n切片 arr[1:3, 2:5]:")
print(arr[1:3, 2:5])      # rows 1-2, columns 2-4
print(f"\n带步长 arr[::2, ::2]:")
print(arr[::2, ::2])      # every second row and every second column

# 4.4 Boolean indexing
mask = arr > 10
print(f"\n布尔索引 (arr > 10):")
print(arr[mask])          # 1-D array of all elements greater than 10

# 4.5 Fancy indexing
indices = [0, 2, 3]
print(f"\n花式索引 arr[[0, 2, 3]]:")
print(arr[indices])       # rows 0, 2 and 3

# 4.6 Ellipsis indexing
arr_3d = np.arange(60).reshape(3, 4, 5)
print(f"\n省略号索引 arr_3d[..., 0]:")
print(arr_3d[..., 0])     # index 0 along the last axis; result shape is (3, 4)

形状操作与重塑

函数/方法	作用	示例
`.reshape()`	重塑形状	`arr.reshape((3,4))`
`.resize()`	改变数组形状	`arr.resize((3,4))`
`.flatten()`	展平为一维	`arr.flatten()`
`.ravel()`	展平（尽可能返回视图，否则返回拷贝）	`arr.ravel()`
`.transpose()`	转置	`arr.transpose()` 或 `arr.T`
`.swapaxes()`	交换轴	`arr.swapaxes(0,1)`
`.squeeze()`	移除单维度	`arr.squeeze()`
`np.expand_dims()`	增加维度	`np.expand_dims(arr, axis)`
`np.concatenate()`	连接数组	`np.concatenate((a,b), axis)`
`np.stack()`	堆叠数组	`np.stack((a,b), axis)`
`np.vstack()`	垂直堆叠	`np.vstack((a,b))`
`np.hstack()`	水平堆叠	`np.hstack((a,b))`
`np.split()`	分割数组	`np.split(arr, indices)`

# 5.1 Reshape
arr = np.arange(12)
reshaped = arr.reshape(3, 4)   # 12 elements -> 3x4
print(f"reshape(3,4):\n{reshaped}")

# 5.2 Flatten
flattened = reshaped.flatten()   # always returns a copy
raveled = reshaped.ravel()       # returns a view when possible
print(f"\nflatten: {flattened}")
print(f"ravel: {raveled}")

# 5.3 Transpose
transposed = reshaped.T
print(f"\n转置:\n{transposed}")

# 5.4 Stacking
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
vstacked = np.vstack((a, b))          # stack vertically -> (4, 2)
hstacked = np.hstack((a, b))          # stack horizontally -> (2, 4)
stacked = np.stack((a, b), axis=0)    # stack along a new axis -> (2, 2, 2)
print(f"\n垂直堆叠:\n{vstacked}")
print(f"\n水平堆叠:\n{hstacked}")

# 5.5 Splitting
arr = np.arange(12).reshape(3, 4)
split_arrs = np.split(arr, 3, axis=0)   # three equal parts along the rows
print(f"\n分割为3份:")
for i, sub_arr in enumerate(split_arrs):
    print(f"部分{i}:\n{sub_arr}")

# 5.6 Adding / removing dimensions
arr_1d = np.array([1, 2, 3])
arr_2d = np.expand_dims(arr_1d, axis=0)   # (3,) -> (1, 3)
arr_squeezed = arr_2d.squeeze()           # drop length-1 axes -> (3,)
print(f"\n增加维度: {arr_1d.shape} -> {arr_2d.shape}")
print(f"移除单维度: {arr_2d.shape} -> {arr_squeezed.shape}")

数学运算

基本运算

运算符	作用	示例
`+`	加法	`arr1 + arr2`
`-`	减法	`arr1 - arr2`
`*`	乘法	`arr1 * arr2`
`/`	除法	`arr1 / arr2`
`//`	整除	`arr1 // arr2`
`%`	取模	`arr1 % arr2`
`**`	幂运算	`arr ** 2`
`@`	矩阵乘法	`arr1 @ arr2`

通用函数（ufunc）

函数	作用	示例
`np.add()`	加法	`np.add(a, b)`
`np.subtract()`	减法	`np.subtract(a, b)`
`np.multiply()`	乘法	`np.multiply(a, b)`
`np.divide()`	除法	`np.divide(a, b)`
`np.power()`	幂运算	`np.power(a, 2)`
`np.sqrt()`	平方根	`np.sqrt(arr)`
`np.exp()`	指数	`np.exp(arr)`
`np.log()`	自然对数	`np.log(arr)`
`np.log10()`	常用对数	`np.log10(arr)`
`np.sin()`	正弦	`np.sin(arr)`
`np.cos()`	余弦	`np.cos(arr)`
`np.tan()`	正切	`np.tan(arr)`
`np.abs()`	绝对值	`np.abs(arr)`
`np.sign()`	符号函数	`np.sign(arr)`
`np.ceil()`	向上取整	`np.ceil(arr)`
`np.floor()`	向下取整	`np.floor(arr)`
`np.round()`	四舍五入（恰为 .5 时舍入到最近的偶数，如 2.5→2、3.5→4）	`np.round(arr)`

# 6.1 Element-wise arithmetic
a = np.array([1, 2, 3, 4])
b = np.array([5, 6, 7, 8])
print("基本运算:")
print(f"加法: {a + b}")
print(f"减法: {a - b}")
print(f"乘法: {a * b}")
print(f"除法: {b / a}")
print(f"幂运算: {a **2}")

# 6.2 Universal functions
print("\n通用函数:")
print(f"平方根 sqrt: {np.sqrt(a)}")
print(f"指数 exp: {np.exp(a)}")
print(f"自然对数 log: {np.log(a)}")
print(f"绝对值 abs: {np.abs(np.array([-1,-2,3]))}")

# 6.3 Trigonometric functions
angles = np.array([0, np.pi / 4, np.pi / 2])
print(f"\n三角函数:")
print(f"sin: {np.sin(angles)}")
print(f"cos: {np.cos(angles)}")
# tan(pi/2) prints as a very large finite number because pi/2 is not exactly
# representable in floating point.
print(f"tan: {np.tan(angles)}")

# 6.4 Rounding functions
arr_float = np.array([1.2, 2.7, 3.5, 4.1])
print(f"\n取整函数:")
print(f"ceil向上: {np.ceil(arr_float)}")
print(f"floor向下: {np.floor(arr_float)}")
# np.round rounds exact halves to the nearest even value (3.5 -> 4, 2.5 -> 2).
print(f"round四舍五入: {np.round(arr_float)}")

统计函数

函数	作用	示例
`np.sum()`	求和	`np.sum(arr)`
`np.mean()`	平均值	`np.mean(arr)`
`np.median()`	中位数	`np.median(arr)`
`np.std()`	标准差	`np.std(arr)`
`np.var()`	方差	`np.var(arr)`
`np.min()`	最小值	`np.min(arr)`
`np.max()`	最大值	`np.max(arr)`
`np.argmin()`	最小值索引	`np.argmin(arr)`
`np.argmax()`	最大值索引	`np.argmax(arr)`
`np.percentile()`	百分位数	`np.percentile(arr, 50)`
`np.ptp()`	极差	`np.ptp(arr)`
`np.cumsum()`	累积和	`np.cumsum(arr)`
`np.cumprod()`	累积积	`np.cumprod(arr)`
`np.histogram()`	直方图	`np.histogram(arr)`

# 7.1 Test data: 100 standard-normal samples
arr = np.random.randn(100)
print("统计函数:")
print(f"求和 sum: {np.sum(arr):.3f}")
print(f"平均值 mean: {np.mean(arr):.3f}")
print(f"中位数 median: {np.median(arr):.3f}")
print(f"标准差 std: {np.std(arr):.3f}")
print(f"方差 var: {np.var(arr):.3f}")
print(f"最小值 min: {np.min(arr):.3f}")
print(f"最大值 max: {np.max(arr):.3f}")
print(f"极差 ptp: {np.ptp(arr):.3f}")

# 7.2 Index statistics
print(f"\n索引统计:")
print(f"最小值索引 argmin: {np.argmin(arr)}")
print(f"最大值索引 argmax: {np.argmax(arr)}")

# 7.3 Cumulative operations
arr_small = np.array([1, 2, 3, 4])
print(f"\n累积操作:")
print(f"原始数组: {arr_small}")
print(f"累积和 cumsum: {np.cumsum(arr_small)}")
print(f"累积积 cumprod: {np.cumprod(arr_small)}")

# 7.4 Percentiles
print(f"\n百分位数:")
print(f"25%分位数: {np.percentile(arr,25):.3f}")
print(f"50%分位数(中位数): {np.percentile(arr,50):.3f}")
print(f"75%分位数: {np.percentile(arr,75):.3f}")

# 7.5 Histogram: `hist` holds per-bin counts, `bins` the 11 bin edges
hist, bins = np.histogram(arr, bins=10)
print(f"\n直方图(10个bin):")
print(f"计数: {hist}")
print(f"边界: {bins}")

线性代数运算

函数	作用	示例
`np.dot()`	点积/矩阵乘法	`np.dot(a, b)`
`@`	矩阵乘法运算符	`a @ b`
`np.matmul()`	矩阵乘法	`np.matmul(a, b)`
`np.linalg.inv()`	矩阵求逆	`np.linalg.inv(a)`
`np.linalg.det()`	行列式	`np.linalg.det(a)`
`np.linalg.eig()`	特征值/特征向量	`np.linalg.eig(a)`
`np.linalg.svd()`	奇异值分解	`np.linalg.svd(a)`
`np.linalg.norm()`	范数	`np.linalg.norm(a)`
`np.linalg.solve()`	解线性方程	`np.linalg.solve(A, b)`
`np.linalg.lstsq()`	最小二乘解	`np.linalg.lstsq(A, b)`
`np.linalg.qr()`	QR分解	`np.linalg.qr(a)`
`np.linalg.cholesky()`	Cholesky分解	`np.linalg.cholesky(a)`
`np.trace()`	矩阵迹	`np.trace(a)`

# 8.1 Matrices and vectors
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
x = np.array([2, 3])      # expected solution of A @ x = b
b = np.array([8, 18])
print("线性代数运算:")

# 8.2 Matrix multiplication (three equivalent spellings for 2-D inputs)
print(f"\n矩阵乘法:")
print(f"dot: {np.dot(A, B)}")
print(f"@运算符: {A @ B}")
print(f"matmul: {np.matmul(A, B)}")

# 8.3 Matrix inverse
A_inv = np.linalg.inv(A)
print(f"\n矩阵求逆:")
print(f"A: {A}")
print(f"A的逆: {A_inv}")
print(f"A @ A_inv ≈ I: {A @ A_inv}")   # close to the identity matrix

# 8.4 Determinant
det = np.linalg.det(A)
print(f"\n行列式 det(A): {det:.3f}")

# 8.5 Eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(A)
print(f"\n特征值: {eigenvalues}")
print(f"特征向量:\n{eigenvectors}")

# 8.6 Norms
# For a 2-D input np.linalg.norm defaults to the Frobenius norm, so the
# spectral (L2) norm must be requested explicitly with ord=2.
l2_norm = np.linalg.norm(A, 2)
fro_norm = np.linalg.norm(A, 'fro')
print(f"\n范数:")
print(f"L2范数: {l2_norm:.3f}")
print(f"Frobenius范数: {fro_norm:.3f}")

# 8.7 Solve the linear system A @ x = b
solution = np.linalg.solve(A, b)
print(f"\n解线性方程 A@x=b:")
print(f"解x: {solution}")
print(f"验证 A@x: {A @ solution}")

# 8.8 Singular value decomposition
U, S, Vt = np.linalg.svd(A)
print(f"\n奇异值分解:")
print(f"U:\n{U}")
print(f"奇异值: {S}")
print(f"V转置:\n{Vt}")

# 8.9 Trace
trace = np.trace(A)
print(f"\n矩阵迹: {trace}")

广播机制

广播规则：

如果数组维度不同，将小维度数组形状前面补1
如果两个数组在某个维度上大小相同或其中一个为1，则可以广播
如果两个数组在任意一个维度上大小不同且都不为1，则无法广播并报错

# 9.1 Broadcasting examples
print("广播机制示例:")

# Example 1: scalar with array
arr = np.array([[1, 2, 3], [4, 5, 6]])
result = arr + 10   # the scalar is broadcast over the whole array
print(f"\n数组 + 标量:\n{result}")

# Example 2: row vector with column vector
row = np.array([1, 2, 3])         # shape (3,)
col = np.array([[1], [2], [3]])   # shape (3, 1)
result = row + col                # both operands broadcast to (3, 3)
print(f"\n行向量 + 列向量:\n{result}")

# Example 3: arrays of different rank
A = np.array([[1, 2, 3], [4, 5, 6]])   # shape (2, 3)
B = np.array([10, 20, 30])             # shape (3,)
result = A + B                         # B broadcasts (3,) -> (1, 3) -> (2, 3)
print(f"\n(2,3) + (3,):\n{result}")

# Example 4: broadcasting in image processing
# Simulated RGB image with layout (height, width, 3)
image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
brightness_adjust = np.array([10, 20, 30])   # per-channel offset
# Widen before adding so sums above 255 cannot wrap around, clip to the valid
# pixel range, then convert back to uint8 so the result is still an image.
adjusted_image = image.astype(np.int16) + brightness_adjust
adjusted_image = np.clip(adjusted_image, 0, 255).astype(np.uint8)
print(f"\n图像广播调整:")
print(f"原始图像形状: {image.shape}")
print(f"调整值形状: {brightness_adjust.shape}")
print(f"调整后图像形状: {adjusted_image.shape}")

随机数生成

函数	作用	示例
`np.random.rand()`	[0,1)均匀分布	`np.random.rand(3,3)`
`np.random.randn()`	标准正态分布	`np.random.randn(100)`
`np.random.randint()`	随机整数	`np.random.randint(0,10,(3,3))`
`np.random.random()`	[0,1)随机数	`np.random.random((3,3))`
`np.random.uniform()`	均匀分布	`np.random.uniform(0,1,10)`
`np.random.normal()`	正态分布	`np.random.normal(0,1,100)`
`np.random.choice()`	随机选择	`np.random.choice(arr, size=5)`
`np.random.shuffle()`	打乱顺序	`np.random.shuffle(arr)`
`np.random.permutation()`	随机排列	`np.random.permutation(arr)`
`np.random.seed()`	设置随机种子	`np.random.seed(42)`

# 10.1 Seed the legacy global generator so the output is reproducible
np.random.seed(42)
print("随机数生成:")

# 10.2 Uniform distributions
uniform = np.random.rand(3, 3)                # uniform on [0, 1)
print(f"\n[0,1)均匀分布 (3x3):\n{uniform}")
uniform_range = np.random.uniform(0, 10, 5)   # uniform on [0, 10)
print(f"\n[0,10)均匀分布: {uniform_range}")

# 10.3 Normal distributions
normal = np.random.randn(5)                      # standard normal
print(f"\n标准正态分布: {normal}")
normal_custom = np.random.normal(100, 15, 10)    # mean 100, std 15
print(f"\nN(100,15²)分布: {normal_custom}")

# 10.4 Random integers
integers = np.random.randint(0, 256, (3, 3))     # integers in [0, 256)
print(f"\n随机整数 (0-255):\n{integers}")

# 10.5 Weighted random choice; p gives the probability of each candidate
choices = np.random.choice([0, 128, 255], size=10, p=[0.1, 0.3, 0.6])
print(f"\n加权随机选择: {choices}")

# 10.6 Shuffling
arr = np.arange(10)
np.random.shuffle(arr)                   # shuffles in place, returns None
print(f"\n打乱顺序: {arr}")
permuted = np.random.permutation(10)     # returns a new shuffled array
print(f"随机排列: {permuted}")

图像处理相关应用

# 11.1 Simulated image operations
print("图像处理相关应用:")
# Random RGB image with layout (height, width, channels)
height, width, channels = 100, 150, 3
image = np.random.randint(0, 256, (height, width, channels), dtype=np.uint8)
print(f"\n模拟图像:")
print(f"形状: {image.shape}")        # (100, 150, 3)
print(f"数据类型: {image.dtype}")    # uint8
print(f"像素范围: [{image.min()}, {image.max()}]")

# 11.2 Channel operations
# Split channels (each slice is a view of shape (100, 150))
red_channel = image[:, :, 0]
green_channel = image[:, :, 1]
blue_channel = image[:, :, 2]
print(f"\n通道分离:")
print(f"红色通道形状: {red_channel.shape}")
# Merge channels back along a new last axis
merged = np.stack([red_channel, green_channel, blue_channel], axis=-1)
print(f"合并后形状: {merged.shape}")

# 11.3 Cropping: rows 20-79, columns 30-119, all channels
cropped = image[20:80, 30:120, :]
print(f"\n裁剪后形状: {cropped.shape}")

# 11.4 Rotation by 90 degrees (swaps the first two axes' lengths)
rotated = np.rot90(image, k=1)
print(f"旋转后形状: {rotated.shape}")

# 11.5 Flipping
flipped_h = image[:, ::-1, :]   # horizontal flip
flipped_v = image[::-1, :, :]   # vertical flip
print(f"水平翻转形状: {flipped_h.shape}")

# 11.6 Scaling with nearest-neighbour sampling
# The original computed new_height/new_width but then hard-coded a ::2 slice;
# here the source indices are derived from scale_factor so the two agree.
scale_factor = 0.5
new_height = int(height * scale_factor)
new_width = int(width * scale_factor)
row_idx = np.minimum((np.arange(new_height) / scale_factor).astype(int), height - 1)
col_idx = np.minimum((np.arange(new_width) / scale_factor).astype(int), width - 1)
scaled = image[row_idx[:, None], col_idx]   # shape (new_height, new_width, channels)
print(f"缩放后形状: {scaled.shape}")

# 11.7 Normalisation to [0, 1]
normalized = image.astype(np.float32) / 255.0
print(f"\n归一化后范围: [{normalized.min():.3f}, {normalized.max():.3f}]")

# 11.8 Binarisation: pixels above the threshold become 255, the rest 0
threshold = 128
binary = (image > threshold).astype(np.uint8) * 255
print(f"二值化后唯一值: {np.unique(binary)}")

高级技巧与性能优化

import time

# 12.1 Vectorised operations vs. Python loops
print("性能优化 - 向量化操作:")
size = 10000
arr = np.random.rand(size)

# Method 1: explicit Python loop (slow, kept deliberately as the baseline).
# perf_counter is used because time.time() is too coarse for short intervals.
start = time.perf_counter()
result_loop = np.zeros(size)
for i in range(size):
    result_loop[i] = arr[i] * 2 + 1
loop_time = time.perf_counter() - start

# Method 2: NumPy vectorised expression (fast)
start = time.perf_counter()
result_vectorized = arr * 2 + 1
vectorized_time = time.perf_counter() - start

print(f"循环时间: {loop_time:.6f}秒")
print(f"向量化时间: {vectorized_time:.6f}秒")
# Guard the ratio against a measured interval of exactly zero.
print(f"加速比: {loop_time/max(vectorized_time, 1e-9):.1f}倍")

# 12.2 Views share memory with the source array
print(f"\n内存视图操作:")
arr = np.arange(10)
print(f"原始数组: {arr}")       # printed BEFORE the view is modified
view = arr[3:7]                 # basic slicing returns a view
view[0] = 100                   # writes through to arr[3]
print(f"视图修改后: {arr}")

# 12.3 In-place operations
print(f"\n原地操作:")
arr = np.array([1, 2, 3, 4, 5])
arr += 10                       # modifies arr in place
print(f"原地加法后: {arr}")

# 12.4 Use `out=` to write into a pre-allocated buffer
arr1 = np.random.rand(1000)
arr2 = np.random.rand(1000)
result = np.empty_like(arr1)
np.multiply(arr1, arr2, out=result)
print(f"\n使用out参数避免内存分配")

# 12.5 Copy vs. view
arr = np.arange(10)
copy = arr.copy()               # explicit copy with its own buffer
view = arr[:]                   # new array object sharing arr's buffer
print(f"\n拷贝 vs 视图:")
# `arr is view` is False because slicing creates a new array object even though
# the data is shared; np.shares_memory is the correct check.
print(f"arr 与 copy 共享内存: {np.shares_memory(arr, copy)}")   # False
print(f"arr 与 view 共享内存: {np.shares_memory(arr, view)}")   # True

实用小技巧

# 13.1 Conditional selection
arr = np.array([1, 2, 3, 4, 5])
# np.where(condition, x, y): take x where the condition holds, otherwise y
result = np.where(arr > 3, arr, 0)
print(f"np.where(arr>3, arr, 0): {result}")

# 13.2 Repeating arrays
repeated = np.repeat(arr, 3)   # each element repeated 3 times
print(f"np.repeat每个元素3次: {repeated}")
tiled = np.tile(arr, 3)        # whole array repeated 3 times
print(f"np.tile整个数组3次: {tiled}")

# 13.3 Unique values and their counts
arr_with_dup = np.array([1, 2, 2, 3, 3, 3, 4])
# return_counts keeps the counts aligned with the unique values; np.bincount
# would also emit a slot for 0 and so would not line up with unique_values.
unique_values, unique_counts = np.unique(arr_with_dup, return_counts=True)
print(f"唯一值: {unique_values}")
print(f"计数: {unique_counts}")

# 13.4 Sorting
sorted_arr = np.sort(arr)         # sorted copy
sort_indices = np.argsort(arr)    # indices that would sort arr
print(f"排序: {sorted_arr}")
print(f"排序索引: {sort_indices}")

# 13.5 Coordinate grids
x = np.linspace(-1, 1, 5)
y = np.linspace(-1, 1, 3)
X, Y = np.meshgrid(x, y)          # both outputs have shape (len(y), len(x))
print(f"\n网格坐标X:\n{X}")
print(f"网格坐标Y:\n{Y}")

Pandas

核心数据结构

Series（一维数据）

创建方法	说明	示例
`pd.Series()`	从列表创建	`pd.Series([1,2,3])`
`pd.Series()`	从字典创建	`pd.Series({'a':1, 'b':2})`
`pd.Series()`	从ndarray创建	`pd.Series(np.array([1,2,3]))`
属性
`.values`	获取值数组	`s.values`
`.index`	获取索引	`s.index`
`.dtype`	数据类型	`s.dtype`
`.name`	Series名称	`s.name`

import pandas as pd
import numpy as np

# 1.1 Creating Series
print("=== Series 创建示例 ===")
# From a list (default integer index)
s1 = pd.Series([1, 2, 3, 4, 5])
print(f"从列表创建:\n{s1}")

# From a dict (keys become the index)
s2 = pd.Series({'a': 10, 'b': 20, 'c': 30})
print(f"\n从字典创建:\n{s2}")

# From a NumPy array with an explicit index
s3 = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
print(f"\n指定索引创建:\n{s3}")

# 1.2 Series attributes
print(f"\n=== Series 属性 ===")
print(f"值数组: {s3.values}")
print(f"索引: {s3.index}")
print(f"数据类型: {s3.dtype}")
print(f"形状: {s3.shape}")
print(f"大小: {s3.size}")

# 1.3 Series arithmetic and statistics
s4 = pd.Series([1, 2, 3, 4])
print(f"\n=== Series 运算 ===")
print(f"加法: {s4 +10}")
print(f"乘法: {s4 *2}")
print(f"统计: 均值={s4.mean()}, 总和={s4.sum()}, 标准差={s4.std()}")

DataFrame（二维数据）

创建方法	说明	示例
`pd.DataFrame()`	从字典创建	`pd.DataFrame({'A':[1,2], 'B':[3,4]})`
`pd.DataFrame()`	从列表创建	`pd.DataFrame([[1,2],[3,4]])`
`pd.DataFrame()`	从ndarray创建	`pd.DataFrame(np.array([[1,2],[3,4]]))`
`pd.read_csv()`	读取CSV	`pd.read_csv('data.csv')`
`pd.read_excel()`	读取Excel	`pd.read_excel('data.xlsx')`
属性
`.columns`	列名	`df.columns`
`.index`	索引	`df.index`
`.shape`	形状	`df.shape`
`.dtypes`	数据类型	`df.dtypes`
`.values`	值数组	`df.values`

# 2.1 Creating DataFrames
print("\n=== DataFrame 创建示例 ===")
# From a dict of columns
df1 = pd.DataFrame({
    '图像ID': ['img001', 'img002', 'img003', 'img004'],
    '宽度': [1920, 1280, 2560, 800],
    '高度': [1080, 720, 1440, 600],
    '通道数': [3, 3, 1, 3],
    '标签': ['cat', 'dog', 'cat', 'bird'],
})
print(f"从字典创建:\n{df1}")

# From a list of rows plus column names
data = [
    ['img001', 1920, 1080, 3, 'cat'],
    ['img002', 1280, 720, 3, 'dog'],
    ['img003', 2560, 1440, 1, 'cat'],
    ['img004', 800, 600, 3, 'bird'],
]
df2 = pd.DataFrame(data, columns=['图像ID', '宽度', '高度', '通道数', '标签'])
print(f"\n从列表创建:\n{df2}")

# Reading from a CSV file (left disabled: no such file ships with the example)
# df_csv = pd.read_csv('image_data.csv')
# print(f"从CSV读取:\n{df_csv.head()}")

# 2.2 DataFrame attributes
print(f"\n=== DataFrame 属性 ===")
print(f"列名: {df1.columns}")
print(f"索引: {df1.index}")
print(f"形状: {df1.shape}")     # (rows, columns)
print(f"数据类型:\n{df1.dtypes}")
print(f"值数组形状: {df1.values.shape}")

# 2.3 Inspecting a DataFrame
print(f"\n=== DataFrame 信息查看 ===")
print("前3行:")
print(df1.head(3))
print("\n基本信息:")
# info() writes its report itself and returns None, so it is called directly
# instead of being wrapped in print(), which would emit a stray "None".
df1.info()
print("\n统计描述:")
print(df1.describe())
print("\n唯一值:")
print(df1['标签'].unique())

数据查看与选择

数据查看方法

方法	作用	示例
`.head()`	查看前n行	`df.head(5)`
`.tail()`	查看后n行	`df.tail(5)`
`.sample()`	随机抽样	`df.sample(5)`
`.info()`	数据信息	`df.info()`
`.describe()`	统计描述	`df.describe()`
`.shape`	数据形状	`df.shape`
`.columns`	列名	`df.columns`
`.dtypes`	数据类型	`df.dtypes`
`.value_counts()`	值计数	`df['col'].value_counts()`
`.unique()`	唯一值	`df['col'].unique()`
`.nunique()`	唯一值数量	`df['col'].nunique()`

# Build a 100-row example data set
df = pd.DataFrame({
    '图像ID': [f'img{i:03d}' for i in range(100)],
    '宽度': np.random.randint(800, 2561, 100),
    '高度': np.random.randint(600, 1441, 100),
    '通道数': np.random.choice([1, 3], 100, p=[0.2, 0.8]),
    '类别': np.random.choice(['cat', 'dog', 'bird', 'car'], 100),
    '置信度': np.random.uniform(0.7, 1.0, 100),
    # lowercase 'h' is the hourly alias; uppercase 'H' is deprecated
    '检测时间': pd.date_range('2024-01-01', periods=100, freq='h'),
})
print("=== 数据查看方法 ===")

# Basic inspection
print(f"1. 前5行:\n{df.head()}")
print(f"\n2. 后3行:\n{df.tail(3)}")
print(f"\n3. 随机5行:\n{df.sample(5)}")
print(f"\n4. 数据形状: {df.shape}")
print(f"\n5. 列名: {df.columns.tolist()}")

# Summary statistics
print(f"\n6. 数值列统计:\n{df.describe()}")
print(f"\n7. 类别分布:\n{df['类别'].value_counts()}")
print(f"\n8. 唯一类别数: {df['类别'].nunique()}")

# Data types and memory
print(f"\n9. 数据类型:\n{df.dtypes}")
print(f"\n10. 内存使用:")
# info() prints its report and returns None; interpolating it into an f-string
# would append the text "None", so it is called as a statement.
df.info(memory_usage='deep')

# Quick aggregates
print(f"\n11. 总和统计:")
print(f" 宽度总和: {df['宽度'].sum()}")
print(f" 高度均值: {df['高度'].mean():.1f}")
print(f" 置信度中位数: {df['置信度'].median():.3f}")
print(f" 通道数众数: {df['通道数'].mode().values[0]}")

数据选择（索引/切片）

方法	语法	说明
列选择	`df['col']`	选择单列
列选择	`df[['col1', 'col2']]`	选择多列
行选择	`df.loc[index]`	按标签选择
行选择	`df.iloc[index]`	按位置选择
条件选择	`df[df['col'] > value]`	布尔索引
切片	`df[start:end]`	行切片
切片	`df.loc[start:end]`	标签切片
切片	`df.iloc[start:end]`	位置切片
`.isin()`	`df[df['col'].isin(list)]`	包含判断
`.query()`	`df.query('col > value')`	查询表达式

# Uses the `df` DataFrame built by the preceding "数据查看" snippet.
print("\n=== 数据选择方法 ===")

# Column selection
print("1. 选择单列:")
print(df['宽度'].head())
print("\n2. 选择多列:")
print(df[['图像ID', '宽度', '高度', '类别']].head())

# Row selection by position
print("\n3. 按位置选择行:")
print(df.iloc[0])            # first row
print(df.iloc[[0, 2, 4]])    # several rows
print(df.iloc[10:15])        # positional slice, end exclusive

# Row selection by label
print("\n4. 按标签选择行:")
print(df.loc[0])             # row whose index label is 0
print(df.loc[0:5])           # label slice, end INCLUSIVE (labels 0..5)

# Boolean selection
print("\n5. 条件选择:")
# Images wider than 2000
wide_images = df[df['宽度'] > 2000]
print(f"宽度>2000的图像: {len(wide_images)}张")
# Grayscale images (one channel)
gray_images = df[df['通道数'] == 1]
print(f"灰度图像: {len(gray_images)}张")
# Combined conditions: each comparison needs its own parentheses with | and &
cat_dog = df[(df['类别'] == 'cat') | (df['类别'] == 'dog')]
print(f"猫狗类别: {len(cat_dog)}张")
# High-confidence cat images
high_conf_cat = df[(df['置信度'] > 0.9) & (df['类别'] == 'cat')]
print(f"高置信度猫图像: {len(high_conf_cat)}张")

# isin
print("\n6. isin 方法:")
selected_categories = df[df['类别'].isin(['cat', 'dog'])]
print(f"猫或狗: {len(selected_categories)}张")

# query
print("\n7. query 方法:")
result = df.query('宽度 > 2000 and 高度 > 1000')
print(f"大尺寸图像: {len(result)}张")

# Selecting rows and columns together
print("\n8. 同时选择行和列:")
# Labels 0..4 of selected columns (loc slices include the end label)
subset = df.loc[0:4, ['图像ID', '宽度', '高度', '类别']]
print(subset)
# Positional equivalent: rows 0..4, columns at positions 0, 1, 2 and 4
subset2 = df.iloc[0:5, [0, 1, 2, 4]]
print(f"\n使用iloc选择:\n{subset2}")

数据处理与清洗

数据清洗方法

方法	作用	示例
`.dropna()`	删除缺失值	`df.dropna()`
`.fillna()`	填充缺失值	`df.fillna(value)`
`.drop()`	删除行列	`df.drop(columns=['col'])`
`.rename()`	重命名	`df.rename(columns={'old':'new'})`
`.astype()`	类型转换	`df['col'].astype('int')`
`.replace()`	替换值	`df.replace({'old':'new'})`
`.duplicated()`	查找重复	`df.duplicated()`
`.drop_duplicates()`	删除重复	`df.drop_duplicates()`
`.isnull()`	检查空值	`df.isnull()`
`.notnull()`	检查非空	`df.notnull()`
`.clip()`	限幅	`df['col'].clip(lower, upper)`

print("=== 数据清洗方法 ===")
# Data set with deliberate problems: missing values, an out-of-range value
df_dirty = pd.DataFrame({
    '图像ID': ['img001', 'img002', 'img003', 'img004', 'img005'],
    '宽度': [1920, None, 2560, 800, 1920],
    '高度': [1080, 720, None, 600, 1080],
    '通道数': [3, 3, 1, 3, 3],
    '类别': ['cat', 'dog', 'cat', 'bird', 'cat'],
    '置信度': [0.95, 0.87, 1.2, 0.65, 0.95],   # 1.2 is outside [0, 1]
    '文件大小': [2048, 1024, 4096, 512, 2048],
})
print("原始数据:")
print(df_dirty)
print(f"\n空值统计:\n{df_dirty.isnull().sum()}")

# 1. Missing values
print("\n1. 处理缺失值:")
# Drop every row that contains a missing value
df_clean1 = df_dirty.dropna()
print(f"删除缺失值后: {len(df_clean1)}行")
# Fill per column: mean for width, median for height
df_filled = df_dirty.fillna({
    '宽度': df_dirty['宽度'].mean(),
    '高度': df_dirty['高度'].median(),
})
print(f"填充后:\n{df_filled}")

# 2. Outliers
print("\n2. 处理异常值:")
# Clamp confidence into [0, 1]
df_dirty['置信度'] = df_dirty['置信度'].clip(0, 1)
print(f"限幅后置信度: {df_dirty['置信度'].tolist()}")

# 3. Dropping a column
print("\n3. 删除列:")
df_no_size = df_dirty.drop(columns=['文件大小'])
print(f"删除文件大小列:\n{df_no_size}")

# 4. Renaming columns
print("\n4. 重命名列:")
df_renamed = df_dirty.rename(columns={
    '宽度': 'image_width',
    '高度': 'image_height',
    '通道数': 'channels',
})
print(f"重命名后列名: {df_renamed.columns.tolist()}")

# 5. Type conversion
print("\n5. 类型转换:")
df_dirty['宽度'] = df_dirty['宽度'].astype('float64')
print(f"宽度数据类型: {df_dirty['宽度'].dtype}")

# 6. Replacing values (nested dict: column -> {old: new})
print("\n6. 替换值:")
df_replaced = df_dirty.replace({'类别': {'cat': '猫', 'dog': '狗'}})
print(f"类别替换后:\n{df_replaced['类别']}")

# 7. Dropping duplicate rows
print("\n7. 删除重复行:")
# Append a row that duplicates the first one, then remove it again
df_dirty.loc[5] = ['img001', 1920, 1080, 3, 'cat', 0.95, 2048]
df_no_dup = df_dirty.drop_duplicates()
print(f"删除重复后: {len(df_no_dup)}行")

数据变换方法

方法	作用	示例
`.apply()`	应用函数	`df['col'].apply(func)`
`.map()`	映射替换	`df['col'].map(mapping)`
`.applymap()`	元素级应用（pandas 2.1 起已弃用，建议改用 `DataFrame.map()`）	`df.applymap(func)`
`.groupby()`	分组	`df.groupby('col')`
`.pivot_table()`	数据透视	`pd.pivot_table(df, ...)`
`.melt()`	宽转长	`pd.melt(df, ...)`
`.pivot()`	长转宽	`df.pivot(...)`
`.cut()`	数据分箱	`pd.cut(df['col'], bins)`
`.qcut()`	等频分箱	`pd.qcut(df['col'], q)`

# Uses the `df` DataFrame built by the earlier "数据查看" snippet.
print("\n=== 数据变换方法 ===")

# 1. apply
print("1. apply 方法:")
# Image area per row. This demonstrates row-wise apply; for a plain product the
# vectorised df['宽度'] * df['高度'] is the faster equivalent.
df['面积'] = df.apply(lambda row: row['宽度'] * row['高度'], axis=1)
print(f"添加面积列:\n{df[['图像ID','宽度','高度','面积']].head()}")


def classify_size(width, height):
    """Return 'large', 'medium' or 'small' for an image of the given size.

    An image is 'large' if width > 2000 or height > 1500, otherwise 'medium'
    if width > 1000 or height > 700, otherwise 'small'.
    """
    if width > 2000 or height > 1500:
        return 'large'
    elif width > 1000 or height > 700:
        return 'medium'
    else:
        return 'small'


df['尺寸分类'] = df.apply(lambda row: classify_size(row['宽度'], row['高度']), axis=1)
print(f"\n尺寸分类分布:\n{df['尺寸分类'].value_counts()}")

# 2. map
print("\n2. map 方法:")
# Encode categories as integers; values missing from the mapping become NaN
category_map = {'cat': 0, 'dog': 1, 'bird': 2, 'car': 3}
df['类别编码'] = df['类别'].map(category_map)
print(f"类别编码:\n{df[['类别','类别编码']].head()}")

# 3. Binning
print("\n3. 数据分箱:")
# Three equal-width bins over the width range
df['宽度分箱'] = pd.cut(df['宽度'], bins=3, labels=['小', '中', '大'])
print(f"宽度分箱:\n{df[['宽度','宽度分箱']].head()}")
# Equal-frequency bins (quartiles)
df['置信度分箱'] = pd.qcut(df['置信度'], q=4, labels=['低', '中低', '中高', '高'])
print(f"\n置信度分箱分布:\n{df['置信度分箱'].value_counts()}")

# 4. Grouping
print("\n4. 分组操作:")
grouped = df.groupby('类别')
print(f"按类别分组统计:")
print(grouped['宽度'].agg(['mean', 'std', 'count']))
# Several aggregations over several columns
category_stats = df.groupby('类别').agg({
    '宽度': ['mean', 'max', 'min'],
    '高度': ['mean', 'max', 'min'],
    '置信度': 'mean',
})
print(f"\n类别详细统计:\n{category_stats}")

数据合并与连接

数据合并方法

方法	作用	适用场景
`pd.concat()`	拼接	相同结构的多个DataFrame
`pd.merge()`	合并	基于键合并不同DataFrame
`.join()`	连接	基于索引合并
`.append()`	追加	添加行（pandas 1.4 起弃用，2.0 已移除，请用 `pd.concat`）
`pd.concat(axis=1)`	横向拼接	增加列

print("=== 数据合并与连接 ===")
# Example tables. Image IDs: images 000-009, metadata 005-014, labels even 000-008.
df_images = pd.DataFrame({
    '图像ID': [f'img{i:03d}' for i in range(10)],
    '宽度': np.random.randint(800, 2561, 10),
    '高度': np.random.randint(600, 1441, 10),
    '类别': np.random.choice(['cat', 'dog', 'bird'], 10),
})
df_metadata = pd.DataFrame({
    '图像ID': [f'img{i:03d}' for i in range(5, 15)],
    '拍摄时间': pd.date_range('2024-01-01', periods=10, freq='D'),
    '摄影师': np.random.choice(['Alice', 'Bob', 'Charlie'], 10),
})
df_labels = pd.DataFrame({
    '图像ID': [f'img{i:03d}' for i in range(0, 10, 2)],
    '标注质量': np.random.choice(['高', '中', '低'], 5),
    '标注员': np.random.choice(['张三', '李四'], 5),
})
print("1. 图像数据:")
print(df_images)
print("\n2. 元数据:")
print(df_metadata)
print("\n3. 标注数据:")
print(df_labels)

# 1. concat: stack rows of identically structured frames
print("\n1. concat 纵向拼接:")
df_more_images = pd.DataFrame({
    '图像ID': [f'img{i:03d}' for i in range(10, 15)],
    '宽度': np.random.randint(800, 2561, 5),
    '高度': np.random.randint(600, 1441, 5),
    '类别': np.random.choice(['cat', 'dog', 'bird'], 5),
})
df_all_images = pd.concat([df_images, df_more_images], ignore_index=True)
print(f"合并后总图像数: {len(df_all_images)}")

# 2. merge: key-based joins
print("\n2. merge 合并:")
# Inner join: only IDs present in both tables (005-009)
df_inner = pd.merge(df_images, df_metadata, on='图像ID', how='inner')
print(f"内连接结果（5行）:\n{df_inner.head()}")
# Left join: every image row, metadata where available
df_left = pd.merge(df_images, df_metadata, on='图像ID', how='left')
print(f"\n左连接结果（10行）:\n{df_left.head()}")
# Right join: every metadata row, image data where available
df_right = pd.merge(df_images, df_metadata, on='图像ID', how='right')
print(f"\n右连接结果（10行）:\n{df_right.head()}")
# Outer join: union of both ID sets (000-014)
df_outer = pd.merge(df_images, df_metadata, on='图像ID', how='outer')
print(f"\n外连接结果（15行）:\n{df_outer.head()}")

# 3. Merging three tables by chaining two left joins
print("\n3. 多表合并:")
df_combined = pd.merge(
    pd.merge(df_images, df_metadata, on='图像ID', how='left'),
    df_labels,
    on='图像ID',
    how='left',
)
print(f"三表合并结果:\n{df_combined}")

# 4. join: index-based
print("\n4. join 连接:")
df_images_idx = df_images.set_index('图像ID')
df_labels_idx = df_labels.set_index('图像ID')
df_joined = df_images_idx.join(df_labels_idx, how='left')
print(f"基于索引连接:\n{df_joined.head()}")

# 5. Horizontal concat: add columns
# (the heading string had been broken across two physical lines)
print("\n5. 横向拼接:")
df_features = pd.DataFrame({
    '特征1': np.random.randn(10),
    '特征2': np.random.randn(10),
    '特征3': np.random.randn(10),
})
# axis=1 aligns on the index, so the left frame's index is reset first
df_with_features = pd.concat([df_images.reset_index(drop=True), df_features], axis=1)
print(f"增加特征列:\n{df_with_features.head()}")

分组与聚合操作

分组聚合方法

方法	作用	示例
`.groupby()`	分组	`df.groupby('col')`
`.agg()`	聚合	`df.groupby('col').agg(['mean', 'sum'])`
`.transform()`	分组转换	`df.groupby('col').transform('mean')`
`.filter()`	分组过滤	`df.groupby('col').filter(func)`
`.apply()`	分组应用	`df.groupby('col').apply(func)`
`.pivot_table()`	数据透视	`pd.pivot_table(df, ...)`
`.crosstab()`	交叉表	`pd.crosstab(df['col1'], df['col2'])`

print("=== 分组与聚合操作 ===")
# Example detection records
df_detections = pd.DataFrame({
    '图像ID': [f'img{i:03d}' for i in range(20)],
    '检测类别': np.random.choice(['person', 'car', 'dog', 'cat', 'bicycle'], 20),
    '置信度': np.random.uniform(0.5, 1.0, 20),
    '检测框面积': np.random.randint(100, 10000, 20),
    '检测时间': np.random.uniform(0.1, 2.0, 20),   # seconds
})
print("检测数据:")
print(df_detections.head())

# 1. Basic group statistics
print("\n1. 基础分组统计:")
grouped = df_detections.groupby('检测类别')
print("每个类别的检测数量:")
print(grouped.size())
print("\n每个类别的平均置信度:")
print(grouped['置信度'].mean())
print("\n每个类别的统计汇总:")
print(grouped.agg({
    '置信度': ['mean', 'std', 'min', 'max'],
    '检测框面积': ['mean', 'sum'],
    '检测时间': 'mean',
}))

# 2. Grouping by several columns
print("\n2. 多列分组:")
# Add a synthetic source column, then group by (source, category)
df_detections['来源'] = np.random.choice(['camera1', 'camera2', 'camera3'], 20)
multi_grouped = df_detections.groupby(['来源', '检测类别'])
print("多级分组统计:")
print(multi_grouped['置信度'].mean())

# 3. transform: broadcast a per-group statistic back onto every row
print("\n3. 分组转换:")
df_detections['类别平均置信度'] = (
    df_detections.groupby('检测类别')['置信度'].transform('mean')
)
print(df_detections[['图像ID', '检测类别', '置信度', '类别平均置信度']].head())

# 4. filter: keep only the rows of groups with more than 3 detections
print("\n4. 分组过滤:")
filtered = df_detections.groupby('检测类别').filter(lambda x: len(x) > 3)
print(f"过滤后类别: {filtered['检测类别'].unique()}")

# 5. Custom aggregation functions
print("\n5. 自定义聚合函数:")


def confidence_range(series):
    """Return the spread (max - min) of the values in *series*."""
    return series.max() - series.min()


def large_detections(series):
    """Return how many values in *series* are greater than 5000."""
    return (series > 5000).sum()


aggregations = df_detections.groupby('检测类别').agg({
    '置信度': ['mean', confidence_range],
    '检测框面积': ['mean', large_detections],
    '检测时间': lambda x: x.quantile(0.9),   # 90th percentile
})
print("自定义聚合结果:")
print(aggregations)

# 6. Pivot table
print("\n6. 数据透视表:")
pivot = pd.pivot_table(
    df_detections,
    values=['置信度', '检测框面积'],
    index='检测类别',
    columns='来源',
    aggfunc=['mean', 'count'],
    fill_value=0,
)
print("数据透视表:")
print(pivot)

# 7. Cross tabulation of mean confidence per (category, source)
print("\n7. 交叉表:")
cross = pd.crosstab(
    df_detections['检测类别'],
    df_detections['来源'],
    values=df_detections['置信度'],
    aggfunc='mean',
)
print("交叉表（平均置信度）:")
print(cross)

时间序列处理

时间序列方法

方法	作用	示例
`pd.to_datetime()`	转换为时间戳	`pd.to_datetime(df['col'])`
`.dt`访问器	时间属性	`df['date'].dt.year`
`.resample()`	重采样	`df.resample('D').mean()`
`.shift()`	偏移	`df['col'].shift(1)`
`.rolling()`	滚动窗口	`df['col'].rolling(window=7).mean()`
`.expanding()`	扩展窗口	`df['col'].expanding().mean()`
`.pct_change()`	百分比变化	`df['col'].pct_change()`
`.diff()`	差分	`df['col'].diff()`

print("=== 时间序列处理 ===")
# Hourly time-series data. Lowercase 'h' is the hourly alias; 'H' is deprecated.
np.random.seed(42)
date_range = pd.date_range('2024-01-01', periods=100, freq='h')
df_time = pd.DataFrame({
    '时间戳': date_range,
    '检测数量': np.random.poisson(5, 100),          # Poisson distributed
    '平均置信度': np.random.uniform(0.7, 0.95, 100),
    '处理时间': np.random.exponential(0.5, 100),    # exponentially distributed
})
# Use the timestamp as the index so time-based operations work
df_time.set_index('时间戳', inplace=True)
print("时间序列数据:")
print(df_time.head())

# 1. Extracting datetime attributes
print("\n1. 时间属性提取:")
df_time['小时'] = df_time.index.hour
df_time['星期几'] = df_time.index.day_name()
df_time['是否工作日'] = df_time.index.dayofweek < 5
print("添加时间属性后:")
print(df_time[['检测数量', '小时', '星期几', '是否工作日']].head())

# 2. Downsampling to daily values
print("\n2. 重采样 - 按天聚合:")
daily = df_time.resample('D').agg({
    '检测数量': 'sum',
    '平均置信度': 'mean',
    '处理时间': 'mean',
})
print("按天重采样:")
print(daily.head())

# 3. Upsampling with interpolation
print("\n3. 重采样 - 按10分钟插值:")
# Sparse input: one point every 6 hours. Only numeric columns are kept because
# the weekday names and boolean flag cannot be linearly interpolated.
df_sparse = df_time.iloc[::6, :].select_dtypes('number')
# '10min' replaces the deprecated '10T' alias.
ten_min = df_sparse.resample('10min').asfreq().interpolate()
print("按10分钟插值:")
print(ten_min.head())

# 4. Rolling windows
print("\n4. 滚动窗口计算:")
# Centred 6-hour rolling mean
df_time['6小时平均检测'] = df_time['检测数量'].rolling(window=6, center=True).mean()
# Trailing 24-hour rolling standard deviation (NaN for the first 23 rows)
df_time['24小时检测波动'] = df_time['检测数量'].rolling(window=24).std()
print("滚动窗口统计:")
print(df_time[['检测数量', '6小时平均检测', '24小时检测波动']].head(10))

# 5. Expanding windows
print("\n5. 扩展窗口计算:")
# Running mean over all rows seen so far
df_time['累计平均置信度'] = df_time['平均置信度'].expanding().mean()
print("扩展窗口统计:")
print(df_time[['平均置信度', '累计平均置信度']].head())

# 6. Shifting
print("\n6. 时间偏移:")
# Day-over-day change: compare with the value 24 rows (hours) earlier
df_time['检测数量_昨日'] = df_time['检测数量'].shift(24)
df_time['检测数量变化'] = df_time['检测数量'] - df_time['检测数量_昨日']
print("时间偏移计算:")
print(df_time[['检测数量', '检测数量_昨日', '检测数量变化']].head(25))

# 7. Percentage change and differencing
print("\n7. 百分比变化和差分:")
df_time['检测数量_pct_change'] = df_time['检测数量'].pct_change()
df_time['检测数量_diff'] = df_time['检测数量'].diff()
print("变化率计算:")
print(df_time[['检测数量', '检测数量_pct_change', '检测数量_diff']].head())

# 8. Selecting time ranges
print("\n8. 时间段选择:")
# Working hours 09:00-17:00 (both ends inclusive by default)
work_hours = df_time.between_time('09:00', '17:00')
print(f"工作时间数据量: {len(work_hours)}")
# Partial-string row selection must go through .loc; df['2024-01-01'] is a
# column lookup and raises KeyError in pandas 2.
jan_first = df_time.loc['2024-01-01']
print(f"1月1日数据量: {len(jan_first)}")
jan_data = df_time.loc['2024-01']
print(f"1月份数据量: {len(jan_data)}")

性能优化技巧

性能优化方法

技巧	作用	说明
向量化操作	避免循环	使用NumPy/Pandas内置函数
`.loc[]` vs `[]`	正确索引	使用`.loc[]`进行标签索引
避免链式赋值	防止警告	直接赋值而不是链式
使用合适的数据类型	减少内存	如`int8`代替`int64`
分块处理	处理大数据	`chunksize`参数
使用`.query()`	快速查询	语法简洁性能好
`.at[]`/`.iat[]`	快速访问	访问单个元素
`.eval()`	表达式求值	加速复杂运算
内存映射文件	处理大文件	`mmap_mode`参数

import time
from pathlib import Path

print("=== 性能优化技巧 ===")
# 1. Large data set for the measurements
print("1. 性能测试数据集:")
np.random.seed(42)
n_rows = 100000
df_large = pd.DataFrame({
    '图像ID': [f'img_{i:06d}' for i in range(n_rows)],
    '宽度': np.random.randint(800, 2561, n_rows),
    '高度': np.random.randint(600, 1441, n_rows),
    '类别': np.random.choice(['cat', 'dog', 'bird', 'car', 'person'], n_rows),
    '置信度': np.random.uniform(0.5, 1.0, n_rows),
    '检测时间': np.random.uniform(0.01, 2.0, n_rows),
})
print(f"数据集大小: {n_rows} 行 × {df_large.shape[1]} 列")
print(f"内存使用: {df_large.memory_usage(deep=True).sum()/1024**2:.2f} MB")

# 2. Narrower dtypes
print("\n2. 优化数据类型:")
print("优化前数据类型:")
print(df_large.dtypes)
df_optimized = df_large.copy()
df_optimized['宽度'] = df_optimized['宽度'].astype('int16')   # 800-2560 fits in int16
df_optimized['高度'] = df_optimized['高度'].astype('int16')
df_optimized['置信度'] = df_optimized['置信度'].astype('float32')
df_optimized['检测时间'] = df_optimized['检测时间'].astype('float32')
print("\n优化后数据类型:")
print(df_optimized.dtypes)
print(f"内存减少: {(df_large.memory_usage().sum()- df_optimized.memory_usage().sum())/1024**2:.2f} MB")

# 3. Vectorised operations vs. a Python loop
print("\n3. 向量化 vs 循环:")
# Both methods run on the same 1,000-row sample: row-wise iloc access over all
# 100,000 rows takes many seconds and adds nothing to the comparison.
df_sample = df_large.iloc[:1000]

# Method 1: explicit loop with per-row iloc access (slow on purpose)
start = time.perf_counter()
areas_loop = []
for i in range(len(df_sample)):
    areas_loop.append(df_sample.iloc[i]['宽度'] * df_sample.iloc[i]['高度'])
loop_time = time.perf_counter() - start

# Method 2: vectorised column arithmetic (fast)
start = time.perf_counter()
areas_vectorized = df_sample['宽度'] * df_sample['高度']
vector_time = time.perf_counter() - start

print(f"循环时间: {loop_time:.4f} 秒")
print(f"向量化时间: {vector_time:.4f} 秒")
# max(..., 1e-9) protects the ratio from a zero-length measured interval
print(f"加速比: {loop_time/max(vector_time, 1e-9):.1f}倍")

# 4. Boolean mask vs. query()
print("\n4. .query() 优化:")
start = time.perf_counter()
result1 = df_large[(df_large['宽度'] > 2000) & (df_large['置信度'] > 0.8)]
time1 = time.perf_counter() - start
start = time.perf_counter()
result2 = df_large.query('宽度 > 2000 and 置信度 > 0.8')
time2 = time.perf_counter() - start
print(f"传统方法: {time1:.4f} 秒")
print(f".query()方法: {time2:.4f} 秒")
print(f"性能提升: {time1/max(time2, 1e-9):.1f}倍")

# 5. eval() for a compound expression: standardised area
print("\n5. .eval() 优化:")
start = time.perf_counter()
df_large['标准化面积'] = (
    df_large['宽度'] * df_large['高度']
    - df_large['宽度'].mean() * df_large['高度'].mean()
) / (df_large['宽度'].std() * df_large['高度'].std())
time1 = time.perf_counter() - start

start = time.perf_counter()
# DataFrame.eval resolves bare names as columns (top-level pd.eval does not);
# the scalar statistics are computed once and referenced with '@'.
w_mean, h_mean = df_large['宽度'].mean(), df_large['高度'].mean()
w_std, h_std = df_large['宽度'].std(), df_large['高度'].std()
df_large['标准化面积_eval'] = df_large.eval(
    '(宽度 * 高度 - @w_mean * @h_mean) / (@w_std * @h_std)'
)
time2 = time.perf_counter() - start
print(f"传统计算: {time1:.4f} 秒")
print(f".eval()计算: {time2:.4f} 秒")

# 6. Chunked processing of a large CSV
print("\n6. 分块处理:")
chunk_size = 10000
csv_path = Path('large_data.csv')
if csv_path.exists():
    results = []
    for chunk in pd.read_csv(csv_path, chunksize=chunk_size):
        # Keep only the rows of this chunk with confidence above 0.7
        results.append(chunk[chunk['置信度'] > 0.7])
    # pd.concat raises on an empty list, so an empty file yields an empty frame
    final_df = pd.concat(results, ignore_index=True) if results else pd.DataFrame()
    print(f"分块处理完成，总行数: {len(final_df)}")
else:
    # The example file is not shipped with the tutorial; skip instead of crashing
    print(f"跳过分块处理: 未找到文件 {csv_path}")

# 7. Memory-mapped arrays
print("\n7. 内存映射文件:")
mmap_path = Path('large_array.dat')
if mmap_path.exists():
    # mode='r' maps an existing file read-only without loading it into memory
    mmap_array = np.memmap(mmap_path, dtype='float32', mode='r', shape=(1000000, 100))
    print(f"内存映射数组形状: {mmap_array.shape}")
else:
    print(f"跳过内存映射: 未找到文件 {mmap_path}")

CV工程师实用案例

图像数据集管理

print("=== CV工程师实用案例 ===")

# Case 1: image dataset management
print("案例1：图像数据集管理")

# Simulated image metadata
image_data = {
    'image_id': [f'img_{i:04d}' for i in range(1000)],
    'width': np.random.randint(800, 2561, 1000),
    'height': np.random.randint(600, 1441, 1000),
    'channels': np.random.choice([1, 3], 1000, p=[0.1, 0.9]),
    'format': np.random.choice(['JPEG', 'PNG', 'BMP'], 1000),
    'file_size': np.random.randint(1024, 1024 * 1024, 1000),  # 1KB-1MB
    'category': np.random.choice(['person', 'car', 'cat', 'dog', 'bird', 'other'], 1000),
    'split': np.random.choice(['train', 'val', 'test'], 1000, p=[0.7, 0.15, 0.15]),
}
df_images = pd.DataFrame(image_data)
print(f"图像数据集: {df_images.shape[0]} 张图像")
print(f"类别分布:\n{df_images['category'].value_counts()}")
print(f"数据集划分:\n{df_images['split'].value_counts()}")

# Derived features
df_images['aspect_ratio'] = df_images['width'] / df_images['height']
df_images['pixel_count'] = df_images['width'] * df_images['height']
df_images['size_category'] = pd.cut(
    df_images['file_size'],
    bins=[0, 1024 * 100, 1024 * 500, float('inf')],
    labels=['small', 'medium', 'large'],
)

# Per-split summary statistics
print("\n数据集统计:")
stats = df_images.groupby('split').agg({
    'width': 'mean',
    'height': 'mean',
    'pixel_count': 'mean',
    'file_size': 'mean',
    'category': 'nunique',
}).round(2)
stats.columns = ['平均宽度', '平均高度', '平均像素数', '平均文件大小', '类别数']
print(stats)

# Case 2: analysing model training results
print("\n案例2：模型训练结果分析")

# Simulated training log
epochs = 50
train_log = pd.DataFrame({
    'epoch': range(1, epochs + 1),
    'train_loss': np.exp(-np.linspace(0, 5, epochs)) + np.random.normal(0, 0.02, epochs),
    'val_loss': np.exp(-np.linspace(0, 4.5, epochs)) + np.random.normal(0, 0.03, epochs),
    'train_acc': 1 - np.exp(-np.linspace(0, 4, epochs)) + np.random.normal(0, 0.01, epochs),
    'val_acc': 1 - np.exp(-np.linspace(0, 3.5, epochs)) + np.random.normal(0, 0.015, epochs),
    'learning_rate': np.logspace(-3, -5, epochs),
})
print("训练日志:")
print(train_log.head())

# Row with the highest validation accuracy
best_epoch = train_log.loc[train_log['val_acc'].idxmax()]
print(f"\n最佳epoch: {int(best_epoch['epoch'])}")
print(f"最佳验证准确率: {best_epoch['val_acc']:.3%}")
print(f"对应训练准确率: {best_epoch['train_acc']:.3%}")

# Convergence speed: first epoch whose val_loss drops below 0.1.
# Guarded so that a run that never gets below the threshold does not raise.
converged_epochs = train_log.loc[train_log['val_loss'] < 0.1, 'epoch']
if converged_epochs.empty:
    convergence_epoch = None
    print("val_loss 未收敛到 0.1 以下")
else:
    convergence_epoch = converged_epochs.iloc[0]
    print(f"收敛到val_loss<0.1的epoch: {int(convergence_epoch)}")

# Case 3: analysing detection results
print("\n案例3：检测结果分析")

# Simulated detections
n_detections = 500
detection_results = pd.DataFrame({
    'image_id': np.random.choice(df_images['image_id'], n_detections),
    'class': np.random.choice(['person', 'car', 'cat', 'dog'], n_detections),
    'confidence': np.random.beta(5, 2, n_detections),  # skewed towards high confidence
    'bbox_x': np.random.uniform(0, 1, n_detections),
    'bbox_y': np.random.uniform(0, 1, n_detections),
    'bbox_w': np.random.uniform(0.1, 0.5, n_detections),
    'bbox_h': np.random.uniform(0.1, 0.5, n_detections),
})

# Detection quality buckets
detection_results['bbox_area'] = detection_results['bbox_w'] * detection_results['bbox_h']
detection_results['detection_quality'] = pd.cut(
    detection_results['confidence'],
    bins=[0, 0.5, 0.7, 0.9, 1.0],
    labels=['poor', 'fair', 'good', 'excellent'],
)

print("检测结果分析:")
print(f"总检测数: {len(detection_results)}")

# Per-class statistics
class_stats = detection_results.groupby('class').agg({
    'confidence': ['mean', 'std'],
    'bbox_area': 'mean',
    'image_id': 'nunique',
})
class_stats.columns = ['平均置信度', '置信度标准差', '平均边界框面积', '涉及图像数']
print(f"\n按类别统计:\n{class_stats}")


def calculate_map(results, iou_threshold=0.5):
    """Return a simplified average-precision score for one set of detections.

    Detections are ranked by descending ``confidence``; a detection counts as
    a true positive when its confidence exceeds 0.5 (a stand-in for real
    ground-truth matching). The score is the mean of the running precision
    over the ranked list.

    Args:
        results: DataFrame with a ``confidence`` column. It is not modified.
        iou_threshold: Accepted for interface compatibility; this simplified
            version performs no IoU matching and does not use it.

    Returns:
        The mean running precision as a float, or 0.0 when ``results`` has
        no rows.
    """
    if results.empty:
        # The mean of an empty precision series would be NaN.
        return 0.0

    ranked = results.sort_values('confidence', ascending=False)
    # Simulated TP/FP assignment (simplified).
    is_tp = ranked['confidence'] > 0.5
    cumulative_tp = is_tp.cumsum()
    cumulative_fp = (~is_tp).cumsum()
    # TP + FP equals the 1-based rank, so the denominator is never zero.
    precision = cumulative_tp / (cumulative_tp + cumulative_fp)
    return float(precision.mean())


# AP per class
ap_scores = {}
for cls in detection_results['class'].unique():
    cls_results = detection_results[detection_results['class'] == cls]
    ap_scores[cls] = calculate_map(cls_results)

print("\n各类别AP分数:")
for cls, ap in ap_scores.items():
    print(f" {cls}: {ap:.3f}")
print(f"mAP: {np.mean(list(ap_scores.values())):.3f}")

Matplotlib

架构

Figure（图像）：顶级容器，所有绘图元素的容器
Axes（坐标系）：带有坐标系的绘图区域，一个Figure可以包含多个Axes
Axis（坐标轴）：坐标系中的轴，包含刻度、标签等
Artist（艺术家）：所有可见元素的基类（文本、线条、图像等）

绘图风格

MATLAB风格：函数式接口，使用plt.plot()等函数
面向对象风格：显式创建Figure和Axes对象，更灵活可控

基础绘图

创建图形

函数/方法	作用	示例
`plt.figure()`	创建图形	`plt.figure(figsize=(8,6))`
`plt.subplots()`	创建图形和子图	`fig, ax = plt.subplots()`
`plt.subplot()`	创建子图	`plt.subplot(2,2,1)`
`plt.gcf()`	获取当前图形	`fig = plt.gcf()`
`plt.gca()`	获取当前坐标系	`ax = plt.gca()`
`plt.clf()`	清除当前图形	`plt.clf()`
`plt.cla()`	清除当前坐标系	`plt.cla()`
`plt.close()`	关闭图形	`plt.close('all')`

import matplotlib.pyplot as plt
import numpy as np

# 2.1 Creating figures
print("=== 创建图形示例 ===")

# Method 1: MATLAB-style (pyplot state machine)
plt.figure(figsize=(10, 6), dpi=100, facecolor='white')
plt.plot([1, 2, 3, 4], [1, 4, 9, 16])
plt.title('MATLAB Style Plot')
plt.xlabel('X轴')
plt.ylabel('Y轴')
plt.grid(True)
plt.show()

# Method 2: object-oriented style (recommended)
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot([1, 2, 3, 4], [1, 4, 9, 16])
ax.set_title('Object-Oriented Style Plot')
ax.set_xlabel('X轴')
ax.set_ylabel('Y轴')
ax.grid(True)
plt.show()

# 2.2 Creating several subplots
print("=== 创建子图示例 ===")

# Method 1: a regular grid via subplots()
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
fig.suptitle('2x2子图示例')

# First subplot
axes[0, 0].plot([1, 2, 3, 4], [1, 4, 9, 16], 'ro-')
axes[0, 0].set_title('子图 1')

# Second subplot
axes[0, 1].plot([1, 2, 3, 4], [1, 2, 3, 4], 'bs--')
axes[0, 1].set_title('子图 2')

# Third subplot
x = np.linspace(0, 2 * np.pi, 100)
axes[1, 0].plot(x, np.sin(x), 'g-')
axes[1, 0].set_title('正弦曲线')

# Fourth subplot
axes[1, 1].bar(['A', 'B', 'C', 'D'], [3, 7, 2, 5], color='orange')
axes[1, 1].set_title('柱状图')

plt.tight_layout()
plt.show()

# Method 2: an irregular layout via subplot()
fig = plt.figure(figsize=(10, 6))

# Cell 1 of the 2x2 grid: first row, first column only. The title previously
# claimed it spanned two columns, which subplot(2, 2, 1) does not do.
ax1 = plt.subplot(2, 2, 1)
ax1.plot([1, 2, 3, 4], [1, 4, 9, 16])
ax1.set_title('子图1')

# Cell 2 of the 2x2 grid: first row, second column
ax2 = plt.subplot(2, 2, 2)
ax2.plot([1, 2, 3, 4], [1, 2, 3, 4])
ax2.set_title('子图2')

# Cells 3 and 4 merged: the whole second row
ax3 = plt.subplot(2, 2, (3, 4))
ax3.plot([1, 2, 3, 4], [1, 8, 27, 64])
ax3.set_title('子图3 (跨两列)')

plt.tight_layout()
plt.show()

基本图表类型

线图

函数	作用	示例
`plt.plot()`	绘制线图	`plt.plot(x, y, 'r--', linewidth=2)`
`plt.scatter()`	绘制散点图	`plt.scatter(x, y, s=50, c='blue')`
`plt.errorbar()`	绘制误差线	`plt.errorbar(x, y, yerr=error)`
`plt.fill_between()`	填充区域	`plt.fill_between(x, y1, y2)`
`plt.stem()`	绘制火柴杆图	`plt.stem(x, y)`
`plt.step()`	绘制阶梯图	`plt.step(x, y)`
`plt.bar()`	绘制柱状图	`plt.bar(x, height, width=0.8)`
`plt.barh()`	绘制水平柱状图	`plt.barh(y, width, height=0.8)`
`plt.pie()`	绘制饼图	`plt.pie(sizes, labels=labels)`
`plt.hist()`	绘制直方图	`plt.hist(data, bins=20)`
`plt.boxplot()`	绘制箱线图	`plt.boxplot(data)`
`plt.violinplot()`	绘制小提琴图	`plt.violinplot(data)`
`plt.imshow()`	显示图像	`plt.imshow(img, cmap='gray')`

python

# 3.1 Line plots and other basic chart types
print("=== 线图示例 ===")
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
fig.suptitle('基本图表类型')

# 1. Basic line plot
x = np.linspace(0, 10, 100)
axes[0, 0].plot(x, np.sin(x), label='sin(x)', color='red', linewidth=2, linestyle='-')
axes[0, 0].plot(x, np.cos(x), label='cos(x)', color='blue', linewidth=2, linestyle='--')
axes[0, 0].set_title('线图')
axes[0, 0].legend()
axes[0, 0].grid(True)

# 2. Scatter plot
np.random.seed(42)
x_scatter = np.random.randn(50)
y_scatter = np.random.randn(50)
colors = np.random.rand(50)
sizes = 1000 * np.random.rand(50)
axes[0, 1].scatter(x_scatter, y_scatter, c=colors, s=sizes, alpha=0.6, cmap='viridis')
axes[0, 1].set_title('散点图')
axes[0, 1].set_xlabel('X')
axes[0, 1].set_ylabel('Y')

# 3. Bar chart
categories = ['A', 'B', 'C', 'D', 'E']
values = [25, 40, 30, 35, 20]
colors_bar = ['red', 'blue', 'green', 'orange', 'purple']
axes[0, 2].bar(categories, values, color=colors_bar, edgecolor='black', linewidth=2)
axes[0, 2].set_title('柱状图')
axes[0, 2].set_xlabel('类别')
axes[0, 2].set_ylabel('值')

# 4. Histogram
data_hist = np.random.randn(1000)
axes[1, 0].hist(data_hist, bins=30, color='skyblue', edgecolor='black', alpha=0.7)
axes[1, 0].set_title('直方图')
axes[1, 0].set_xlabel('值')
axes[1, 0].set_ylabel('频数')

# 5. Pie chart
sizes_pie = [15, 30, 45, 10]
labels_pie = ['A类', 'B类', 'C类', 'D类']
explode = (0, 0.1, 0, 0)  # pull out the second wedge
axes[1, 1].pie(sizes_pie, explode=explode, labels=labels_pie,
               autopct='%1.1f%%', shadow=True, startangle=90)
axes[1, 1].set_title('饼图')

# 6. Box plot
data_box = [np.random.normal(0, std, 100) for std in range(1, 5)]
axes[1, 2].boxplot(data_box)
# The group names are applied as tick labels afterwards: boxplot's `labels`
# keyword is deprecated in newer Matplotlib (renamed to `tick_labels`), and
# setting the tick labels directly works on both old and new releases.
axes[1, 2].set_xticklabels(['组1', '组2', '组3', '组4'])
axes[1, 2].set_title('箱线图')
axes[1, 2].set_ylabel('值')

plt.tight_layout()
plt.show()

# 3.2 More chart types
print("=== 更多图表类型 ===")
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
fig.suptitle('更多图表类型')

# 1. Error bars
x_error = np.arange(1, 6)
y_error = np.array([2, 3.5, 4, 4.5, 5])
yerr = np.array([0.2, 0.3, 0.4, 0.5, 0.6])
# The square marker is given once through fmt; passing fmt='o' together with
# marker='s' defines the marker twice and triggers a redundancy warning.
axes[0, 0].errorbar(x_error, y_error, yerr=yerr, fmt='s', capsize=5,
                    ecolor='red', mfc='blue', mec='blue')
axes[0, 0].set_title('误差线图')

# 2. Filled regions between two curves
x_fill = np.linspace(0, 10, 100)
y1_fill = np.sin(x_fill)
y2_fill = np.cos(x_fill)
axes[0, 1].plot(x_fill, y1_fill, 'b-', label='sin(x)')
axes[0, 1].plot(x_fill, y2_fill, 'r-', label='cos(x)')
axes[0, 1].fill_between(x_fill, y1_fill, y2_fill, where=(y1_fill > y2_fill),
                        color='blue', alpha=0.3, label='sin>cos')
axes[0, 1].fill_between(x_fill, y1_fill, y2_fill, where=(y1_fill <= y2_fill),
                        color='red', alpha=0.3, label='sin≤cos')
axes[0, 1].set_title('填充区域图')
axes[0, 1].legend()

# 3. Step plot
x_step = np.arange(1, 11)
y_step = np.random.randint(1, 10, 10)
axes[0, 2].step(x_step, y_step, where='mid', linewidth=2, marker='o')
axes[0, 2].set_title('阶梯图')
axes[0, 2].grid(True)

# 4. Stem plot
x_stem = np.linspace(0.1, 2 * np.pi, 20)
y_stem = np.exp(-x_stem) * np.cos(2 * np.pi * x_stem)
axes[1, 0].stem(x_stem, y_stem, linefmt='C0-', markerfmt='C0o', basefmt='C3--')
axes[1, 0].set_title('火柴杆图')

# 5. Horizontal bar chart
categories_hbar = ['模型A', '模型B', '模型C', '模型D', '模型E']
accuracy = [0.85, 0.92, 0.78, 0.95, 0.88]
axes[1, 1].barh(categories_hbar, accuracy, color='lightgreen', edgecolor='darkgreen')
axes[1, 1].set_title('模型准确率')
axes[1, 1].set_xlabel('准确率')
axes[1, 1].set_xlim(0, 1)

# 6. Violin plot
data_violin = [np.random.normal(0, std, 100) for std in range(1, 4)]
axes[1, 2].violinplot(data_violin, showmeans=True, showmedians=True)
axes[1, 2].set_title('小提琴图')
axes[1, 2].set_xticks([1, 2, 3])
axes[1, 2].set_xticklabels(['组1', '组2', '组3'])

plt.tight_layout()
plt.show()

图像显示与处理

图像显示

函数	作用	示例
`plt.imshow()`	显示图像	`plt.imshow(img, cmap='gray')`
`plt.colorbar()`	显示颜色条	`plt.colorbar()`
`plt.axis()`	坐标轴设置	`plt.axis('off')` 或 `plt.axis('equal')`
`plt.matshow()`	显示矩阵	`plt.matshow(matrix)`
`plt.contour()`	绘制等高线	`plt.contour(X, Y, Z)`
`plt.contourf()`	填充等高线	`plt.contourf(X, Y, Z)`
`plt.pcolor()`	伪彩色图	`plt.pcolor(X, Y, Z)`
`plt.pcolormesh()`	网格伪彩色图	`plt.pcolormesh(X, Y, Z)`
`plt.streamplot()`	流线图	`plt.streamplot(X, Y, U, V)`

# 4.1 Displaying images
print("=== 图像显示示例 ===")

# Example image data
# 1. Random image
random_image = np.random.rand(100, 100)

# 2. Smooth image: a 2D Gaussian
x = np.linspace(-2, 2, 100)
y = np.linspace(-2, 2, 100)
X, Y = np.meshgrid(x, y)
Z = np.exp(-(X**2 + Y**2))

# 3. Image with a feature: a filled circle of radius 20 centred at (50, 50).
# Built as a vectorized mask instead of a 100x100 Python double loop.
circle_rows, circle_cols = np.ogrid[:100, :100]
circle_image = ((circle_rows - 50) ** 2 + (circle_cols - 50) ** 2 < 400).astype(float)

# 4. RGB image
rgb_image = np.zeros((100, 100, 3))
rgb_image[:, :, 0] = np.linspace(0, 1, 100).reshape(1, -1)  # red: horizontal ramp
rgb_image[:, :, 1] = np.linspace(0, 1, 100).reshape(-1, 1)  # green: vertical ramp
rgb_image[:, :, 2] = 0.5                                    # blue: constant

# Show the images
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
fig.suptitle('图像显示示例')

# 1. Grayscale image
axes[0, 0].imshow(random_image, cmap='gray')
axes[0, 0].set_title('随机灰度图像')
axes[0, 0].axis('off')

# 2. Pseudo-colour image
im = axes[0, 1].imshow(Z, cmap='hot')
axes[0, 1].set_title('2D高斯函数 (hot colormap)')
axes[0, 1].axis('off')
plt.colorbar(im, ax=axes[0, 1])

# 3. Binary image
axes[0, 2].imshow(circle_image, cmap='binary')
axes[0, 2].set_title('圆形二值图像')
axes[0, 2].axis('off')

# 4. RGB colour image
axes[1, 0].imshow(rgb_image)
axes[1, 0].set_title('RGB彩色图像')
axes[1, 0].axis('off')

# 5. A different colormap
im2 = axes[1, 1].imshow(Z, cmap='viridis')
axes[1, 1].set_title('viridis colormap')
axes[1, 1].axis('off')
plt.colorbar(im2, ax=axes[1, 1])

# 6. Interpolated image
axes[1, 2].imshow(Z, cmap='coolwarm', interpolation='bilinear')
axes[1, 2].set_title('双线性插值')
axes[1, 2].axis('off')

plt.tight_layout()
plt.show()

# 4.2 Contours and pseudo-colour plots
print("=== 等高线和伪彩色图 ===")
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
fig.suptitle('等高线和伪彩色图')

# Example data
x_contour = np.linspace(-3, 3, 100)
y_contour = np.linspace(-3, 3, 100)
X_contour, Y_contour = np.meshgrid(x_contour, y_contour)
Z_contour = np.sin(X_contour) * np.cos(Y_contour) + 0.1 * X_contour

# 1. Contour lines
contour = axes[0, 0].contour(X_contour, Y_contour, Z_contour, 20, cmap='RdGy')
axes[0, 0].set_title('等高线图')
axes[0, 0].clabel(contour, inline=True, fontsize=8)
axes[0, 0].set_xlabel('X')
axes[0, 0].set_ylabel('Y')

# 2. Filled contours
contourf = axes[0, 1].contourf(X_contour, Y_contour, Z_contour, 20, cmap='viridis')
axes[0, 1].set_title('填充等高线图')
axes[0, 1].set_xlabel('X')
axes[0, 1].set_ylabel('Y')
plt.colorbar(contourf, ax=axes[0, 1])

# 3. Pseudo-colour plot
pcolor = axes[0, 2].pcolor(X_contour, Y_contour, Z_contour, cmap='coolwarm', shading='auto')
axes[0, 2].set_title('伪彩色图')
axes[0, 2].set_xlabel('X')
axes[0, 2].set_ylabel('Y')
plt.colorbar(pcolor, ax=axes[0, 2])

# 4. Mesh pseudo-colour plot (shading='auto' for consistency with pcolor above,
# since the coordinate arrays have the same shape as the data)
pcolormesh = axes[1, 0].pcolormesh(X_contour, Y_contour, Z_contour,
                                   cmap='Spectral', shading='auto')
axes[1, 0].set_title('网格伪彩色图')
axes[1, 0].set_xlabel('X')
axes[1, 0].set_ylabel('Y')
plt.colorbar(pcolormesh, ax=axes[1, 0])

# 5. Filled contours with contour lines on top
contourf2 = axes[1, 1].contourf(X_contour, Y_contour, Z_contour, 20, cmap='bone', alpha=0.7)
contour2 = axes[1, 1].contour(X_contour, Y_contour, Z_contour, 20,
                              colors='black', linewidths=0.5)
axes[1, 1].set_title('等高线与伪彩色结合')
axes[1, 1].set_xlabel('X')
axes[1, 1].set_ylabel('Y')
plt.colorbar(contourf2, ax=axes[1, 1])

# 6. Gradient magnitude shown as an image.
# meshgrid's default indexing puts y along axis 0 and x along axis 1, so the
# sample coordinates are passed in that order to get derivatives in data units.
gradient = np.gradient(Z_contour, y_contour, x_contour)
magnitude = np.sqrt(gradient[0]**2 + gradient[1]**2)
# origin='lower' puts row 0 (y = -3) at the bottom, matching the extent and
# the orientation of the contour panels; the default origin would draw the
# field upside down.
im_contour = axes[1, 2].imshow(magnitude, cmap='jet', extent=[-3, 3, -3, 3],
                               origin='lower')
axes[1, 2].set_title('梯度幅度图像')
axes[1, 2].set_xlabel('X')
axes[1, 2].set_ylabel('Y')
plt.colorbar(im_contour, ax=axes[1, 2])

plt.tight_layout()
plt.show()

3D绘图

3D图形

函数	作用	示例
`Axes3D.plot_surface()`	绘制3D曲面	`ax.plot_surface(X, Y, Z, cmap='viridis')`
`Axes3D.plot_wireframe()`	绘制3D线框	`ax.plot_wireframe(X, Y, Z, color='black')`
`Axes3D.scatter()`	绘制3D散点	`ax.scatter(x, y, z, c=z, cmap='viridis')`
`Axes3D.plot()`	绘制3D曲线	`ax.plot(x, y, z, 'r-', linewidth=2)`
`Axes3D.contour3D()`	绘制3D等高线	`ax.contour3D(X, Y, Z, 50, cmap='binary')`
`Axes3D.quiver()`	绘制3D箭头	`ax.quiver(x, y, z, u, v, w)`

# 5.1 3D plotting
print("=== 3D绘图示例 ===")

# Axes3D is not referenced by name below; the import is kept from the original.
from mpl_toolkits.mplot3d import Axes3D


def _label_3d_axes(ax, title):
    """Set the panel title and the X/Y/Z axis labels shared by every 3D panel."""
    ax.set_title(title)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')


# Surface data: a radial sine wave on a 50x50 grid
x_3d = np.linspace(-5, 5, 50)
y_3d = np.linspace(-5, 5, 50)
X_3d, Y_3d = np.meshgrid(x_3d, y_3d)
Z_3d = np.sin(np.sqrt(X_3d**2 + Y_3d**2))

# Scatter data (seeded so the point cloud is reproducible)
np.random.seed(42)
n_points = 200
x_scatter_3d = np.random.randn(n_points)
y_scatter_3d = np.random.randn(n_points)
z_scatter_3d = np.random.randn(n_points)
colors_3d = np.random.rand(n_points)
sizes_3d = 100 * np.random.rand(n_points)

# One figure holding a 2x3 grid of 3D panels
fig = plt.figure(figsize=(16, 10))

# 1. Surface
ax1 = fig.add_subplot(2, 3, 1, projection='3d')
surf = ax1.plot_surface(X_3d, Y_3d, Z_3d, cmap='viridis', alpha=0.8, edgecolor='none')
_label_3d_axes(ax1, '3D曲面图')
fig.colorbar(surf, ax=ax1, shrink=0.5, aspect=10)

# 2. Wireframe
ax2 = fig.add_subplot(2, 3, 2, projection='3d')
ax2.plot_wireframe(X_3d, Y_3d, Z_3d, color='blue', linewidth=0.5, alpha=0.7)
_label_3d_axes(ax2, '3D线框图')

# 3. Scatter
ax3 = fig.add_subplot(2, 3, 3, projection='3d')
scatter_3d = ax3.scatter(
    x_scatter_3d, y_scatter_3d, z_scatter_3d,
    c=colors_3d, s=sizes_3d, alpha=0.6, cmap='plasma',
)
_label_3d_axes(ax3, '3D散点图')
fig.colorbar(scatter_3d, ax=ax3, shrink=0.5, aspect=10)

# 4. Bars on a 5x5 grid of base positions
ax4 = fig.add_subplot(2, 3, 4, projection='3d')
grid_x, grid_y = np.meshgrid(np.arange(5), np.arange(5))
x_pos = grid_x.flatten()
y_pos = grid_y.flatten()
z_pos = np.zeros_like(x_pos)
dx = dy = 0.5 * np.ones_like(z_pos)
dz = np.random.rand(25)
colors_bar3d = plt.cm.viridis(dz / dz.max())  # colour each bar by relative height
ax4.bar3d(x_pos, y_pos, z_pos, dx, dy, dz, color=colors_bar3d, shade=True)
_label_3d_axes(ax4, '3D柱状图')

# 5. Parametric curve whose radius grows with z**2
ax5 = fig.add_subplot(2, 3, 5, projection='3d')
theta = np.linspace(-4 * np.pi, 4 * np.pi, 100)
z_curve = np.linspace(-2, 2, 100)
r_curve = z_curve**2 + 1
x_curve = r_curve * np.sin(theta)
y_curve = r_curve * np.cos(theta)
ax5.plot(x_curve, y_curve, z_curve, 'r-', linewidth=2)
_label_3d_axes(ax5, '3D曲线图 (螺旋线)')

# 6. Contours drawn in 3D
ax6 = fig.add_subplot(2, 3, 6, projection='3d')
contour3d = ax6.contour3D(X_3d, Y_3d, Z_3d, 50, cmap='binary')
_label_3d_axes(ax6, '3D等高线图')

plt.tight_layout()
plt.show()

高级定制

图形元素定制

元素	定制方法	示例
线条	`linestyle`, `linewidth`, `color`, `marker`	`'r--o', linewidth=2, markersize=8`
标记	`marker`, `markersize`, `markerfacecolor`	`marker='s', markersize=10, mfc='red'`
文本	`plt.text()`, `plt.annotate()`, `plt.title()`	`plt.text(x, y, 'text', fontsize=12)`
图例	`plt.legend()`, `loc`, `frameon`, `ncol`	`plt.legend(loc='upper right', ncol=2)`
网格	`plt.grid()`, `which`, `linestyle`, `alpha`	`plt.grid(True, linestyle='--', alpha=0.5)`
坐标轴	`plt.xlim()`, `plt.ylim()`, `plt.xticks()`	`plt.xlim(0, 10); plt.xticks(range(0, 11, 2))`
刻度	`plt.tick_params()`, `direction`, `labelsize`	`plt.tick_params(labelsize=10, direction='in')`
颜色条	`plt.colorbar()`, `orientation`, `shrink`	`plt.colorbar(orientation='horizontal')`
子图间距	`plt.subplots_adjust()`, `plt.tight_layout()`	`plt.subplots_adjust(wspace=0.3, hspace=0.3)`

# 6.1 Customizing figure elements
print("=== 图形元素定制 ===")

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('图形元素定制示例', fontsize=16, fontweight='bold')
ax_lines, ax_ticks = axes[0, 0], axes[0, 1]
ax_notes, ax_legend = axes[1, 0], axes[1, 1]

# 1. Line and marker styling
x = np.linspace(0, 10, 100)
shared_marker_style = dict(
    linewidth=2,              # line width
    markersize=6,             # marker size
    markeredgecolor='black',  # marker edge colour
    markeredgewidth=1,        # marker edge width
)
ax_lines.plot(
    x, np.sin(x),
    color='red',
    linestyle='-',            # one of '-', '--', '-.', ':'
    marker='o',               # one of 'o', 's', '^', 'D', '*'
    markerfacecolor='blue',   # marker fill colour
    label='sin(x)',
    **shared_marker_style,
)
ax_lines.plot(
    x, np.cos(x),
    color='green',
    linestyle='--',
    marker='s',
    markerfacecolor='yellow',
    label='cos(x)',
    **shared_marker_style,
)
ax_lines.set_title('线条样式定制')
ax_lines.legend(loc='upper right', frameon=True, fancybox=True, shadow=True, ncol=2)
ax_lines.grid(True, linestyle='--', alpha=0.5)

# 2. Axis limits, ticks and labels
x_axis = np.linspace(0, 2 * np.pi, 20)
y_axis = np.sin(x_axis)
ax_ticks.plot(x_axis, y_axis, 'b-o', linewidth=2)

# Axis ranges
ax_ticks.set_xlim(-0.5, 7)
ax_ticks.set_ylim(-1.2, 1.2)

# Tick positions and their labels
ax_ticks.set_xticks([0, np.pi / 2, np.pi, 3 * np.pi / 2, 2 * np.pi])
ax_ticks.set_xticklabels(['0', 'π/2', 'π', '3π/2', '2π'], fontsize=10)
ax_ticks.set_yticks([-1, -0.5, 0, 0.5, 1])
ax_ticks.set_yticklabels(['-1', '-0.5', '0', '0.5', '1'], fontsize=10)

# Tick direction and size
ax_ticks.tick_params(axis='both', which='both', direction='in', length=6, width=2)

# Axis labels
axis_label_font = dict(fontsize=12, fontweight='bold')
ax_ticks.set_xlabel('角度 (弧度)', **axis_label_font)
ax_ticks.set_ylabel('正弦值', **axis_label_font)
ax_ticks.set_title('坐标轴定制')
ax_ticks.grid(True, linestyle=':', alpha=0.7)

# 3. Text and annotations
np.random.seed(42)
x_text = np.arange(1, 11)
y_text = np.random.rand(10) * 100
ax_notes.plot(x_text, y_text, 'g-D', linewidth=2, markersize=8)

# Free-standing text box
ax_notes.text(
    5, 80, '峰值区域',
    fontsize=12, fontweight='bold',
    bbox=dict(boxstyle='round', facecolor='yellow', alpha=0.5),
)

# Arrow annotation pointing at the maximum
peak_index = np.argmax(y_text)
peak_x = x_text[peak_index]
peak_y = y_text[peak_index]
ax_notes.annotate(
    '最大值',
    xy=(peak_x, peak_y),
    xytext=(peak_x - 2, peak_y - 20),
    arrowprops=dict(arrowstyle='->', connectionstyle='arc3', color='red', lw=2),
    fontsize=12, fontweight='bold',
)

# Plain arrow
ax_notes.arrow(2, 30, 2, 20, head_width=0.3, head_length=5, fc='blue', ec='blue')
ax_notes.set_title('文本和标注')
ax_notes.set_xlabel('X轴')
ax_notes.set_ylabel('Y轴')
ax_notes.grid(True, alpha=0.3)

# 4. Legend and colorbar styling
x_multi = np.linspace(0, 10, 100)
for curve_no in range(1, 6):
    shift = curve_no - 1
    y_multi = np.sin(x_multi + shift * 0.5) * (shift + 1) * 0.2
    ax_legend.plot(x_multi, y_multi, label=f'曲线 {curve_no}', linewidth=2)

# Customized legend
legend = ax_legend.legend(
    loc='upper left', fontsize=10,
    title='图例标题', title_fontsize=12,
    frameon=True, fancybox=True, shadow=True,
    borderpad=1, labelspacing=0.5, handlelength=2, ncol=2,
)

# Legend background
legend_frame = legend.get_frame()
legend_frame.set_facecolor('lightgray')
legend_frame.set_alpha(0.7)

ax_legend.set_title('图例和颜色条定制')
ax_legend.set_xlabel('X轴')
ax_legend.set_ylabel('Y轴')
ax_legend.grid(True, alpha=0.3)

# Colorbar example driven by a small random image
im = ax_legend.imshow(np.random.rand(10, 10), extent=[8, 10, 0, 2],
                      aspect='auto', alpha=0.5)
cbar = plt.colorbar(im, ax=ax_legend, orientation='vertical', pad=0.01, shrink=0.8)
cbar.set_label('颜色标度', fontsize=10)

plt.tight_layout()
plt.show()

样式与主题

样式设置

方法	作用	示例
`plt.style.use()`	使用样式	`plt.style.use('ggplot')`
`plt.rcParams.update()`	更新配置	`plt.rcParams.update({'font.size': 12})`
`plt.rc()`	设置配置	`plt.rc('lines', linewidth=2)`
`plt.xkcd()`	XKCD漫画风格	`with plt.xkcd(): plt.plot(...)`
可用样式		`'default'`, `'ggplot'`, `'seaborn-v0_8'`（Matplotlib 3.6 之前为 `'seaborn'`）, `'fivethirtyeight'`, `'grayscale'`

# 7.1 Styles and themes
print("=== 样式与主题 ===")

# List every style sheet this Matplotlib installation provides
print("可用样式:", plt.style.available)


def _resolve_style(name):
    """Map a style name onto one this Matplotlib installation can load.

    The bundled seaborn styles were renamed with a ``-v0_8`` suffix in newer
    Matplotlib releases, so a bare ``'seaborn'`` may no longer exist. Falls
    back to ``'default'`` when neither spelling is available.
    """
    if name == 'default' or name in plt.style.available:
        return name
    renamed = f'{name}-v0_8'
    return renamed if renamed in plt.style.available else 'default'


# Compare several styles side by side
styles = ['default', 'ggplot', 'seaborn', 'fivethirtyeight', 'grayscale', 'dark_background']
fig = plt.figure(figsize=(15, 10))
fig.suptitle('不同样式比较', fontsize=16, fontweight='bold')

for idx, style in enumerate(styles):
    with plt.style.context(_resolve_style(style)):
        # The axes must be created inside the style context: properties such
        # as the face colour and colour cycle are read when an Axes is built,
        # so styling a pre-built Axes would leave most of the style unapplied.
        ax = fig.add_subplot(2, 3, idx + 1)

        # Data
        x = np.linspace(0, 10, 100)
        y1 = np.sin(x)
        y2 = np.cos(x)
        y3 = np.exp(-x / 5) * np.sin(x)

        # Curves
        ax.plot(x, y1, label='sin(x)', linewidth=2)
        ax.plot(x, y2, label='cos(x)', linewidth=2)
        ax.plot(x, y3, label='衰减正弦', linewidth=2)

        # Decorations
        ax.set_title(f'{style} 样式', fontsize=12)
        ax.set_xlabel('X轴')
        ax.set_ylabel('Y轴')
        ax.legend(loc='best', fontsize=8)
        ax.grid(True)

plt.tight_layout()
plt.show()

# 7.2 Custom style
print("=== 自定义样式 ===")

# Method 1: global settings through rcParams
plt.rcParams.update({
    'font.size': 12,                              # base font size
    'font.family': 'sans-serif',                  # font family
    'font.sans-serif': ['Arial', 'DejaVu Sans'],  # sans-serif candidates
    'figure.figsize': (10, 6),                    # default figure size
    'figure.autolayout': True,                    # automatic layout adjustment
    'axes.titlesize': 14,                         # axes title size
    'axes.labelsize': 12,                         # axis label size
    'axes.linewidth': 1.5,                        # axes frame line width
    'axes.grid': True,                            # show grid
    'grid.linestyle': '--',                       # grid line style
    'grid.alpha': 0.6,                            # grid transparency
    'xtick.labelsize': 10,                        # x tick label size
    'ytick.labelsize': 10,                        # y tick label size
    'legend.fontsize': 10,                        # legend font size
    'legend.frameon': True,                       # legend frame
    'legend.shadow': True,                        # legend shadow
    'lines.linewidth': 2,                         # line width
    'lines.markersize': 8,                        # marker size
    'savefig.dpi': 300,                           # DPI of saved figures
    'savefig.bbox': 'tight',                      # tight bounding box on save
})

# Draw with the custom settings in effect
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Panel 1: curves
x = np.linspace(0, 2 * np.pi, 100)
axes[0, 0].plot(x, np.sin(x), 'r-', label='sin(x)')
axes[0, 0].plot(x, np.cos(x), 'b--', label='cos(x)')
axes[0, 0].set_title('三角函数')
axes[0, 0].legend()

# Panel 2: scatter
np.random.seed(42)
x_scatter = np.random.randn(50)
y_scatter = np.random.randn(50)
colors = np.random.rand(50)
sizes = 100 * np.random.rand(50)
axes[0, 1].scatter(x_scatter, y_scatter, c=colors, s=sizes, alpha=0.6, cmap='viridis')
axes[0, 1].set_title('散点图')

# Panel 3: bars
categories = ['A', 'B', 'C', 'D', 'E']
values = [25, 40, 30, 35, 20]
axes[1, 0].bar(categories, values, color=['red', 'blue', 'green', 'orange', 'purple'])
axes[1, 0].set_title('柱状图')

# Panel 4: pie
sizes_pie = [15, 30, 45, 10]
labels_pie = ['A类', 'B类', 'C类', 'D类']
explode = (0, 0.1, 0, 0)
axes[1, 1].pie(sizes_pie, explode=explode, labels=labels_pie,
               autopct='%1.1f%%', shadow=True, startangle=90)
axes[1, 1].set_title('饼图')

plt.suptitle('自定义样式示例', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

# Restore the default settings
plt.rcdefaults()

保存与导出

保存图形

方法	作用	示例
`plt.savefig()`	保存图形	`plt.savefig('figure.png', dpi=300, bbox_inches='tight')`
`fig.savefig()`	保存图形对象	`fig.savefig('figure.pdf', format='pdf')`
参数
`dpi`	分辨率	`dpi=300` (每英寸点数)
`bbox_inches`	边界框	`bbox_inches='tight'` (紧凑边界)
`pad_inches`	内边距	`pad_inches=0.1`
`transparent`	透明背景	`transparent=True`
`format`	格式	`format='png'`, `'pdf'`, `'svg'`, `'jpg'`

# 8.1 Saving figures
import os

print("=== 保存图形 ===")

# Example figure
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Panel 1: curves
x = np.linspace(0, 10, 100)
ax1.plot(x, np.sin(x), 'r-', label='sin(x)', linewidth=2)
ax1.plot(x, np.cos(x), 'b--', label='cos(x)', linewidth=2)
ax1.set_title('三角函数')
ax1.set_xlabel('X')
ax1.set_ylabel('Y')
ax1.legend()
ax1.grid(True)

# Panel 2: scatter
np.random.seed(42)
x_scatter = np.random.randn(100)
y_scatter = np.random.randn(100)
colors = np.random.rand(100)
sizes = 100 * np.random.rand(100)
scatter = ax2.scatter(x_scatter, y_scatter, c=colors, s=sizes, alpha=0.6, cmap='viridis')
ax2.set_title('散点图')
ax2.set_xlabel('X')
ax2.set_ylabel('Y')
plt.colorbar(scatter, ax=ax2)

plt.suptitle('保存图形示例', fontsize=16)
plt.tight_layout()

# Output directory for every format below
save_dir = './saved_figures/'
os.makedirs(save_dir, exist_ok=True)

# 1. PNG (raster, supports transparency)
fig.savefig(os.path.join(save_dir, 'figure.png'), dpi=300, bbox_inches='tight',
            facecolor='white', edgecolor='black')
print("已保存: figure.png")

# 2. PDF (vector)
fig.savefig(os.path.join(save_dir, 'figure.pdf'), format='pdf', bbox_inches='tight')
print("已保存: figure.pdf")

# 3. SVG (vector, editable)
fig.savefig(os.path.join(save_dir, 'figure.svg'), format='svg', bbox_inches='tight')
print("已保存: figure.svg")

# 4. JPG (lossy). The JPEG quality (1-100) is forwarded to Pillow through
# pil_kwargs; savefig's own `quality` keyword is no longer accepted by
# current Matplotlib releases.
fig.savefig(os.path.join(save_dir, 'figure.jpg'), format='jpg', dpi=300,
            bbox_inches='tight', pil_kwargs={'quality': 95})
print("已保存: figure.jpg")

# 5. TIFF
fig.savefig(os.path.join(save_dir, 'figure.tiff'), format='tiff', dpi=300,
            bbox_inches='tight')
print("已保存: figure.tiff")

# 6. EPS (vector, commonly used with LaTeX)
fig.savefig(os.path.join(save_dir, 'figure.eps'), format='eps', bbox_inches='tight')
print("已保存: figure.eps")

# 7. PNG with a transparent background
fig.savefig(os.path.join(save_dir, 'figure_transparent.png'), dpi=300,
            bbox_inches='tight', transparent=True)
print("已保存: figure_transparent.png (透明背景)")

plt.show()

# 8.2 Saving subplots in bulk
print("\n=== 批量保存子图 ===")

# Figure with several subplots
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
fig.suptitle('批量保存示例')

# Fill each subplot with a centred caption
plot_types = ['曲线图', '散点图', '柱状图', '饼图']
for idx, ax in enumerate(axes.flat):
    ax.text(0.5, 0.5, plot_types[idx],
            horizontalalignment='center', verticalalignment='center',
            transform=ax.transAxes, fontsize=14, fontweight='bold')
    ax.set_title(f'子图 {idx+1}')

plt.tight_layout()

# Save the whole figure
fig.savefig(os.path.join(save_dir, 'multiplot.png'), dpi=300, bbox_inches='tight')

# Save each subplot as its own file by redrawing it on a fresh figure
for number, plot_type in enumerate(plot_types, start=1):
    fig_single = plt.figure(figsize=(5, 4))
    ax_single = fig_single.add_subplot(111)
    ax_single.text(0.5, 0.5, plot_type,
                   horizontalalignment='center', verticalalignment='center',
                   transform=ax_single.transAxes, fontsize=14, fontweight='bold')
    ax_single.set_title(f'子图 {number}')
    ax_single.set_xlim(0, 1)
    ax_single.set_ylim(0, 1)
    ax_single.set_xticks([])
    ax_single.set_yticks([])

    fig_single.savefig(os.path.join(save_dir, f'subplot_{number}.png'),
                       dpi=300, bbox_inches='tight')
    plt.close(fig_single)  # close the figure to release its memory

print("已保存: multiplot.png 和 4个子图")

CV工程师实用案例

图像处理可视化

# 9.1 Visualizing an image-processing pipeline
from collections import defaultdict

from scipy.ndimage import gaussian_filter, sobel

print("=== CV工程师实用案例: 图像处理可视化 ===")

# Simulated pipeline input
np.random.seed(42)
original_image = np.random.rand(100, 100)

# 1. Add a filled circle of radius 20 centred at row 30, column 70.
# Built as a vectorized mask instead of a 100x100 Python double loop.
rows, cols = np.ogrid[:100, :100]
original_image[(rows - 30) ** 2 + (cols - 70) ** 2 < 400] = 0.8

# 2. Add a rectangle
original_image[60:80, 20:40] = 0.3

# 3. Add Gaussian noise and clip back into [0, 1]
noise = np.random.normal(0, 0.1, (100, 100))
noisy_image = original_image + noise
noisy_image = np.clip(noisy_image, 0, 1)

# 4. Gaussian smoothing
filtered_image = gaussian_filter(noisy_image, sigma=1.0)

# 5. Edge detection with the Sobel operator along each axis
edge_x = sobel(filtered_image, axis=0)
edge_y = sobel(filtered_image, axis=1)
edge_image = np.sqrt(edge_x**2 + edge_y**2)
edge_image = edge_image / edge_image.max()  # normalize to [0, 1]

# 6. Thresholding
threshold = 0.3
binary_image = (edge_image > threshold).astype(float)

# Show every stage
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
fig.suptitle('图像处理流程可视化', fontsize=16, fontweight='bold')

# 1. Original image
axes[0, 0].imshow(original_image, cmap='gray', vmin=0, vmax=1)
axes[0, 0].set_title('原始图像')
axes[0, 0].axis('off')

# 2. After adding noise
axes[0, 1].imshow(noisy_image, cmap='gray', vmin=0, vmax=1)
axes[0, 1].set_title('添加噪声后')
axes[0, 1].axis('off')

# 3. After Gaussian filtering
axes[0, 2].imshow(filtered_image, cmap='gray', vmin=0, vmax=1)
axes[0, 2].set_title('高斯滤波后')
axes[0, 2].axis('off')

# 4. Edge map
axes[1, 0].imshow(edge_image, cmap='hot', vmin=0, vmax=1)
axes[1, 0].set_title('边缘检测结果')
axes[1, 0].axis('off')

# 5. Thresholded result
axes[1, 1].imshow(binary_image, cmap='binary', vmin=0, vmax=1)
axes[1, 1].set_title('二值化结果')
axes[1, 1].axis('off')

# 6. Text summary of the pipeline
axes[1, 2].axis('off')
axes[1, 2].text(0.1, 0.9, '图像处理流程:', fontsize=12, fontweight='bold')
pipeline_steps = [
    '1. 原始图像',
    '2. 添加噪声',
    '3. 高斯滤波去噪',
    '4. 边缘检测',
    '5. 二值化',
    '6. 特征提取',
]
for step_no, step_text in enumerate(pipeline_steps):
    # Same positions as writing 0.8, 0.7, ... 0.3 by hand.
    axes[1, 2].text(0.1, (8 - step_no) / 10, step_text, fontsize=10)

plt.tight_layout()
plt.savefig(os.path.join(save_dir, 'image_processing_pipeline.png'),
            dpi=300, bbox_inches='tight')
plt.show()

# 9.2 Visualizing a training run
print("\n=== 模型训练过程可视化 ===")

# Simulated training curves
epochs = 100
train_loss = np.exp(-np.linspace(0, 5, epochs)) + np.random.normal(0, 0.01, epochs)
val_loss = np.exp(-np.linspace(0, 4.5, epochs)) + np.random.normal(0, 0.015, epochs)
train_acc = 1 - np.exp(-np.linspace(0, 4, epochs)) + np.random.normal(0, 0.005, epochs)
val_acc = 1 - np.exp(-np.linspace(0, 3.5, epochs)) + np.random.normal(0, 0.01, epochs)
epoch_axis = range(1, epochs + 1)

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(12, 10))
fig.suptitle('模型训练过程可视化', fontsize=16, fontweight='bold')

# 1. Training and validation loss
ax1.plot(epoch_axis, train_loss, 'b-', label='训练损失', linewidth=2)
ax1.plot(epoch_axis, val_loss, 'r--', label='验证损失', linewidth=2)
ax1.set_xlabel('Epoch')
ax1.set_ylabel('损失')
ax1.set_title('训练和验证损失曲线')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Mark the lowest validation loss
best_val_epoch = np.argmin(val_loss) + 1
best_val_loss = val_loss[best_val_epoch - 1]
ax1.plot(best_val_epoch, best_val_loss, 'go', markersize=10)
ax1.annotate(f'最佳: {best_val_loss:.4f}',
             xy=(best_val_epoch, best_val_loss),
             xytext=(best_val_epoch + 10, best_val_loss + 0.05),
             arrowprops=dict(arrowstyle='->', color='green'),
             fontsize=10)

# 2. Training and validation accuracy
ax2.plot(epoch_axis, train_acc, 'b-', label='训练准确率', linewidth=2)
ax2.plot(epoch_axis, val_acc, 'r--', label='验证准确率', linewidth=2)
ax2.set_xlabel('Epoch')
ax2.set_ylabel('准确率')
ax2.set_title('训练和验证准确率曲线')
ax2.legend()
ax2.grid(True, alpha=0.3)

# Mark the highest validation accuracy
best_val_acc_epoch = np.argmax(val_acc) + 1
best_val_acc = val_acc[best_val_acc_epoch - 1]
ax2.plot(best_val_acc_epoch, best_val_acc, 'go', markersize=10)
ax2.annotate(f'最佳: {best_val_acc:.4f}',
             xy=(best_val_acc_epoch, best_val_acc),
             xytext=(best_val_acc_epoch + 10, best_val_acc - 0.05),
             arrowprops=dict(arrowstyle='->', color='green'),
             fontsize=10)

# 3. Learning-rate decay (simulated)
learning_rates = np.logspace(-2, -5, epochs)
ax3.semilogy(epoch_axis, learning_rates, 'g-', linewidth=2)
ax3.set_xlabel('Epoch')
ax3.set_ylabel('学习率 (log scale)')
ax3.set_title('学习率衰减')
ax3.grid(True, alpha=0.3, which='both')

# 4. Loss vs. accuracy, coloured by epoch
ax4.scatter(train_loss, train_acc, c=range(epochs), cmap='viridis',
            s=50, alpha=0.6, label='训练')
ax4.scatter(val_loss, val_acc, c=range(epochs), cmap='plasma',
            s=50, alpha=0.6, marker='s', label='验证')
ax4.set_xlabel('损失')
ax4.set_ylabel('准确率')
ax4.set_title('损失-准确率关系')
ax4.legend()
ax4.grid(True, alpha=0.3)

# Colorbar for the training points: it encodes the epoch index
cbar_train = plt.colorbar(ax4.collections[0], ax=ax4, pad=0.01)
cbar_train.set_label('训练进度 (Epoch)')

plt.tight_layout()
plt.savefig(os.path.join(save_dir, 'training_visualization.png'),
            dpi=300, bbox_inches='tight')
plt.show()

# 9.3 Visualizing detection results
print("\n=== 检测结果可视化 ===")

# Simulated detections on a random 200x300 RGB image
np.random.seed(42)
test_image = np.random.rand(200, 300, 3)

n_detections = 15
detections = []
for i in range(n_detections):
    # Random box origin and size
    x1 = np.random.randint(0, 250)
    y1 = np.random.randint(0, 150)
    width = np.random.randint(30, 80)
    height = np.random.randint(30, 80)
    # Clamp the far corner to the image bounds
    x2 = min(x1 + width, 299)
    y2 = min(y1 + height, 199)
    # Random class and confidence
    category = np.random.choice(['person', 'car', 'dog', 'cat'])
    confidence = np.random.uniform(0.5, 0.95)
    detections.append({
        'bbox': [x1, y1, x2, y2],
        'category': category,
        'confidence': confidence,
    })

# Colour per class
category_colors = {'person': 'red', 'car': 'blue', 'dog': 'green', 'cat': 'orange'}

fig, axes = plt.subplots(1, 2, figsize=(14, 6))
fig.suptitle('目标检测结果可视化', fontsize=16, fontweight='bold')

# 1. Image with the detection boxes drawn on top
axes[0].imshow(test_image)
axes[0].set_title('检测结果')
axes[0].axis('off')

for det in detections:
    bbox = det['bbox']
    category = det['category']
    confidence = det['confidence']
    color = category_colors[category]

    # Box outline
    rect = plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1],
                         fill=False, edgecolor=color, linewidth=2)
    axes[0].add_patch(rect)

    # Class / confidence label just above the box
    label = f'{category}: {confidence:.2f}'
    axes[0].text(bbox[0], bbox[1] - 5, label, color=color, fontsize=8,
                 fontweight='bold',
                 bbox=dict(boxstyle='round', facecolor='white', alpha=0.7))

# 2. Statistics panel
axes[1].axis('off')
# Pin the data limits to the unit square. Every text below is positioned in
# data coordinates, and the legend swatches drawn with plot() would otherwise
# re-autoscale the axes and push the text out of place.
axes[1].set_xlim(0, 1)
axes[1].set_ylim(0, 1)

# Group the confidences by class in a single pass
categories = [det['category'] for det in detections]
confidences = [det['confidence'] for det in detections]
confidences_by_category = defaultdict(list)
for cat_det, conf in zip(categories, confidences):
    confidences_by_category[cat_det].append(conf)
category_counts = {cat: len(confs) for cat, confs in confidences_by_category.items()}
avg_confidences = {cat: np.mean(confs) for cat, confs in confidences_by_category.items()}

# Per-class lines
y_pos = 0.9
axes[1].text(0.1, y_pos, '检测结果统计:', fontsize=12, fontweight='bold')
y_pos -= 0.05
for cat in sorted(category_counts.keys()):
    count = category_counts[cat]
    avg_conf = avg_confidences[cat]
    color = category_colors[cat]
    axes[1].text(0.1, y_pos, f'{cat}:', fontsize=10, fontweight='bold', color=color)
    axes[1].text(0.4, y_pos, f'数量: {count}', fontsize=10)
    axes[1].text(0.7, y_pos, f'平均置信度: {avg_conf:.3f}', fontsize=10)
    y_pos -= 0.05

# Overall totals
y_pos -= 0.05
axes[1].text(0.1, y_pos, f'总检测数: {len(detections)}', fontsize=10, fontweight='bold')
y_pos -= 0.03
axes[1].text(0.1, y_pos, f'平均置信度: {np.mean(confidences):.3f}',
             fontsize=10, fontweight='bold')

# Colour legend
y_pos -= 0.05
axes[1].text(0.1, y_pos, '图例:', fontsize=10, fontweight='bold')
y_pos -= 0.03
for cat, color in category_colors.items():
    axes[1].plot([0.1, 0.15], [y_pos, y_pos], color=color, linewidth=3)
    axes[1].text(0.18, y_pos, cat, fontsize=9)
    y_pos -= 0.03

plt.tight_layout()
plt.savefig(os.path.join(save_dir, 'detection_results.png'), dpi=300, bbox_inches='tight')
plt.show()

Python 数据分析

Numpy

数组创建与初始化

数组属性与信息

数组索引与切片

形状操作与重塑

数学运算

基本运算

通用函数（ufunc）

统计函数

线性代数运算

广播机制

随机数生成

图像处理相关应用

高级技巧与性能优化

实用小技巧

Pandas

核心数据结构

Series（一维数据）

DataFrame（二维数据）

数据查看与选择

数据查看方法

数据选择（索引/切片）

数据处理与清洗

数据清洗方法

数据变换方法

数据合并与连接

数据合并方法

分组与聚合操作

分组聚合方法

时间序列处理

时间序列方法

性能优化技巧

性能优化方法

CV工程师实用案例

图像数据集管理

Matplotlib

架构

绘图风格

基础绘图

创建图形

基本图表类型

线图

图像显示与处理

图像显示

3D绘图

3D图形

高级定制

图形元素定制

样式与主题

样式设置

保存与导出

保存图形

CV工程师实用案例

图像处理可视化

Read more

Python实现开源AI模型引入及测试全过程

如何快速解决GitHub访问难题：一站式Hosts同步方案

2026全网最热Claude Skills工具箱，GitHub上最受欢迎的7大Skills开源AI技能库

GitHub 访问速度优化：本地 hosts 配置与 DNS 刷新指南