C++ 性能优化实战：从内存到 CPU 的执行效率提升 | 极客日志

C++算法

C++ 性能优化实战：从内存到 CPU 的执行效率提升

综述（由 AI 生成）：C++ 性能优化涉及内存管理、CPU 指令及 I/O 操作等多个层面。本文通过智能指针、预分配内存、循环合并、内联函数、异步网络 I/O 及矩阵乘法案例，展示了具体的优化手段与代码实践。重点在于先测量后优化，在保持代码可维护性的同时提升执行效率。

FrontendX发布于 2026/3/29更新于 2026/6/1119 浏览

性能优化示意图

核心目标

在 C++ 开发中，代码不仅要'能跑'，更要'跑得高效'。本章我们将深入探讨性能优化的核心知识，帮助你掌握提升代码执行效率的关键技巧。通过阅读，你将能够理解性能分析的方法，学会优化内存管理以减少泄漏和碎片，掌握 CPU 指令层面的优化技巧，并提升 I/O 操作的读写效率。

基础原则与分析

优化原则

性能优化不是盲目地重写代码，而是有策略的改进。遵循以下原则能让你的工作事半功倍：

先测量后优化：在动手之前，先用工具找出真正的瓶颈所在。
聚焦关键路径：只优化对整体性能影响最大的部分，避免过度设计。
保持可维护性：优化后的代码依然要易于理解和维护，不要为了速度牺牲可读性。
验证结果：优化后必须测试代码的正确性和实际的性能提升效果。

常用分析工具

工欲善其事，必先利其器。常用的性能分析工具包括：

GProf：GNU 经典的性能分析工具。
Valgrind：强大的内存调试和性能分析工具。
Perf：Linux 下系统级的性能分析利器。
Visual Studio Profiler：Windows 环境下集成的性能分析工具。

内存管理优化

智能指针与内存泄漏

手动管理 new 和 delete 极易导致内存泄漏。现代 C++ 推荐使用智能指针，让 RAII（资源获取即初始化）机制自动处理生命周期。

#include <iostream>
#include <memory>

// Demo type whose constructor, destructor and single member function each
// print one line to std::cout, making the object's lifetime visible in the
// program output.
class MyClass {
public:
    // Prints the construction message.
    MyClass() { logLine("MyClass 构造函数"); }

    // Prints the destruction message.
    ~MyClass() { logLine("MyClass 析构函数"); }

    // Prints a fixed "doing something" message.
    void doSomething() { logLine("MyClass 正在做某事"); }

private:
    // Writes `text` followed by std::endl (newline + flush) to std::cout.
    static void logLine(const char* text) {
        std::cout << text << std::endl;
    }
};

// 使用智能指针
  {
    std::shared_ptr<MyClass> ptr = std::<MyClass>();
    ptr->();
    
}


{
    MyClass* ptr =  ();
    ptr->();
    
}

{
    std::cout <<  << std::endl;
    std::cout <<  << std::endl;
    ();
    std::cout << std::endl;
    std::cout <<  << std::endl;
    ();
     ;
}

相关免费在线工具

加密/解密文本
使用加密算法（如AES、TripleDES、Rabbit或RC4）加密和解密文本明文。在线工具，加密/解密文本在线工具，online
Gemini 图片去水印
基于开源反向 Alpha 混合算法去除 Gemini/Nano Banana 图片水印，支持批量处理与下载。在线工具，Gemini 图片去水印在线工具，online
Base64 字符串编码/解码
将字符串编码和解码为其 Base64 格式表示形式即可。在线工具，Base64 字符串编码/解码在线工具，online
Base64 文件转换器
将字符串、文件或图像转换为其 Base64 表示形式。在线工具，Base64 文件转换器在线工具，online
Markdown转HTML
将 Markdown（GFM）转为 HTML 片段，浏览器内 marked 解析；与 HTML转Markdown 互为补充。在线工具，Markdown转HTML在线工具，online
HTML转Markdown
将 HTML 片段转为 GitHub Flavored Markdown，支持标题、列表、链接、代码块与表格等；浏览器内处理，可链接预填。在线工具，HTML转Markdown在线工具，online

#include <iostream>
#include <vector>
#include <chrono>

// Fills a local vector with the values 0..9999 after reserving the full
// capacity up front, so the push_back loop never has to grow the buffer.
void preallocateMemory() {
    const int count = 10000;
    std::vector<int> values;
    values.reserve(count); // single allocation before the loop
    int next = 0;
    while (next < count) {
        values.push_back(next);
        ++next;
    }
}

// Baseline for preallocateMemory(): fills a local vector with 0..9999 without
// calling reserve(), so the vector grows on demand while elements are pushed.
void notPreallocateMemory() {
    const int count = 10000;
    std::vector<int> values;
    int next = 0;
    while (next < count) {
        values.push_back(next);
        ++next;
    }
}

// Times preallocateMemory() and notPreallocateMemory() once each and prints
// the elapsed wall-clock time of both in microseconds.
int main() {
    using Clock = std::chrono::high_resolution_clock;
    using std::chrono::duration_cast;
    using std::chrono::microseconds;

    std::cout << "=== 内存碎片优化示例 ===" << std::endl;

    // Variant that reserves capacity first.
    const Clock::time_point reservedBegin = Clock::now();
    preallocateMemory();
    const Clock::time_point reservedEnd = Clock::now();
    std::cout << "预分配内存耗时："
              << duration_cast<microseconds>(reservedEnd - reservedBegin).count()
              << "微秒" << std::endl;

    // Variant that lets the vector grow on demand.
    const Clock::time_point growingBegin = Clock::now();
    notPreallocateMemory();
    const Clock::time_point growingEnd = Clock::now();
    std::cout << "不预分配内存耗时："
              << duration_cast<microseconds>(growingEnd - growingBegin).count()
              << "微秒" << std::endl;

    return 0;
}

#include <iostream>
#include <vector>
#include <chrono>

// Fused-loop variant: computes the element-wise sum of two 10000-element
// vectors (all 1s and all 2s) into a third vector in a single pass.
void optimizedLoop() {
    const int count = 10000;
    std::vector<int> lhs(count, 1);
    std::vector<int> rhs(count, 2);
    std::vector<int> sums(count, 0);
    for (int idx = 0; idx != count; ++idx) {
        sums[idx] = lhs[idx] + rhs[idx];
    }
}

// Split-loop baseline for optimizedLoop(): produces the same element-wise sum
// but walks the output vector twice -- first copying the left operand, then
// adding the right operand in a second pass.
void unoptimizedLoop() {
    const int count = 10000;
    std::vector<int> lhs(count, 1);
    std::vector<int> rhs(count, 2);
    std::vector<int> sums(count, 0);

    // Pass 1: copy the left operand.
    for (int idx = 0; idx != count; ++idx) {
        sums[idx] = lhs[idx];
    }
    // Pass 2: accumulate the right operand.
    for (int idx = 0; idx != count; ++idx) {
        sums[idx] += rhs[idx];
    }
}

// Times optimizedLoop() and unoptimizedLoop() once each and prints the
// elapsed wall-clock time of both in microseconds.
int main() {
    using Clock = std::chrono::high_resolution_clock;
    using std::chrono::duration_cast;
    using std::chrono::microseconds;

    std::cout << "=== 循环优化示例 ===" << std::endl;

    // Single fused pass.
    const Clock::time_point fusedBegin = Clock::now();
    optimizedLoop();
    const Clock::time_point fusedEnd = Clock::now();
    std::cout << "优化循环耗时："
              << duration_cast<microseconds>(fusedEnd - fusedBegin).count()
              << "微秒" << std::endl;

    // Two separate passes.
    const Clock::time_point splitBegin = Clock::now();
    unoptimizedLoop();
    const Clock::time_point splitEnd = Clock::now();
    std::cout << "未优化循环耗时："
              << duration_cast<microseconds>(splitEnd - splitBegin).count()
              << "微秒" << std::endl;

    return 0;
}

#include <iostream>
#include <chrono>

// Returns a + b. Declared `inline`; note that the keyword governs linkage and
// is at most a hint -- the compiler decides whether the call is expanded.
inline int add(int a, int b) {
    const int sum = a + b;
    return sum;
}

// Returns a + b. Same computation as add(), but without the `inline`
// specifier; used as the comparison case in the call-overhead benchmark.
int addNotInline(int a, int b) {
    const int sum = a + b;
    return sum;
}

// Measures one million calls to add() and one million calls to addNotInline()
// and prints the elapsed wall-clock time of each loop in microseconds.
void testFunctionCallOverhead() {
    const int size = 1000000;
    // Each loop adds 2*i for i in [0, size), i.e. 999'999'000'000 in total.
    // That does not fit in a 32-bit int (signed overflow is undefined
    // behaviour), so the accumulator is 64-bit.
    long long result = 0;

    auto start = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < size; ++i) {
        result += add(i, i);
    }
    auto end = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
    std::cout << "内联函数调用耗时：" << duration << "微秒" << std::endl;

    start = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < size; ++i) {
        result += addNotInline(i, i);
    }
    end = std::chrono::high_resolution_clock::now();
    duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
    std::cout << "普通函数调用耗时：" << duration << "微秒" << std::endl;

    // Store the sum through a volatile object so the optimizer cannot treat
    // the accumulated value as dead and discard the loops being timed.
    volatile long long sink = result;
    (void)sink;
}

// Prints the section banner and runs the call-overhead benchmark.
int main() {
    const char* const banner = "=== 函数优化示例 ===";
    std::cout << banner << std::endl;

    testFunctionCallOverhead();

    return 0;
}

#include <iostream>
#include <fstream>
#include <string>
#include <chrono>

// Optimized file output: writes the integers 0..9999, one per line, to
// "test.txt", letting the stream's buffer batch the writes.
//
// Each line ends with '\n' instead of std::endl. std::endl also flushes the
// stream, which forces a write for every single line and is the dominant cost
// in the unoptimized variant.
//
// The earlier std::ios_base::sync_with_stdio(false) call was dropped: it only
// concerns the standard streams (std::cout, std::cin, ...), not std::ofstream,
// and calling it after standard-stream I/O has already happened has
// implementation-defined effect.
void optimizedFileIO() {
    const std::string filename = "test.txt";
    const int size = 10000;
    std::ofstream file(filename);
    for (int i = 0; i < size; ++i) {
        file << i << '\n'; // newline only; the buffer is flushed on close
    }
    file.close();
}

// Baseline file output: writes the integers 0..9999, one per line, to
// "test.txt". Every line is terminated with std::endl, which flushes the
// stream after each write.
void unoptimizedFileIO() {
    const std::string path = "test.txt";
    const int lineCount = 10000;
    std::ofstream out(path);
    int value = 0;
    while (value < lineCount) {
        out << value << std::endl; // newline + flush on every line
        ++value;
    }
    out.close();
}

// Times optimizedFileIO() and unoptimizedFileIO() once each and prints the
// elapsed wall-clock time of both in microseconds.
int main() {
    using Clock = std::chrono::high_resolution_clock;
    using std::chrono::duration_cast;
    using std::chrono::microseconds;

    std::cout << "=== 文件 I/O 优化示例 ===" << std::endl;

    const Clock::time_point fastBegin = Clock::now();
    optimizedFileIO();
    const Clock::time_point fastEnd = Clock::now();
    std::cout << "优化文件 I/O 耗时："
              << duration_cast<microseconds>(fastEnd - fastBegin).count()
              << "微秒" << std::endl;

    const Clock::time_point slowBegin = Clock::now();
    unoptimizedFileIO();
    const Clock::time_point slowEnd = Clock::now();
    std::cout << "未优化文件 I/O 耗时："
              << duration_cast<microseconds>(slowEnd - slowBegin).count()
              << "微秒" << std::endl;

    return 0;
}

#include <iostream>
#include <boost/asio.hpp>
#include <boost/asio/ip/tcp.hpp>
#include <sstream>
#include <string>
#include <chrono>

using boost::asio::ip::tcp;
using namespace std;

// "Optimized" network example: sends an HTTP GET for "/" to example.com and
// reads only the first response line (up to the first "\r\n").
//
// NOTE: every Boost.Asio call used here (resolve, connect, write, read_until)
// is the blocking form; nothing in this function is asynchronous. It does less
// work than unoptimizedNetworkIO() because it stops after the status line
// instead of draining the whole response.
//
// Any std::exception thrown along the way is reported on std::cerr and
// swallowed.
void optimizedNetworkIO() {
    try {
        boost::asio::io_service io_service;
        tcp::resolver resolver(io_service);
        tcp::resolver::query query("example.com", "http");
        tcp::resolver::iterator endpoint_iterator = resolver.resolve(query);
        tcp::socket socket(io_service);
        boost::asio::connect(socket, endpoint_iterator);

        std::string request = "GET / HTTP/1.1\r\n";
        request += "Host: example.com\r\n";
        request += "Connection: close\r\n\r\n";

        boost::asio::write(socket, boost::asio::buffer(request));
        boost::asio::streambuf response;
        boost::asio::read_until(socket, response, "\r\n");

        // boost::asio::streambuf is a std::streambuf, so it is read through a
        // plain std::istream. (std::istringstream has no constructor taking a
        // streambuf pointer, so the previous declaration did not compile.)
        std::string status_line;
        std::istream response_stream(&response);
        response_stream >> status_line; // first whitespace-delimited token
    } catch (const std::exception& e) {
        std::cerr << "错误：" << e.what() << std::endl;
    }
}

// 未优化的网络 I/O：使用同步操作
void unoptimizedNetworkIO() {
    try {
        boost::asio::io_service io_service;
        tcp::resolver resolver(io_service);
        tcp::resolver::query query("example.com", "http");
        tcp::resolver::iterator endpoint_iterator = resolver.resolve(query);
        tcp::socket socket(io_service);
        boost::asio::connect(socket, endpoint_iterator);
        
        string request = "GET / HTTP/1.1\r\n";
        request += "Host: example.com\r\n";
        request += "Connection: close\r\n\r\n";
        
        boost::asio::write(socket, boost::asio::buffer(request));
        string response;
        char buffer[1024];
        size_t len;
        while ((len = socket.read_some(boost::asio::buffer(buffer))) > 0) {
            response.append(buffer, len);
        }
    } catch (const std::exception& e) {
        cerr << "错误：" << e.what() << endl;
    }
}

// Times optimizedNetworkIO() and unoptimizedNetworkIO() once each and prints
// the elapsed wall-clock time of both in milliseconds.
int main() {
    using Clock = std::chrono::high_resolution_clock;
    using std::chrono::duration_cast;
    using std::chrono::milliseconds;

    std::cout << "=== 网络 I/O 优化示例 ===" << std::endl;

    const Clock::time_point firstBegin = Clock::now();
    optimizedNetworkIO();
    const Clock::time_point firstEnd = Clock::now();
    std::cout << "优化网络 I/O 耗时："
              << duration_cast<milliseconds>(firstEnd - firstBegin).count()
              << "毫秒" << std::endl;

    const Clock::time_point secondBegin = Clock::now();
    unoptimizedNetworkIO();
    const Clock::time_point secondEnd = Clock::now();
    std::cout << "未优化网络 I/O 耗时："
              << duration_cast<milliseconds>(secondEnd - secondBegin).count()
              << "毫秒" << std::endl;

    return 0;
}

MatrixMultiplicationOptimization/
├── include/
│   └── Matrix.h
├── src/
│   ├── Matrix.cpp
│   └── main.cpp
└── build/

#ifndef MATRIX_H
#define MATRIX_H

#include <vector>
#include <chrono>

using namespace std;
using namespace chrono;

// Dense integer matrix stored as a vector of row vectors.
//
// Standard-library types are spelled with an explicit std:: prefix so the
// declaration does not depend on a using-directive being active in the header.
class Matrix {
public:
    // Creates a rows x cols matrix.
    Matrix(int rows, int cols);

    // Creates a matrix from a vector of rows.
    Matrix(const std::vector<std::vector<int>>& data);

    // Number of rows.
    int getRows() const;

    // Number of columns.
    int getCols() const;

    // Element access by (row, col); the non-const overload allows assignment.
    int& operator()(int row, int col);
    const int& operator()(int row, int col) const;

    // Returns this * other using the straightforward triple loop.
    Matrix multiplyNaive(const Matrix& other) const;

    // Returns this * other; multiplies against a transposed copy of `other`.
    Matrix multiplyOptimized(const Matrix& other) const;

    // Prints the matrix.
    void print() const;

    // Returns the transposed element data (used by multiplyOptimized).
    std::vector<std::vector<int>> getTransposed() const;

private:
    int rows_;
    int cols_;
    std::vector<std::vector<int>> data_;
};

#endif // MATRIX_H

#include "Matrix.h"
#include <iostream>
#include <stdexcept>

// Builds a rows x cols matrix with every element set to 0.
Matrix::Matrix(int rows, int cols)
    : rows_(rows),
      cols_(cols),
      data_(rows, std::vector<int>(cols, 0))
{
}

// Builds a matrix from a vector of rows, copying the data.
//
// An empty `data` yields a 0 x 0 matrix (the previous version read data[0]
// unconditionally, which is undefined behaviour for empty input).
//
// Throws std::invalid_argument if the rows do not all have the same length,
// since element access and multiplication index every row up to cols_.
Matrix::Matrix(const std::vector<std::vector<int>>& data)
    : rows_(static_cast<int>(data.size())),
      cols_(data.empty() ? 0 : static_cast<int>(data.front().size())),
      data_(data) {
    for (const auto& row : data_) {
        if (row.size() != data_.front().size()) {
            throw std::invalid_argument("矩阵各行长度不一致");
        }
    }
}

// Returns the stored row count.
int Matrix::getRows() const
{
    return rows_;
}

// Returns the stored column count.
int Matrix::getCols() const
{
    return cols_;
}

// Mutable element access: returns a reference to the element at (row, col).
// No bounds checking is performed.
int& Matrix::operator()(int row, int col) {
    std::vector<int>& selectedRow = data_[row];
    return selectedRow[col];
}

// Read-only element access: returns a const reference to the element at
// (row, col). No bounds checking is performed.
const int& Matrix::operator()(int row, int col) const {
    const std::vector<int>& selectedRow = data_[row];
    return selectedRow[col];
}

// Returns the product (*this) * other using the textbook triple loop.
// The innermost loop walks down a column of `other`, touching a different
// row vector on every step.
//
// Throws invalid_argument when this matrix's column count differs from
// other's row count.
Matrix Matrix::multiplyNaive(const Matrix& other) const {
    const bool compatible = (cols_ == other.rows_);
    if (!compatible) {
        throw std::invalid_argument("矩阵尺寸不兼容");
    }

    Matrix product(rows_, other.cols_);
    for (int r = 0; r < rows_; ++r) {
        for (int c = 0; c < other.cols_; ++c) {
            for (int t = 0; t < cols_; ++t) {
                // Accumulate straight into the (zero-initialized) result cell.
                product(r, c) += data_[r][t] * other.data_[t][c];
            }
        }
    }
    return product;
}

// Returns the product (*this) * other. `other` is transposed once up front so
// the innermost loop reads one row of this matrix and one row of the
// transposed copy, both sequentially.
//
// Throws invalid_argument when this matrix's column count differs from
// other's row count.
Matrix Matrix::multiplyOptimized(const Matrix& other) const {
    const bool compatible = (cols_ == other.rows_);
    if (!compatible) {
        throw std::invalid_argument("矩阵尺寸不兼容");
    }

    Matrix product(rows_, other.cols_);
    const std::vector<std::vector<int>> rhsColumns = other.getTransposed();

    for (int r = 0; r < rows_; ++r) {
        for (int c = 0; c < other.cols_; ++c) {
            // Dot product of row r of this matrix with column c of `other`.
            int dot = 0;
            for (int t = 0; t < cols_; ++t) {
                dot += data_[r][t] * rhsColumns[c][t];
            }
            product(r, c) = dot;
        }
    }
    return product;
}

vector<vector<int>> Matrix::getTransposed() const {
    vector<vector<int>> transposed(cols_, vector<int>(rows_));
    for (int i = 0; i < rows_; ++i) {
        for (int j = 0; j < cols_; ++j) {
            transposed[j][i] = data_[i][j];
        }
    }
    return transposed;
}

// Writes the matrix to std::cout, one row per line; every element is followed
// by a single space and every row is terminated with std::endl.
void Matrix::print() const {
    for (auto rowIt = data_.begin(); rowIt != data_.end(); ++rowIt) {
        for (auto cellIt = rowIt->begin(); cellIt != rowIt->end(); ++cellIt) {
            std::cout << *cellIt << " ";
        }
        std::cout << std::endl;
    }
}

#include <iostream>
#include <vector>
#include <chrono>
#include "Matrix.h"

using namespace std;
using namespace chrono;

int main() {
    std::cout << "=== 矩阵乘法优化示例 ===" << std::endl;
    
    const int size = 100;
    Matrix matrix1(size, size);
    Matrix matrix2(size, size);
    
    for (int i = 0; i < size; ++i) {
        for (int j = 0; j < size; ++j) {
            matrix1(i, j) = i + j;
            matrix2(i, j) = i * j;
        }
    }
    
    // 测试朴素算法
    auto start = high_resolution_clock::now();
    Matrix resultNaive = matrix1.multiplyNaive(matrix2);
    auto end = high_resolution_clock::now();
    auto duration = duration_cast<milliseconds>(end - start).count();
    std::cout << "朴素算法耗时：" << duration << "毫秒" << std::endl;
    
    // 测试优化算法
    start = high_resolution_clock::now();
    Matrix resultOptimized = matrix1.multiplyOptimized(matrix2);
    end = high_resolution_clock::now();
    duration = duration_cast<milliseconds>(end - start).count();
    std::cout << "优化算法耗时：" << duration << "毫秒" << std::endl;
    
    return 0;
}

# Create the build directory (if missing) and enter it
mkdir -p build && cd build

# Configure with CMake for a Release build, using the parent directory as the
# source tree.
# NOTE(review): this assumes a CMakeLists.txt exists in the project root; none
# is listed in the project layout shown in the article -- confirm.
cmake -DCMAKE_BUILD_TYPE=Release ..

# Build the project (--config Release matters for multi-config generators)
cmake --build . --config Release

# Run the resulting executable
./MatrixMultiplicationOptimization

C++ 性能优化实战：从内存到 CPU 的执行效率提升

核心目标

基础原则与分析

优化原则

常用分析工具

内存管理优化

智能指针与内存泄漏

更多推荐文章

相关免费在线工具

预分配内存减少碎片

CPU 优化技巧

循环合并

内联函数

I/O 操作优化

文件 I/O 加速

网络 I/O 异步化

综合案例：矩阵乘法优化

项目结构

核心代码

Header (`include/Matrix.h`)

Implementation (`src/Matrix.cpp`)

Main Entry (`src/main.cpp`)

构建与运行

总结与实践

核心回顾

实战练习

进阶探索

更多推荐文章

相关免费在线工具

C++ 性能优化实战：从内存到 CPU 的执行效率提升

核心目标

基础原则与分析

优化原则

常用分析工具

内存管理优化

智能指针与内存泄漏

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

预分配内存减少碎片

CPU 优化技巧

循环合并

内联函数

I/O 操作优化

文件 I/O 加速

网络 I/O 异步化

综合案例：矩阵乘法优化

项目结构

核心代码

Header (include/Matrix.h)

Implementation (src/Matrix.cpp)

Main Entry (src/main.cpp)

构建与运行

总结与实践

核心回顾

实战练习

进阶探索

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

Header (`include/Matrix.h`)

Implementation (`src/Matrix.cpp`)

Main Entry (`src/main.cpp`)