PostgreSQL 聚合查询优化：ROLLUP 与 CUBE 的使用

PostgreSQL 聚合查询优化：ROLLUP 与 CUBE 的使用 | 极客日志

-- ROLLUP syntax
SELECT ... FROM table GROUP BY ROLLUP(col1, col2, col3);

-- CUBE syntax
SELECT ... FROM table GROUP BY CUBE(col1, col2, col3);

-- GROUPING SETS syntax (more flexible: each parenthesised list is one grouping
-- combination; the empty list () is the grand total)
SELECT ... FROM table GROUP BY GROUPING SETS ((col1, col2), (col1), ());

-- Sample order table used by the aggregation examples below.
CREATE TABLE orders (
    id               SERIAL        PRIMARY KEY,
    order_date       DATE          NOT NULL,
    product_category VARCHAR(50)   NOT NULL,
    region           VARCHAR(50)   NOT NULL,
    amount           DECIMAL(10,2) NOT NULL
);

-- By region and product category
SELECT
    region,
    product_category,
    SUM(amount)
FROM orders
GROUP BY
    region,
    product_category;

-- By region
SELECT
    region,
    NULL AS product_category,
    SUM(amount)
FROM orders
GROUP BY region;

-- By product category
SELECT
    NULL AS region,
    product_category,
    SUM(amount)
FROM orders
GROUP BY product_category;

-- Grand total
SELECT
    NULL AS region,
    NULL AS product_category,
    SUM(amount)
FROM orders;

-- All grouping combinations of (region, product_category) in a single statement.
SELECT
    region,
    product_category,
    SUM(amount) AS total_amount
FROM orders
GROUP BY CUBE(region, product_category)
ORDER BY
    region,
    product_category;

-- Hierarchical summary: (region, product_category), (region), and the grand total.
SELECT
    region,
    product_category,
    SUM(amount) AS total_amount
FROM orders
GROUP BY ROLLUP(region, product_category)
ORDER BY
    region,
    product_category;

-- Expose GROUPING() flags next to each dimension so that summary rows can be
-- told apart from rows whose dimension value is a real NULL.
SELECT
    region,
    product_category,
    GROUPING(region)           AS region_grouped,
    GROUPING(product_category) AS category_grouped,
    SUM(amount)                AS total_amount
FROM orders
GROUP BY CUBE(region, product_category);

-- Readable labels: rolled-up dimensions are shown as a total label, genuine
-- NULL values get their own "unknown" label; summary rows are sorted last.
SELECT
    CASE
        WHEN GROUPING(region) = 1 THEN '总计'
        ELSE COALESCE(region, '未知地区')
    END AS region_display,
    CASE
        WHEN GROUPING(product_category) = 1 THEN '总计'
        ELSE COALESCE(product_category, '未知类别')
    END AS category_display,
    SUM(amount) AS total_amount
FROM orders
GROUP BY CUBE(region, product_category)
ORDER BY
    GROUPING(region),
    region,
    GROUPING(product_category),
    product_category;

-- Inspect the actual execution plan of the CUBE aggregation.
EXPLAIN ANALYZE
SELECT
    region,
    product_category,
    SUM(amount)
FROM orders
GROUP BY CUBE(region, product_category);

-- For CUBE(region, product_category)
CREATE INDEX idx_orders_cube_covering
    ON orders (region, product_category)
    INCLUDE (amount);

-- For ROLLUP(order_date, region, product_category)
CREATE INDEX idx_orders_rollup_covering
    ON orders (order_date, region, product_category)
    INCLUDE (amount);

-- Index only the rows that belong to the active regions
CREATE INDEX idx_orders_active_regions
    ON orders (product_category, amount)
    WHERE region IN ('北京', '上海', '广州', '深圳');

-- Expression index on the year and month parts of order_date
CREATE INDEX idx_orders_year_month
    ON orders (
        EXTRACT(YEAR FROM order_date),
        EXTRACT(MONTH FROM order_date)
    );

-- Temporarily raise work_mem (only affects the current session)
SET work_mem = '256MB';
-- Run the aggregation query (placeholder statement)
SELECT ... GROUP BY CUBE(...);

-- Range-partition the orders table by order_date, one partition per year
CREATE TABLE orders (
    id               SERIAL,
    order_date       DATE NOT NULL,
    product_category VARCHAR(50),
    region           VARCHAR(50),
    amount           DECIMAL(10,2)
)
PARTITION BY RANGE (order_date);

-- Yearly partitions (the upper bound is exclusive)
CREATE TABLE orders_2023
    PARTITION OF orders
    FOR VALUES FROM ('2023-01-01') TO ('2024-01-01');

CREATE TABLE orders_2024
    PARTITION OF orders
    FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');

<!-- Maven dependencies of the sample Spring Boot application -->
<dependencies>
    <!-- Spring Data JPA starter -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-jpa</artifactId>
    </dependency>
    <!-- PostgreSQL driver; runtime scope, so it is not on the compile classpath -->
    <dependency>
        <groupId>org.postgresql</groupId>
        <artifactId>postgresql</artifactId>
        <scope>runtime</scope>
    </dependency>
    <!-- Spring Web starter -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
</dependencies>

/**
 * JPA entity mapped to the {@code orders} table.
 * All mapped columns except the generated id are declared non-nullable.
 */
@Entity
@Table(name = "orders")
public class Order {

    /** Primary key, generated by the database (identity strategy). */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    @Column(name = "order_date", nullable = false)
    private LocalDate orderDate;

    @Column(name = "product_category", nullable = false)
    private String productCategory;

    @Column(name = "region", nullable = false)
    private String region;

    @Column(name = "amount", precision = 10, scale = 2, nullable = false)
    private BigDecimal amount;

    /** No-argument constructor. */
    public Order() {
    }

    /** Creates an order with every field except the generated id. */
    public Order(LocalDate orderDate, String productCategory, String region, BigDecimal amount) {
        this.orderDate = orderDate;
        this.productCategory = productCategory;
        this.region = region;
        this.amount = amount;
    }

    public Long getId() {
        return this.id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    public LocalDate getOrderDate() {
        return this.orderDate;
    }

    public void setOrderDate(LocalDate orderDate) {
        this.orderDate = orderDate;
    }

    public String getProductCategory() {
        return this.productCategory;
    }

    public void setProductCategory(String productCategory) {
        this.productCategory = productCategory;
    }

    public String getRegion() {
        return this.region;
    }

    public void setRegion(String region) {
        this.region = region;
    }

    public BigDecimal getAmount() {
        return this.amount;
    }

    public void setAmount(BigDecimal amount) {
        this.amount = amount;
    }
}

/**
 * Repository exposing native PostgreSQL grouping-set queries over {@code orders}.
 *
 * <p>Summary rows are labelled with {@code CASE WHEN GROUPING(col) = 1} rather than
 * {@code COALESCE(col, ...)}: COALESCE would also relabel a genuine NULL dimension
 * value as a total, whereas GROUPING() is 1 only for rows where the column was
 * rolled up.
 */
@Repository
public interface OrderRepository extends JpaRepository<Order, Long> {
    /**
     * Multi-dimensional summary using CUBE.
     *
     * @return one {@code Object[]} per row:
     *         [0] region label, [1] product category label, [2] total amount,
     *         [3] GROUPING(region), [4] GROUPING(product_category)
     */
    @Query(value = """
        SELECT CASE WHEN GROUPING(region) = 1 THEN '总计' ELSE region END AS region,
               CASE WHEN GROUPING(product_category) = 1 THEN '总计' ELSE product_category END AS product_category,
               SUM(amount) AS total_amount,
               GROUPING(region) AS region_grouped,
               GROUPING(product_category) AS category_grouped
        FROM orders
        GROUP BY CUBE(region, product_category)
        ORDER BY region_grouped, region, category_grouped, product_category
        """, nativeQuery = true)
    List<Object[]> findSalesSummaryWithCube();

    /**
     * Hierarchical summary using ROLLUP.
     *
     * @return rows with the same column layout as {@link #findSalesSummaryWithCube()}
     */
    @Query(value = """
        SELECT CASE WHEN GROUPING(region) = 1 THEN '总计' ELSE region END AS region,
               CASE WHEN GROUPING(product_category) = 1 THEN '总计' ELSE product_category END AS product_category,
               SUM(amount) AS total_amount,
               GROUPING(region) AS region_grouped,
               GROUPING(product_category) AS category_grouped
        FROM orders
        GROUP BY ROLLUP(region, product_category)
        ORDER BY region_grouped, region, category_grouped, product_category
        """, nativeQuery = true)
    List<Object[]> findSalesSummaryWithRollup();

    /**
     * CUBE summary over year, month and region, restricted to a date range.
     *
     * @param startDate first order date included (BETWEEN is inclusive)
     * @param endDate   last order date included (BETWEEN is inclusive)
     * @return one {@code Object[]} per row:
     *         [0] year, [1] month, [2] region label, [3] total amount,
     *         [4] year grouping flag, [5] month grouping flag, [6] region grouping flag
     */
    @Query(value = """
        SELECT EXTRACT(YEAR FROM order_date) AS year,
               EXTRACT(MONTH FROM order_date) AS month,
               CASE WHEN GROUPING(region) = 1 THEN '总计' ELSE region END AS region,
               SUM(amount) AS total_amount,
               GROUPING(EXTRACT(YEAR FROM order_date)) AS year_grouped,
               GROUPING(EXTRACT(MONTH FROM order_date)) AS month_grouped,
               GROUPING(region) AS region_grouped
        FROM orders
        WHERE order_date BETWEEN :startDate AND :endDate
        GROUP BY CUBE(EXTRACT(YEAR FROM order_date), EXTRACT(MONTH FROM order_date), region)
        ORDER BY year_grouped, year, month_grouped, month, region_grouped, region
        """, nativeQuery = true)
    List<Object[]> findTimeBasedSalesSummary(@Param("startDate") LocalDate startDate, @Param("endDate") LocalDate endDate);
}

public class SalesSummaryDTO {
    private String region;
    private String productCategory;
    private BigDecimal totalAmount;
    private boolean isRegionTotal;
    private boolean isCategoryTotal;

    public SalesSummaryDTO(String region, String productCategory, BigDecimal totalAmount, boolean isRegionTotal, boolean isCategoryTotal) {
        this.region = region;
        this.productCategory = productCategory;
        this.totalAmount = totalAmount;
        this.isRegionTotal = isRegionTotal;
        this.isCategoryTotal = isCategoryTotal;
    }

    // getters and setters
    public String getRegion() { return region; }
    public void setRegion(String region) { this.region = region; }
    public String getProductCategory() { return productCategory; }
    public void setProductCategory(String productCategory) { this.productCategory = productCategory; }
    public BigDecimal getTotalAmount() { return totalAmount; }
    public void setTotalAmount(BigDecimal totalAmount) { this.totalAmount = totalAmount; }
    public boolean isRegionTotal() { return isRegionTotal; }
    public void setRegionTotal(boolean regionTotal) { isRegionTotal = regionTotal; }
    public boolean isCategoryTotal() { return isCategoryTotal; }
    public void setCategoryTotal(boolean categoryTotal) { isCategoryTotal = categoryTotal; }
    public boolean isGrandTotal() { return isRegionTotal && isCategoryTotal; }
}

/**
 * Read-only service that turns the raw rows returned by {@link OrderRepository}
 * into {@link SalesSummaryDTO} objects.
 */
@Service
@Transactional(readOnly = true)
public class SalesAnalysisService {
    @Autowired
    private OrderRepository orderRepository;

    /** Returns the CUBE summary as DTOs, in the order delivered by the repository. */
    public List<SalesSummaryDTO> getSalesSummaryWithCube() {
        return toDtoList(orderRepository.findSalesSummaryWithCube());
    }

    /** Returns the ROLLUP summary as DTOs, in the order delivered by the repository. */
    public List<SalesSummaryDTO> getSalesSummaryWithRollup() {
        return toDtoList(orderRepository.findSalesSummaryWithRollup());
    }

    /** Maps every raw row to a DTO. */
    private List<SalesSummaryDTO> toDtoList(List<Object[]> rows) {
        return rows.stream()
                .map(this::mapToSalesSummaryDTO)
                .collect(Collectors.toList());
    }

    /**
     * Converts one row laid out as
     * [region, productCategory, totalAmount, regionGrouped, categoryGrouped];
     * a grouping flag equal to 1 marks the corresponding total.
     */
    private SalesSummaryDTO mapToSalesSummaryDTO(Object[] row) {
        String regionLabel = (String) row[0];
        String categoryLabel = (String) row[1];
        BigDecimal sum = (BigDecimal) row[2];
        int regionFlag = ((Number) row[3]).intValue();
        int categoryFlag = ((Number) row[4]).intValue();
        boolean regionTotal = regionFlag == 1;
        boolean categoryTotal = categoryFlag == 1;
        return new SalesSummaryDTO(regionLabel, categoryLabel, sum, regionTotal, categoryTotal);
    }
}

/**
 * REST endpoints under {@code /api/sales} that expose the sales summaries.
 */
@RestController
@RequestMapping("/api/sales")
public class SalesAnalysisController {
    @Autowired
    private SalesAnalysisService salesAnalysisService;

    /** GET /api/sales/summary/cube — responds 200 with the CUBE summary. */
    @GetMapping("/summary/cube")
    public ResponseEntity<List<SalesSummaryDTO>> getSalesSummaryWithCube() {
        return ResponseEntity.ok(salesAnalysisService.getSalesSummaryWithCube());
    }

    /** GET /api/sales/summary/rollup — responds 200 with the ROLLUP summary. */
    @GetMapping("/summary/rollup")
    public ResponseEntity<List<SalesSummaryDTO>> getSalesSummaryWithRollup() {
        return ResponseEntity.ok(salesAnalysisService.getSalesSummaryWithRollup());
    }
}

// Front-end JavaScript (illustrative): render one summary record as an HTML table row.
//
// row: { region, productCategory, totalAmount, regionTotal / categoryTotal flags }
// Returns the <tr> markup as a string.
const renderRow = (row) => {
    // Values come from the database, so escape the HTML-significant characters
    // before placing them into markup.
    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

    // The Java DTO exposes these flags through isRegionTotal()/isCategoryTotal()
    // getters; under JavaBean naming (Jackson's default) they are serialized as
    // `regionTotal` / `categoryTotal`. Accept both spellings so total rows are
    // styled either way.
    const isTotal = Boolean(
        row.regionTotal || row.categoryTotal || row.isRegionTotal || row.isCategoryTotal
    );
    const rowClass = isTotal ? 'total-row' : 'detail-row';
    return `<tr class="${rowClass}"> <td>${escapeHtml(row.region)}</td> <td>${escapeHtml(row.productCategory)}</td> <td>${escapeHtml(row.totalAmount)}</td> </tr>`;
};

-- Combine ROLLUP with an ordinary grouping column
SELECT
    year,
    quarter,
    region,
    SUM(amount)
FROM sales_data
GROUP BY
    ROLLUP(year, quarter),
    region;

-- Conditional aggregation combined with grouping sets: overall total, total of
-- the last 30 days, and the order count for every CUBE combination.
SELECT
    region,
    product_category,
    SUM(amount) AS total_amount,
    SUM(
        CASE
            WHEN order_date >= CURRENT_DATE - INTERVAL '30 days' THEN amount
            ELSE 0
        END
    ) AS last_30_days_amount,
    COUNT(*) AS order_count
FROM orders
GROUP BY CUBE(region, product_category);

-- Share of the grand total for every row produced by CUBE.
-- The window runs over ALL result rows: detail rows, both kinds of subtotal rows
-- and the grand-total row. A plain SUM(SUM(amount)) OVER () therefore counts every
-- amount four times, and the grand-total row would show 25% instead of 100%.
-- The denominator below adds up only the detail rows (no column rolled up, i.e.
-- GROUPING(region, product_category) = 0), which equals the true grand total.
-- NULLIF guards against division by zero when the amounts sum to 0.
SELECT
    region,
    product_category,
    SUM(amount) AS total_amount,
    ROUND(
        SUM(amount) * 100.0
        / NULLIF(
            SUM(
                CASE
                    WHEN GROUPING(region, product_category) = 0 THEN SUM(amount)
                END
            ) OVER (),
            0
        ),
        2
    ) AS percentage_of_total
FROM orders
GROUP BY CUBE(region, product_category)
ORDER BY
    GROUPING(region),
    region,
    GROUPING(product_category),
    product_category;

-- Create the test table: one million rows of random data
-- NOTE(review): the ::int cast rounds to the nearest integer, so the generated
-- suffixes appear to span 1..6 (regions) and 1..11 (categories), with the two
-- end values less frequent — confirm that this distribution is intended.
CREATE TABLE test_orders AS
SELECT
    generate_series(1, 1000000)               AS id,
    'Region' || (random() * 5 + 1)::int       AS region,
    'Category' || (random() * 10 + 1)::int    AS product_category,
    (random() * 1000 + 10)::decimal(10,2)     AS amount,
    CURRENT_DATE - (random() * 365)::int      AS order_date;

-- Add an index on the two grouping columns
CREATE INDEX idx_test_orders_region_category
    ON test_orders (region, product_category);

-- Baseline for the benchmark: the same four summary levels built from separate
-- aggregations stitched together with UNION ALL.
EXPLAIN ANALYZE
SELECT
    region,
    product_category,
    SUM(amount),
    'detail' AS type
FROM test_orders
GROUP BY region, product_category

UNION ALL

SELECT
    region,
    NULL,
    SUM(amount),
    'region_total'
FROM test_orders
GROUP BY region

UNION ALL

SELECT
    NULL,
    product_category,
    SUM(amount),
    'category_total'
FROM test_orders
GROUP BY product_category

UNION ALL

SELECT
    NULL,
    NULL,
    SUM(amount),
    'grand_total'
FROM test_orders;

-- Benchmark counterpart: the same summary levels from a single CUBE aggregation.
EXPLAIN ANALYZE
SELECT
    region,
    product_category,
    SUM(amount)
FROM test_orders
GROUP BY CUBE(region, product_category);

查询方法	CUBE	UNION ALL
执行时间	~1s	~3s
内存使用	中等	较低
表扫描次数	1	4
性能优势	3x	-
维护成本	低	高
一致性	高	中

-- Request only the specific grouping combinations that are needed
-- (five explicit sets, instead of every combination of three columns)
SELECT ... FROM sales GROUP BY GROUPING SETS ((region, product_category, year), (region, year), (product_category, year), (year), ());

-- Clause fragment: sort keys that place summary rows after the detail rows.
-- Each key sits on its own line so the trailing comment cannot swallow the
-- remaining sort keys.
ORDER BY
    GROUPING(region),            -- summary rows sort last
    region,
    GROUPING(product_category),
    product_category

-- MySQL ROLLUP (supported)
SELECT
    region,
    product_category,
    SUM(amount)
FROM orders
GROUP BY
    region,
    product_category
WITH ROLLUP;

-- MySQL CUBE (not supported)
-- This will cause an error in MySQL

-- Materialized view that stores the commonly used summary
-- (per year, region and product category)
CREATE MATERIALIZED VIEW sales_summary_mv AS
SELECT
    EXTRACT(YEAR FROM order_date) AS year,
    region,
    product_category,
    SUM(amount) AS total_amount,
    COUNT(*)    AS order_count
FROM orders
GROUP BY
    EXTRACT(YEAR FROM order_date),
    region,
    product_category;

-- Index the materialized view on its grouping columns
CREATE INDEX idx_sales_summary_mv
    ON sales_summary_mv (year, region, product_category);

/**
 * Builds and runs a GROUPING SETS query over {@code orders} for a caller-chosen
 * list of dimensions.
 */
@Service
public class DynamicAnalyticsService {
    @Autowired
    private JdbcTemplate jdbcTemplate;

    /**
     * Runs a hierarchical summary for the given dimensions within a date range.
     *
     * @param dimensions column names to group by, outermost first; must be non-empty
     *                   and contain only plain SQL identifiers
     * @param dateRange  supplies the inclusive BETWEEN bounds via getStart()/getEnd()
     * @return one map per result row, as produced by {@code JdbcTemplate.queryForList}
     * @throws IllegalArgumentException if {@code dimensions} is null or empty, or if
     *                                  any entry is not a plain identifier
     */
    public List<Map<String,Object>> getDynamicSummary(List<String> dimensions, DateRange dateRange) {
        // Identifiers cannot be bound as parameters and are spliced into the SQL text,
        // so reject anything that is not a bare identifier before building the query.
        // An empty list is rejected as well: it would yield "GROUPING SETS (, ())".
        if (dimensions == null || dimensions.isEmpty()) {
            throw new IllegalArgumentException("dimensions must contain at least one column name");
        }
        for (String dimension : dimensions) {
            if (dimension == null || !dimension.matches("[A-Za-z_][A-Za-z0-9_]*")) {
                throw new IllegalArgumentException("Invalid dimension name: " + dimension);
            }
        }

        // Build the dynamic GROUPING SETS query; the date bounds stay bind parameters.
        String groupingSets = buildGroupingSets(dimensions);
        String selectColumns = buildSelectColumns(dimensions);
        String sql = String.format(
            "SELECT %s, SUM(amount) as total_amount, %s FROM orders WHERE order_date BETWEEN ? AND ? GROUP BY GROUPING SETS (%s) ORDER BY %s",
            selectColumns, buildGroupingFunctions(dimensions), groupingSets, buildOrderBy(dimensions)
        );
        return jdbcTemplate.queryForList(sql, dateRange.getStart(), dateRange.getEnd());
    }

    /**
     * Builds one grouping set per prefix of {@code dimensions}, followed by the
     * grand-total set. For [year, region] this yields "(year), (year, region), ()".
     * Prefixes are taken by position, so repeated names do not distort them.
     */
    private String buildGroupingSets(List<String> dimensions) {
        StringBuilder sets = new StringBuilder();
        for (int prefixLength = 1; prefixLength <= dimensions.size(); prefixLength++) {
            sets.append('(')
                .append(String.join(", ", dimensions.subList(0, prefixLength)))
                .append("), ");
        }
        return sets.append("()").toString();
    }
}

PostgreSQL 聚合查询优化：ROLLUP 与 CUBE 的使用

PostgreSQL - 聚合查询的优化：ROLLUP 与 CUBE 的使用

什么是 ROLLUP 和 CUBE？

ROLLUP：层次化汇总

CUBE：全维度组合汇总

PostgreSQL 中的语法支持

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

实际应用场景分析

传统方法的局限性

使用 CUBE 的优雅解决方案

使用 ROLLUP 的层次化方案

理解 NULL 值的含义

GROUPING() 函数

改进的查询示例

性能分析与优化

执行计划分析

索引优化策略

1. 覆盖索引（Covering Index）

2. 部分索引（Partial Index）

3. 表达式索引

内存和临时文件

分区表优化

Java 应用集成

项目依赖配置

实体类定义

Repository 层实现

DTO 类定义

Service 层实现

Controller 层实现

前端展示示例

高级使用技巧

混合分组集

条件聚合与分组集结合

使用窗口函数增强分析

实际性能对比测试

测试环境设置

测试查询

测试结果分析

常见陷阱与最佳实践

1. NULL 值混淆问题

2. 维度爆炸问题

3. 排序复杂性

4. 内存使用监控

5. 索引策略

与其他数据库的兼容性

PostgreSQL vs MySQL

PostgreSQL vs SQL Server

PostgreSQL vs Oracle

实际业务案例

电商销售分析仪表板

需求分析

技术方案

物化视图实现

动态查询服务

未来发展趋势

PostgreSQL 16+ 的新特性

总结

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具