CREATE TABLE orders (
id SERIAL PRIMARY KEY,
order_date DATENOT NULL,
product_category VARCHAR(50) NOT NULL,
region VARCHAR(50) NOT NULL,
amount DECIMAL(10,2) NOT NULL
);
我们需要生成一个多维度的销售分析报表,包含:
每个地区每个产品类别的销售额
每个地区的总销售额
每个产品类别的总销售额
所有订单的总销售额
传统方法的局限性
使用传统的 GROUP BY,我们需要分别执行多个查询:
-- 按地区和产品类别SELECT region, product_category, SUM(amount) FROM orders GROUPBY region, product_category;
-- 按地区SELECT region, NULLas product_category, SUM(amount) FROM orders GROUPBY region;
-- 按产品类别SELECTNULLas region, product_category, SUM(amount) FROM orders GROUPBY product_category;
-- 总计SELECTNULLas region, NULLas product_category, SUM(amount) FROM orders;
然后在应用层将这些结果合并。这种方法存在以下问题:
多次数据库访问:需要执行 4 次查询,增加了网络开销
代码复杂度高:需要在应用层处理结果合并逻辑
数据一致性风险:如果在多次查询之间数据发生变化,可能导致结果不一致
性能问题:重复扫描同一张表,浪费资源
使用 CUBE 的优雅解决方案
使用 CUBE,我们只需要一个查询就能获得所有需要的汇总数据:
SELECT region, product_category, SUM(amount) as total_amount
FROM orders
GROUPBYCUBE(region, product_category)
ORDERBY region, product_category;
这个查询会返回所有可能的组合,包括:
具体的地区 + 产品类别组合
地区总计(product_category 为 NULL)
产品类别总计(region 为 NULL)
全局总计(两者都为 NULL)
使用 ROLLUP 的层次化方案
如果我们只需要层次化的汇总(比如先按地区,再按产品类别),可以使用 ROLLUP:
SELECT region, product_category, SUM(amount) as total_amount
FROM orders
GROUPBYROLLUP(region, product_category)
ORDERBY region, product_category;
SELECT region, product_category, GROUPING(region) as region_grouped, GROUPING(product_category) as category_grouped, SUM(amount) as total_amount
FROM orders
GROUPBYCUBE(region, product_category);
这样我们就能清楚地知道每一行的 NULL 是真实数据还是汇总占位符。
改进的查询示例
我们可以利用 GROUPING() 函数来美化输出:
SELECTCASEWHENGROUPING(region)=1THEN'总计'ELSECOALESCE(region,'未知地区') ENDas region_display,
CASEWHENGROUPING(product_category)=1THEN'总计'ELSECOALESCE(product_category,'未知类别') ENDas category_display,
SUM(amount) as total_amount
FROM orders
GROUPBYCUBE(region, product_category)
ORDERBYGROUPING(region), region, GROUPING(product_category), product_category;
EXPLAIN ANALYZE SELECT region, product_category, SUM(amount) FROM orders GROUPBYCUBE(region, product_category);
典型的执行计划可能包含:
HashAggregate:用于计算各种分组集
Seq Scan:全表扫描(如果没有合适的索引)
索引优化策略
为了提高性能,我们可以考虑以下索引策略:
1. 覆盖索引(Covering Index)
创建包含所有分组列和聚合列的索引:
-- 对于 CUBE(region, product_category)CREATE INDEX idx_orders_cube_covering ON orders (region, product_category) INCLUDE (amount);
-- 对于 ROLLUP(order_date, region, product_category)CREATE INDEX idx_orders_rollup_covering ON orders (order_date, region, product_category) INCLUDE (amount);
覆盖索引可以让 PostgreSQL 直接从索引中获取所有需要的数据,避免回表操作。
2. 部分索引(Partial Index)
如果某些维度的值分布很不均匀,可以考虑创建部分索引:
-- 只为活跃地区创建索引CREATE INDEX idx_orders_active_regions ON orders (product_category, amount) WHERE region IN('北京','上海','广州','深圳');
3. 表达式索引
如果经常按日期的年月进行分组,可以创建表达式索引:
CREATE INDEX idx_orders_year_month ON orders (EXTRACT(YEARFROM order_date), EXTRACT(MONTHFROM order_date));
@RepositorypublicinterfaceOrderRepositoryextendsJpaRepository<Order, Long> {
/**
* 使用 CUBE 进行多维汇总查询
*/@Query(value = """
SELECT COALESCE(region, '总计') as region, COALESCE(product_category, '总计') as product_category,
SUM(amount) as total_amount, GROUPING(region) as region_grouped, GROUPING(product_category) as category_grouped
FROM orders
GROUP BY CUBE(region, product_category)
ORDER BY region_grouped, region, category_grouped, product_category
""", nativeQuery = true)
List<Object[]> findSalesSummaryWithCube();
/**
* 使用 ROLLUP 进行层次化汇总查询
*/@Query(value = """
SELECT COALESCE(region, '总计') as region, COALESCE(product_category, '总计') as product_category,
SUM(amount) as total_amount, GROUPING(region) as region_grouped, GROUPING(product_category) as category_grouped
FROM orders
GROUP BY ROLLUP(region, product_category)
ORDER BY region_grouped, region, category_grouped, product_category
""", nativeQuery = true)
List<Object[]> findSalesSummaryWithRollup();
/**
* 带日期范围的 CUBE 查询
*/@Query(value = """
SELECT EXTRACT(YEAR FROM order_date) as year, EXTRACT(MONTH FROM order_date) as month,
COALESCE(region, '总计') as region, SUM(amount) as total_amount,
GROUPING(EXTRACT(YEAR FROM order_date)) as year_grouped, GROUPING(EXTRACT(MONTH FROM order_date)) as month_grouped, GROUPING(region) as region_grouped
FROM orders
WHERE order_date BETWEEN :startDate AND :endDate
GROUP BY CUBE(EXTRACT(YEAR FROM order_date), EXTRACT(MONTH FROM order_date), region)
ORDER BY year_grouped, year, month_grouped, month, region_grouped, region
""", nativeQuery = true)
List<Object[]> findTimeBasedSalesSummary(@Param("startDate") LocalDate startDate, @Param("endDate") LocalDate endDate);
}
-- MySQL ROLLUP (supported)SELECT region, product_category, SUM(amount) FROM orders GROUPBY region, product_category WITHROLLUP;
-- MySQL CUBE (not supported)-- This will cause an error in MySQL
PostgreSQL vs SQL Server
SQL Server 完全支持 ROLLUP、CUBE 和 GROUPING SETS,语法与 PostgreSQL 基本相同。