WITH user_session AS (
-- 时序分析:按 5 分钟会话窗口切分SELECT
user_id,
SESSION_WINDOW(transaction_time, INTERVAL'5 minutes') as session_window,
-- 窗口函数:计算会话内统计COUNT(*) as trans_count,
SUM(amount) as total_amount,
-- JSON 分析:提取设备特征
JSONB_AGG(DISTINCT device_info->>'model') as device_models,
-- 空间分析:交易地点分布半径
ST_ClusterRadius(ARRAY_AGG(location)) as cluster_radius
FROM transactions
WHERE transaction_date =CURRENT_DATEGROUPBY1, 2
)
-- 图计算:关联用户社交关系SELECT
u.user_id,
u.session_window,
u.trans_count,
-- 判断是否团伙:同一会话内有关联用户EXISTS (
SELECT1FROM user_relations r
WHERE r.user_id = u.user_id
AND r.related_user_id IN (
SELECT user_id
FROM user_session s2
WHERE s2.session_window && u.session_window
AND ST_DWithin(s2.avg_location, u.avg_location, 500)
)
) as is_group_behavior
FROM user_session u
WHERE u.trans_count >10; -- 高频交易会话
-- 错误示范:在 WHERE 里做多层 JSON 解析SELECT*FROM orders
WHERE order_info->'user'->>'name'LIKE'张%'AND order_info->'items'->0->>'price'::numeric>1000;
-- 问题:每次查询都要解析整个 JSON,无法利用索引-- 正确做法:提取常用字段为生成列ALTER TABLE orders ADDCOLUMN user_name VARCHAR GENERATED ALWAYS AS (order_info->'user'->>'name') STORED;
CREATE INDEX idx_user_name ON orders(user_name);
-- 查询直接走 B-tree 索引
坑 2:JSONB 的索引选择
-- 场景:查询 device_info 里 os_type 和 app_version 的组合-- 方案 1:建两个 gin 索引(错!)CREATE INDEX idx_os ON devices USING gin ((device_info->'os'));
CREATE INDEX idx_app ON devices USING gin ((device_info->'app'));
-- 方案 2:建一个多列 gin 索引(对!)CREATE INDEX idx_device_combo ON devices USING gin ((device_info->'os'), (device_info->'app'));
-- 第二个索引大小只有第一个的 60%,查询时能同时命中两个条件
金仓的 JSONB 索引有个特性:支持部分索引。比如只给 Android 设备建索引:
CREATE INDEX idx_android_users ON users USING gin ((profile->'device'))
WHERE profile->>'os_type'='Android';
索引大小直接减半。
2. 时序数据的老化策略
时序数据最大的特点是'越新的越热,越旧的越冷'。金仓的分区表很好用,但自动老化需要自己配置:
-- 创建按天分区的交易表CREATE TABLE transactions (
trans_id BIGSERIAL,
user_id BIGINT,
amount NUMERIC(10,2),
trans_time TIMESTAMPTZ NOT NULL
) PARTITIONBYRANGE (trans_time);
-- 关键:提前创建分区CREATEOR REPLACE PROCEDURE create_transaction_partitions()
LANGUAGE plpgsql AS $$
DECLARE start_date DATE :=CURRENT_DATE;
i INT;
BEGINFOR i IN0..30 LOOP
DECLARE part_date DATE := start_date + i;
part_name TEXT :='trans_'|| to_char(part_date, 'YYYYMMDD');
BEGIN
IF NOTEXISTS (
SELECT1FROM pg_tables WHERE tablename = part_name
) THENEXECUTE format(
'CREATE TABLE %I PARTITION OF transactions ''FOR VALUES FROM (%L) TO (%L) ''WITH (fillfactor=95)',
part_name, part_date, part_date +1
);
END IF;
END;
END LOOP;
END;
$$;
-- 自动清理旧数据CREATEOR REPLACE PROCEDURE drop_old_partitions(retention_days INTDEFAULT90)
LANGUAGE plpgsql AS $$
DECLARE old_date DATE :=CURRENT_DATE- retention_days;
part_record RECORD;
BEGINFOR part_record INSELECT inhrelid::regclass as part_name
FROM pg_inherits JOIN pg_class ON inhrelid = oid
WHERE inhparent ='transactions'::regclass
AND relname ~'^trans_\d{8}$'ANDsubstring(relname from'trans_(\d{4})(\d{2})(\d{2})')::DATE< old_date
LOOP
-- 先解除分区关系EXECUTE format('ALTER TABLE transactions DETACH PARTITION %s', part_record.part_name);
-- 再删除表EXECUTE format('DROP TABLE %s', part_record.part_name);
RAISE NOTICE 'Dropped partition: %', part_record.part_name;
END LOOP;
END;
$$;
设置每天凌晨 2 点跑这两个存储过程,保证了:
永远有未来 30 天的空分区等着
90 天前的数据自动清理
业务完全无感知
3. 混合负载的资源隔离
HTAP 听起来美好,但分析查询把交易拖垮的事情太常见。资源组功能是解决之道:
-- 创建两个资源组CREATE RESOURCE GROUP oltp_group WITH (
concurrency =100, -- 最大并发数
cpu_rate_limit =70, -- CPU 使用率上限
memory_limit ='4GB', -- 内存上限
io_priority ='HIGH'-- IO 优先级
);
CREATE RESOURCE GROUP olap_group WITH (
concurrency =20,
cpu_rate_limit =30,
memory_limit ='8GB',
io_priority ='LOW'
);
-- 用户绑定资源组ALTERUSER app_user SET resource_group ='oltp_group';
ALTERUSER bi_user SET resource_group ='olap_group';
-- 更细粒度:按查询类型动态分配CREATEOR REPLACE FUNCTION assign_resource_group()
RETURNS void LANGUAGE plpgsql AS $$
BEGIN-- 交易类查询走 OLTP 组
IF current_query() LIKE'INSERT%'OR current_query() LIKE'UPDATE%'OR current_query() LIKE'DELETE%'OR
(current_query() LIKE'SELECT%'AND current_query() ~'WHERE.*=.*') THENSETLOCAL resource_group ='oltp_group';
-- 分析类查询走 OLAP 组
ELSEIF current_query() LIKE'SELECT%'AND (
current_query() LIKE'%GROUP BY%'OR
current_query() LIKE'%WINDOW%'OR
current_query() LIKE'%PARTITION%'
) THENSETLOCAL resource_group ='olap_group';
END IF;
END;
$$;