问题描述
基表
-- Base table: one row per session event (session_name is 'start' or 'stop'
-- in the sample data), partitioned by the month of the event timestamp.
CREATE TABLE IF NOT EXISTS test_sessions
(
    session_id   UInt64,
    session_name String,
    created_at   DateTime
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(created_at)
ORDER BY session_id;
有以下数据
-- Seed data: session 1 has both a 'start' and a 'stop' event (completed);
-- session 2 has only a 'start' event, so it is still open.
-- Every tuple must supply all three listed columns.
INSERT INTO test_sessions (session_id, session_name, created_at) VALUES
    (1, 'start', '2021-01-31 00:00:00'),
    (1, 'stop',  '2021-01-31 01:00:00'),
    (2, 'start', '2021-01-31 01:00:00')
;
创建了 2 个视图(一个物化视图和一个普通视图)来获取已关闭(已完成)的会话
-- Per-session aggregate states, updated on every INSERT into test_sessions:
--   started_at  state of minIf(created_at, session_name = 'start')
--   stopped_at  state of maxIf(created_at, session_name = 'stop')
-- The AggregateFunction argument types must match the arguments of the
-- corresponding *State call, i.e. (DateTime, UInt8) for both columns.
-- POPULATE back-fills the view from rows already present in test_sessions.
CREATE MATERIALIZED VIEW IF NOT EXISTS test_session_aggregate_states
(
    session_id UInt64,
    started_at AggregateFunction(minIf, DateTime, UInt8),
    stopped_at AggregateFunction(maxIf, DateTime, UInt8)
)
ENGINE = AggregatingMergeTree
PARTITION BY tuple()
ORDER BY (session_id)
POPULATE AS
SELECT
    session_id,
    minIfState(created_at, session_name = 'start') AS started_at,
    maxIfState(created_at, session_name = 'stop')  AS stopped_at
FROM test_sessions
GROUP BY session_id;
-- Completed sessions: merges the stored aggregate states per session and
-- keeps only sessions whose start and stop timestamps both differ from the
-- zero DateTime literal.
CREATE VIEW IF NOT EXISTS test_session_completed
(
    session_id UInt64,
    started_at DateTime,
    stopped_at DateTime
)
AS
SELECT
    session_id,
    minIfMerge(started_at) AS started_at,
    maxIfMerge(stopped_at) AS stopped_at
FROM test_session_aggregate_states
GROUP BY session_id
HAVING started_at != '0000-00-00 00:00:00'
   AND stopped_at != '0000-00-00 00:00:00'
;
运行正常:返回 1 行,即同时存在 “start” 和 “stop” 的会话
-- Sanity check of the view; the expected output is shown below the query.
SELECT
    session_id,
    started_at,
    stopped_at
FROM test_session_completed;
-- 1,2021-01-31 00:00:00,2021-01-31 01:00:00
尝试创建基于 test_session_completed 的物化视图,并与其他表进行 JOIN(示例中省略了 JOIN)
-- Materialized view that selects session ids from the test_session_completed
-- view. POPULATE fills it once from the view's contents at creation time.
-- NOTE(review): a materialized view is triggered by INSERTs into the table
-- named in its FROM clause; test_session_completed is a plain view and never
-- receives INSERTs, which is presumably why later sessions never reach this
-- table -- confirm against the ClickHouse documentation.
CREATE MATERIALIZED VIEW IF NOT EXISTS test_mv
(
    session_id UInt64
)
ENGINE = MergeTree
PARTITION BY tuple()
ORDER BY session_id
POPULATE AS
SELECT session_id
FROM test_session_completed
;
编写测试查询来测试 test_mv
-- Add a third session with both events. session_name must be supplied:
-- without it the rows match neither the 'start' nor the 'stop' condition
-- and session 3 could never show up as completed.
INSERT INTO test_sessions (session_id, session_name, created_at) VALUES
    (3, 'start', '2021-01-31 02:00:00'),
    (3, 'stop',  '2021-01-31 03:00:00');
-- The plain view reflects the new session; expected output is shown below.
SELECT
    session_id,
    started_at,
    stopped_at
FROM test_session_completed;
-- SUCCESS
-- 3,2021-01-31 02:00:00,2021-01-31 03:00:00
-- 1,2021-01-31 00:00:00,2021-01-31 01:00:00
-- Query the materialized view: the actual output (after FAILURE) lacks
-- session 3, which the expected output lists.
SELECT session_id
FROM test_mv;
-- FAILURE
-- 1
-- EXPECTED RESULT
-- 3
-- 1
如何根据 test_session_completed 来填充 test_mv?
ClickHouse 版本:20.11.4.13
解决方法
- 无法基于视图(VIEW)创建 MV。
- MV 是一个插入触发器,在同一个表中没有状态 started 的情况下,不可能获得状态 completed。如果您不需要检查 started 是否发生在 completed 之前,那么可以编写更简单的 MV,只需用 where 过滤 completed 即可。
- 您不需要 minIfState,可以使用 min(SimpleAggregateFunction)。这样可以减少存储的数据量并提高性能。
- 我认为第二个 MV 是多余的。
检查这个: https://den-crane.github.io/Everything_you_should_know_about_materialized_views_commented.pdf
https://youtu.be/ckChUkC3Pns?list=PLO3lfQbpDVI-hyw4MyqxEk3rDHw95SzxJ&t=9371
我会这样做:
-- Base table: one row per session event.
CREATE TABLE IF NOT EXISTS test_sessions
(
    session_id   UInt64,
    session_name String,
    created_at   DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY (session_id);

-- Per-session start/stop timestamps stored as plain values that
-- AggregatingMergeTree combines with min / max.
-- NOTE(review): minIf presumably yields the DateTime default when no row in
-- an inserted block matches; if 'start' and 'stop' arrive in separate
-- INSERTs, min() across parts may then collapse started_at to that default.
-- Verify before relying on this for events split across inserts.
CREATE MATERIALIZED VIEW IF NOT EXISTS test_session_aggregate_states
(
    session_id UInt64,
    started_at SimpleAggregateFunction(min, DateTime),
    stopped_at SimpleAggregateFunction(max, DateTime)
)
ENGINE = AggregatingMergeTree
PARTITION BY tuple()
ORDER BY (session_id)
POPULATE AS
SELECT
    session_id,
    minIf(created_at, session_name = 'start') AS started_at,
    maxIf(created_at, session_name = 'stop')  AS stopped_at
FROM test_sessions
GROUP BY session_id;

INSERT INTO test_sessions (session_id, session_name, created_at) VALUES
    (3, 'start', '2021-01-31 02:00:00'),
    (3, 'stop',  '2021-01-31 03:00:00');

-- completed sessions:
SELECT
    session_id,
    min(started_at) AS started_at,
    max(stopped_at) AS stopped_at
FROM test_session_aggregate_states
GROUP BY session_id
HAVING (started_at != '0000-00-00 00:00:00')
   AND (stopped_at != '0000-00-00 00:00:00');

-- ┌─session_id─┬──────────started_at─┬──────────stopped_at─┐
-- │          1 │ 2021-01-31 00:00:00 │ 2021-01-31 01:00:00 │
-- └────────────┴─────────────────────┴─────────────────────┘
另外,使用 argMaxState,您可以在同一个 session_id 内聚合多个 start 和 stop。