问题描述
“test_sessions”表
-- Source table of session events. In the sample data `name` holds the
-- event type ('start' / 'stop') and `created_at` the event time.
CREATE TABLE IF NOT EXISTS test_sessions
(
    id         UInt64,
    name       String,
    created_at DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY name;
“test_sessions”表数据
-- Seed data: session 1 is started and stopped, session 2 is only started.
-- Every tuple supplies all three listed columns (id, name, created_at).
INSERT INTO test_sessions (id, name, created_at) VALUES
    (1, 'start', Now()),
    (1, 'stop',  Now() + INTERVAL 1 DAY),
    (2, 'start', Now() + INTERVAL 1 HOUR);
+----+-------+---------------------+
| id | name | created_at |
+----+-------+---------------------+
| 1 | start | 2020-11-10 07:58:19 |
+----+-------+---------------------+
| 2 | start | 2020-11-10 08:58:19 |
+----+-------+---------------------+
| 1 | stop | 2020-11-11 07:58:19 |
+----+-------+---------------------+
“finished_sessions”物化视图
-- Materialized view under discussion: one row per session id with the
-- earliest 'start' time and the latest 'stop' time, stored as plain
-- DateTime values. POPULATE fills it from the rows already present in
-- test_sessions.
CREATE MATERIALIZED VIEW finished_sessions
(
    id       UInt64,
    start_at DateTime,
    end_at   DateTime
)
ENGINE = AggregatingMergeTree
PARTITION BY toYYYYMM(start_at)
ORDER BY (id)
POPULATE AS
SELECT
    id,
    minIf(created_at, name = 'start') AS start_at,
    maxIf(created_at, name = 'stop')  AS end_at
FROM test_sessions
GROUP BY id
-- Presumably drops sessions with no 'stop' row, for which maxIf yields
-- the zero DateTime (1970-01-01 00:00:00).
HAVING end_at <> '1970-01-01 00:00:00';
“finished_sessions”物化视图数据
-- Show the rows currently stored in the view.
SELECT
    id,
    start_at,
    end_at
FROM finished_sessions;
+----+---------------------+---------------------+
| id | start_at | end_at |
+----+---------------------+---------------------+
| 1 | 2020-11-10 07:58:19 | 2020-11-11 07:58:19 |
+----+---------------------+---------------------+
到目前为止一切正常:只有 1 个已结束的会话。
第二个会话结束后
-- Session 2 finishes: record its 'stop' event so that the view's
-- maxIf(created_at, name = 'stop') picks it up.
INSERT INTO test_sessions (id, name, created_at) VALUES
    (2, 'stop', Now());
此时物化视图被错误地填充(会话 2 的 start_at 变成了 1970-01-01 00:00:00)
-- Inspect the view after the second insert, ordered by session id.
SELECT
    id,
    start_at,
    end_at
FROM finished_sessions
ORDER BY id;
+----+-------------------------------+---------------------+
| id | start_at | end_at |
+----+-------------------------------+---------------------+
| 1 | 2020-11-10 07:58:19 | 2020-11-11 07:58:19 |
+----+-------------------------------+---------------------+
| 2 | ---> 1970-01-01 00:00:00 <--- | 2020-11-10 08:06:24 |
+----+-------------------------------+---------------------+
如何解决?
解决方法
- 您应该使用 AggregateFunction,或者更好的选择是 SimpleAggregateFunction。
- 无法按 AggregateFunction 列对表进行分区,因为 AggregateFunction 的值是在合并(merge)期间计算的,而合并是在各个分区内部执行的。
- 物化视图(MV)本质上是一个插入触发器:它只对每次新插入的数据块执行 SELECT,而不会重新读取整张源表。参考: https://youtu.be/ckChUkC3Pns?list=PLO3lfQbpDVI-hyw4MyqxEk3rDHw95SzxJ https://den-crane.github.io/Everything_you_should_know_about_materialized_views_commented.pdf
-- Solution 1: store aggregate-function states in the view (-State
-- combinators) and finalize them at query time with -Merge.

CREATE TABLE IF NOT EXISTS test_sessions
(
    id         UInt64,
    name       String,
    created_at DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY name;

-- Session 1 is started and stopped, session 2 is only started.
INSERT INTO test_sessions (id, name, created_at) VALUES
    (1, 'start', now()),
    (1, 'stop',  now() + INTERVAL 1 DAY),
    (2, 'start', now() + INTERVAL 1 HOUR);

-- No explicit column list and no PARTITION BY here: start_at / end_at
-- take their types from the minStateIf / maxStateIf expressions.
CREATE MATERIALIZED VIEW finished_sessions
ENGINE = AggregatingMergeTree
ORDER BY (id)
POPULATE AS
SELECT
    id,
    minStateIf(created_at, name = 'start') AS start_at,
    maxStateIf(created_at, name = 'stop')  AS end_at
FROM test_sessions
GROUP BY id;

-- Session 2 finishes.
INSERT INTO test_sessions (id, name, created_at) VALUES
    (2, 'stop', now());

-- Combine the stored states per id to get the final start / end times.
SELECT
    id,
    minMerge(start_at),
    maxMerge(end_at)
FROM finished_sessions
GROUP BY id;

-- Query id: d797eee4-6088-40b8-aa12-b10da62b60c5
-- ┌─id─┬──minMerge(start_at)─┬────maxMerge(end_at)─┐
-- │  2 │ 2020-11-10 15:18:19 │ 2020-11-10 14:21:54 │
-- │  1 │ 2020-11-10 14:18:19 │ 2020-11-11 14:18:19 │
-- └────┴─────────────────────┴─────────────────────┘
-- Solution 2: declare the view columns as SimpleAggregateFunction so the
-- stored values are plain DateTime and can be read with min() / max().

CREATE TABLE IF NOT EXISTS test_sessions
(
    id         UInt64,
    name       String,
    created_at DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY name;

-- Session 1 is started and stopped, session 2 is only started.
INSERT INTO test_sessions (id, name, created_at) VALUES
    (1, 'start', now()),
    (1, 'stop',  now() + INTERVAL 1 DAY),
    (2, 'start', now() + INTERVAL 1 HOUR);

CREATE MATERIALIZED VIEW finished_sessions
(
    id       UInt64,
    start_at SimpleAggregateFunction(min, DateTime),
    end_at   SimpleAggregateFunction(max, DateTime)
)
ENGINE = AggregatingMergeTree
ORDER BY (id)
POPULATE AS
SELECT
    id,
    minIf(created_at, name = 'start') AS start_at,
    maxIf(created_at, name = 'stop')  AS end_at
FROM test_sessions
GROUP BY id;

-- Session 2 finishes.
INSERT INTO test_sessions (id, name, created_at) VALUES
    (2, 'stop', now());

-- Force a merge so rows with the same id are collapsed before reading.
OPTIMIZE TABLE finished_sessions FINAL;

SELECT
    id,
    min(start_at),
    max(end_at)
FROM finished_sessions
GROUP BY id;

-- ┌─id─┬───────min(start_at)─┬─────────max(end_at)─┐
-- │  2 │ 1970-01-01 00:00:00 │ 2020-11-10 14:29:30 │
-- │  1 │ 2020-11-10 14:29:15 │ 2020-11-11 14:29:15 │
-- └────┴─────────────────────┴─────────────────────┘
-- NOTE(review): id 2 still shows 1970-01-01 for start_at, presumably
-- because minIf yields the zero DateTime for the insert that contains
-- only the 'stop' row and min() then keeps that zero -- confirm.