物化视图的填充不正确

问题描述

“test_sessions”表

-- Source table of session events: one row per event, where `name` holds the
-- event type and `created_at` the event time.
CREATE TABLE IF NOT EXISTS test_sessions
(
    id UInt64,
    name String,
    created_at DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY name;

“test_sessions”表数据

-- Seed data: session 1 is started and stopped, session 2 is only started.
-- Every tuple must supply all three listed columns (id, name, created_at).
INSERT INTO test_sessions(id,name,created_at) VALUES
(1,'start',Now()),(1,'stop',Now() + INTERVAL 1 day),(2,'start',Now() + INTERVAL 1 HOUR);

+----+-------+---------------------+
| id | name  | created_at          |
+----+-------+---------------------+
| 1  | start | 2020-11-10 07:58:19 |
+----+-------+---------------------+
| 2  | start | 2020-11-10 08:58:19 |
+----+-------+---------------------+
| 1  | stop  | 2020-11-11 07:58:19 |
+----+-------+---------------------+

“finished_sessions”物化视图

-- Pairs each session's earliest 'start' time with its latest 'stop' time and
-- keeps only the groups whose end_at differs from the zero DateTime.
CREATE MATERIALIZED VIEW finished_sessions
(
    id UInt64,
    start_at DateTime,
    end_at DateTime
)
ENGINE = AggregatingMergeTree
PARTITION BY toYYYYMM(start_at)
ORDER BY (id)
POPULATE AS
SELECT
    id,
    minIf(created_at, name = 'start') AS start_at,
    maxIf(created_at, name = 'stop') AS end_at
FROM test_sessions
GROUP BY id
HAVING end_at <> '1970-01-01 00:00:00';

“finished_sessions”物化视图数据

-- Explicit column list instead of `*`, so the output stays stable if the view changes.
SELECT
    id,
    start_at,
    end_at
FROM finished_sessions;

+----+---------------------+---------------------+
| id | start_at            | end_at              |
+----+---------------------+---------------------+
| 1  | 2020-11-10 07:58:19 | 2020-11-11 07:58:19 |
+----+---------------------+---------------------+

到目前为止一切正常:只有 1 个已结束的会话。

第二个会话结束后

-- Close session 2: the event type must be given explicitly, otherwise `name`
-- falls back to its default and the row is not counted as a 'stop'.
INSERT INTO test_sessions(id,name,created_at) VALUES
(2,'stop',Now());

发生不正确的填充

-- Explicit column list instead of `*`; ordered by id for a deterministic listing.
SELECT
    id,
    start_at,
    end_at
FROM finished_sessions
ORDER BY id;

+----+-------------------------------+---------------------+
| id | start_at                      | end_at              |
+----+-------------------------------+---------------------+
| 1  | 2020-11-10 07:58:19           | 2020-11-11 07:58:19 |
+----+-------------------------------+---------------------+
| 2  | ---> 1970-01-01 00:00:00 <--- | 2020-11-10 08:06:24 |
+----+-------------------------------+---------------------+

如何解决

解决方法

  1. 您应该使用AggregateFunction或更好的SimpleAggregateFunction

  2. 不能按 AggregateFunction 列对表进行分区,因为 AggregateFunction 是在合并期间计算的,而合并是在各个分区内执行的。

  3. 物化视图(MV)本质上是插入触发器。参见:https://youtu.be/ckChUkC3Pns?list=PLO3lfQbpDVI-hyw4MyqxEk3rDHw95SzxJ 和 https://den-crane.github.io/Everything_you_should_know_about_materialized_views_commented.pdf

    -- Source table of session events: one row per event, where `name` holds
    -- the event type and `created_at` the event time.
    CREATE TABLE IF NOT EXISTS test_sessions
    (
        id UInt64,
        name String,
        created_at DateTime
    )
    ENGINE = MergeTree()
    PARTITION BY toYYYYMM(created_at)
    ORDER BY name;
    
    -- Seed data: session 1 is started and stopped, session 2 is only started.
    -- Every tuple must supply all three listed columns (id, name, created_at).
    INSERT INTO test_sessions(id,name,created_at) VALUES
    (1,'start',now()),(1,'stop',now() + INTERVAL 1 day),(2,'start',now() + INTERVAL 1 HOUR);
    
    -- Keeps partial aggregate states (-State combinator) per id instead of
    -- final values; read them back with the matching -Merge functions.
    -- No PARTITION BY here: the view is ordered by id only.
    CREATE MATERIALIZED VIEW finished_sessions
    ENGINE = AggregatingMergeTree
    ORDER BY (id)
    POPULATE AS
    SELECT
        id,
        minStateIf(created_at, name = 'start') AS start_at,
        maxStateIf(created_at, name = 'stop') AS end_at
    FROM test_sessions
    GROUP BY id;
    
    -- Close session 2: the event type must be given explicitly, otherwise
    -- `name` falls back to its default and the row is not counted as a 'stop'.
    INSERT INTO test_sessions(id,name,created_at) VALUES
    (2,'stop',now());
    
    -- Finalize the stored aggregate states per id with the -Merge combinators.
    SELECT
        id,
        minMerge(start_at),
        maxMerge(end_at)
    FROM finished_sessions
    GROUP BY id
    
    Query id: d797eee4-6088-40b8-aa12-b10da62b60c5
    
    ┌─id─┬──minMerge(start_at)─┬────maxMerge(end_at)─┐
    │  2 │ 2020-11-10 15:18:19 │ 2020-11-10 14:21:54 │
    │  1 │ 2020-11-10 14:18:19 │ 2020-11-11 14:18:19 │
    └────┴─────────────────────┴─────────────────────┘
-- Second variant: same source table and seed rows, aggregated further down
-- with SimpleAggregateFunction columns.
-- The `name` column is required: it is the sorting key and carries the
-- event type ('start' / 'stop').
CREATE TABLE IF NOT EXISTS test_sessions
(
    id UInt64,
    name String,
    created_at DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY name;

-- Seed data: session 1 is started and stopped, session 2 is only started.
INSERT INTO test_sessions(id,name,created_at) VALUES
(1,'start',now()),(1,'stop',now() + INTERVAL 1 day),(2,'start',now() + INTERVAL 1 HOUR);

-- Declares start_at / end_at as SimpleAggregateFunction columns, so the view
-- stores plain DateTime values tagged with the min / max function.
CREATE MATERIALIZED VIEW finished_sessions
(
    id UInt64,
    start_at SimpleAggregateFunction(min, DateTime),
    end_at SimpleAggregateFunction(max, DateTime)
)
ENGINE = AggregatingMergeTree
ORDER BY (id)
POPULATE AS
SELECT
    id,
    minIf(created_at, name = 'start') AS start_at,
    maxIf(created_at, name = 'stop') AS end_at
FROM test_sessions
GROUP BY id;


-- Close session 2: the event type must be given explicitly, otherwise `name`
-- falls back to its default and the row is not counted as a 'stop'.
INSERT INTO test_sessions(id,name,created_at) VALUES
(2,'stop',now());

-- Request an unscheduled merge of the view's data parts before querying.
OPTIMIZE TABLE finished_sessions FINAL;


-- Re-aggregate the stored values per id at query time.
SELECT
    id,
    min(start_at),
    max(end_at)
FROM finished_sessions
GROUP BY id

┌─id─┬───────min(start_at)─┬─────────max(end_at)─┐
│  2 │ 1970-01-01 00:00:00 │ 2020-11-10 14:29:30 │
│  1 │ 2020-11-10 14:29:15 │ 2020-11-11 14:29:15 │
└────┴─────────────────────┴─────────────────────┘