使用滑动时间窗口过滤条件的 ClickHouse SELECT 查询

问题描述

我需要一些帮助。我有一个简单的查询:

    -- Distinct-device count (uniq) over the whole day 2020-12-28; both bounds are inclusive.
    SELECT uniq(deviceid)
    FROM events
    WHERE EventDateTime >= '2020-12-28 00:00:00'
      AND EventDateTime <= '2020-12-28 23:59:59'

现在,我需要把时间窗口向前平移一小时,再计算同样的结果:

    -- Same 24-hour window moved one hour earlier:
    -- 2020-12-27 23:00:00 .. 2020-12-28 22:59:59 (both bounds inclusive).
    -- The lower bound was previously written as '00:23:00' (hour and minute
    -- transposed), which made the window span almost two days.
    SELECT uniq(deviceid)
    from events
    where EventDateTime between '2020-12-27 23:00:00' and '2020-12-28 22:59:59'

再向前平移一小时:

    -- Same 24-hour window moved two hours earlier:
    -- 2020-12-27 22:00:00 .. 2020-12-28 21:59:59 (both bounds inclusive).
    -- The lower bound was previously written as '00:22:00' (hour and minute
    -- transposed), which made the window span almost two days.
    SELECT uniq(deviceid)
    from events
    where EventDateTime between '2020-12-27 22:00:00' and '2020-12-28 21:59:59'

这样一来,24 个小时就需要 24 个查询。有没有办法用一个查询完成?

预期结果:

'2020-12-28 00:00:00' - '2020-12-28 23:59:59','uniqValue1'
'2020-12-27 23:00:00' - '2020-12-28 22:59:59','uniqValue2'
'2020-12-27 22:00:00' - '2020-12-28 21:59:59','uniqValue3'
... 
'2020-12-27 00:00:00' - '2020-12-27 23:59:59','uniqValue24'

解决方法

更新:

-- For each of the first `steps` hourly buckets, report the merged distinct-device
-- count of a range of hourly uniq states that starts at that bucket.
WITH 5 AS steps
SELECT
    result.2 AS EventDateTime,
    result.1 AS UniqDeviceId
FROM
(
    SELECT
        -- Hour buckets and their partial uniq states, collected in the order the
        -- subquery emits them (it sorts by hour ascending).
        groupArray(hour) AS hours,
        groupArray(uniqDeviceIdState) AS states,
        -- One (start, length) range per step: start = 1 .. steps, and the length
        -- passed is length(states). Each range is reduced with uniqMerge.
        arrayReduceInRanges(
            'uniqMerge',
            arrayMap(start -> (start, length(states)), range(1, 1 + steps)),
            states
        ) AS uniq_per_range,
        -- Pair the i-th merged count with the i-th hour bucket.
        arrayMap(
            (uniq_value, idx) -> (uniq_value, hours[idx]),
            uniq_per_range,
            arrayEnumerate(uniq_per_range)
        ) AS uniq_with_hour,
        -- Unfold the array of (count, hour) tuples into one row per tuple.
        arrayJoin(uniq_with_hour) AS result
    FROM
    (
        -- Partial (mergeable) uniq state of DeviceId per hour bucket.
        SELECT
            toStartOfHour(EventDateTime) AS hour,
            uniqState(DeviceId) AS uniqDeviceIdState
        FROM
        (
            /* Emulate the test dataset: numbers 0..95 spread over 48 hour offsets
               (number % 48) counted from 2020-05-05, two ids per offset. */
            SELECT
                addHours(toDate('2020-05-05'), hour) AS EventDateTime,
                arrayJoin(DeviceIds) AS DeviceId
            FROM
            (
                SELECT
                    number % 48 AS hour,
                    groupArray(number) AS DeviceIds
                FROM numbers(96)
                GROUP BY hour
            )
        )
        WHERE EventDateTime BETWEEN '2020-05-05 00:00:00' AND '2020-05-07 23:59:59'
        GROUP BY hour
        ORDER BY hour ASC
    )
)

/*
┌───────EventDateTime─┬─UniqDeviceId─┐
│ 2020-05-05 00:00:00 │           96 │
│ 2020-05-05 01:00:00 │           94 │
│ 2020-05-05 02:00:00 │           92 │
│ 2020-05-05 03:00:00 │           90 │
│ 2020-05-05 04:00:00 │           88 │
└─────────────────────┴──────────────┘
*/

也可以考虑使用 GROUP BY 配合 toStartOfHour 函数,按小时分组统计:

-- Distinct-device count (uniq) per hour bucket within 2020-12-28;
-- both time bounds are inclusive, rows are ordered by hour.
SELECT
    toStartOfHour(EventDateTime) AS startOfHour,
    uniq(DeviceId)
FROM events
WHERE EventDateTime >= '2020-12-28 00:00:00'
  AND EventDateTime <= '2020-12-28 23:59:59'
GROUP BY startOfHour
ORDER BY startOfHour