Clickhouse中带\的null和字符串的数据类型

问题描述

{"DEVICE_GROUP":null,"DEVICE":"10.84.130.44","RULE":"check-in-traffic","TOPIC1":"interface.statistics","FIELDS_HIGH_THRESHOLD":800000000,"FIELDS_LOW_THRESHOLD":500000000,"FIELDS_OUT_OCTETS_STATS_VALUE":null,"FIELDS_TANDINGESTTIMESTAMP":1598127990844870700,"FIELDS_TANDTIMEOFFSET":"719.508643ms","KEYS_INTERFACE_NAME":"em3","KEYS_PLAYBOOK_NAME":"interface-kpis-playbook","KEYS_INSTANCE_ID":"[\"i1\"]"

上面是我从卡夫卡那里得到的杰森。我能够使用大多数键创建表,只是想知道我应该为KEYS_INSTANCE_ID提供哪种数据类型,以便使用MergerTree和使用Materialized视图的Kafka引擎在Clickhouse中创建表。我尝试使用字符串,但不适用于创建表。

#使用mergetree引擎创建表:

CREATE TABLE IF NOT EXISTS readings_hb_trial_11
(
  KEYS_INSTANCE_ID  String
)
ENGINE = MergeTree
ORDER BY KEYS_INSTANCE_ID

#使用kafka引擎创建表:

CREATE TABLE IF NOT EXISTS readings_queue_hb_trial_11
(
  KEYS_INSTANCE_ID String
)
ENGINE = Kafka
SETTINGS kafka_broker_list = '10########2',kafka_topic_list = 'R########B',kafka_group_name = 'readings_consumer_group3',kafka_format = 'JSONEachRow',kafka_max_block_size = 1048576

#实例化该表:

CREATE MATERIALIZED VIEW readings_queue_mv_hb_trial_11 TO readings_hb_trial_11 AS
SELECT
  KEYS_INSTANCE_ID
FROM readings_queue_hb_trial_11

解决方法

我怀疑将数组括在双引号中时在JSON中出错,因此正确的外观应为“ KEYS_INSTANCE_ID”:[“ i1”]

在这种情况下,类型 Array(String)应该会有所帮助。

让我们测试一下:

/* Emulate the table with Kafka-engine */
CREATE TABLE readings_queue_hb_trial_11
(
    `KEYS_INSTANCE_ID` Array(String)
)
ENGINE = Memory
/* MV takes just the first item from an array (as I understood it is your case). */
CREATE MATERIALIZED VIEW readings_queue_mv_hb_trial_11 TO readings_hb_trial_11 AS
SELECT
  empty(KEYS_INSTANCE_ID) ? '' : KEYS_INSTANCE_ID[1] AS KEYS_INSTANCE_ID
FROM readings_queue_hb_trial_11

模拟处理一些消息:

INSERT INTO readings_queue_hb_trial_11
SELECT JSONExtractArrayRaw('{"KEYS_INSTANCE_ID":["i1"]}','KEYS_INSTANCE_ID')
UNION ALL
SELECT JSONExtractArrayRaw('{"KEYS_INSTANCE_ID":[]}','KEYS_INSTANCE_ID')
UNION ALL
SELECT JSONExtractArrayRaw('{"KEYS_INSTANCE_ID":["i1","i2"]}','KEYS_INSTANCE_ID')

处理结果:

SELECT *
FROM readings_hb_trial_11

┌─KEYS_INSTANCE_ID─┐
│ "i1"             │
│                  │
│ "i1"             │
└──────────────────┘