问题描述
我正在尝试对 elasticsearch 上的聚合数据进行平均。这是我的数据结构:
文档 1
{
"groupId":"TEST_01","lag":10,"detectionDate":"2021-02-26T21:42:30.010Z","tipo":"uno","topics":[
{
"name":"topic_01","valore":2
},{
"name":"topic_02","valore":4
}
]
}
文档 2
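{
"groupId":"TEST_01","lag":10,"detectionDate":"2021-02-26T21:42:30.010Z","tipo":"uno","topics":[
{
"name":"topic_01","valore":4
},{
"name":"topic_02","valore":8
}
]
}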
我需要按 groupId 和主题名称(topics.name)进行聚合,并在该聚合上计算 valore 字段的平均值。但是用下面的代码试了一下,得到的平均值是错误的。
有了以上文档一和二的数据,预期的结果应该是:
groupId | topicName | 平均 |
---|---|---|
TEST_01 | topic_01 | 3 |
TEST_01 | topic_02 | 6 |
// Asker's original attempt (reported above as producing wrong averages):
// terms on groupId -> terms on topic name -> avg of valore, with no nested aggregation.
TermsAggregationBuilder byTopicName = AggregationBuilders
    .terms("topicName")
    .field("topics.name.keyword")
    .subAggregation(AggregationBuilders.avg("avg").field("topics.valore"));

TermsAggregationBuilder aggregation = AggregationBuilders
    .terms("groupId")
    .field("groupId.keyword")
    .subAggregation(byTopicName);
解决方法
首先确保 topics 字段的映射类型为 nested。如果它是默认的 object 类型,数组中各元素的 name 和 valore 会被扁平化:最终得到的是一组 name 和一组 valore,它们之间不再有对应关系,因此无法按主题名称正确计算平均值。
映射
{
"test_ynsanity" : {
"mappings" : {
"properties" : {
"detectionDate" : {
"type" : "date"
},"groupId" : {
"type" : "text","fields" : {
"keyword" : {
"type" : "keyword","ignore_above" : 256
}
}
},"lag" : {
"type" : "long"
},"tipo" : {
"type" : "text","fields" : {
"keyword" : {
"type" : "keyword","ignore_above" : 256
}
}
},"topics" : {
"type" : "nested","properties" : {
"name" : {
"type" : "text","fields" : {
"keyword" : {
"type" : "keyword","ignore_above" : 256
}
}
},"valore" : {
"type" : "long"
}
}
}
}
}
}
}
写入数据
POST test_ynsanity/_doc
{
"groupId":"TEST_01","lag":10,"detectionDate":"2021-02-26T21:42:30.010Z","tipo":"uno","topics":[
{
"name":"topic_01","valore":2
},{
"name":"topic_02","valore":4
}
]
}
POST test_ynsanity/_doc
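{
"groupId":"TEST_01","lag":10,"detectionDate":"2021-02-26T21:42:30.010Z","tipo":"uno","topics":[
{
"name":"topic_01","valore":4
},{
"name":"topic_02","valore":8
}
]
}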
查询
POST test_ynsanity/_search
{
"size": 0,"aggs": {
"groups": {
"terms": {
"field": "groupId.keyword","size": 10
},"aggs": {
"topics": {
"nested": {
"path": "topics"
},"aggs": {
"topic_names": {
"terms": {
"field": "topics.name.keyword"
},"aggs": {
"topic_avg": {
"avg": {
"field": "topics.valore"
}
}
}
}
}
}
}
}
}
}
响应
{
"took" : 1,"timed_out" : false,"_shards" : {
"total" : 1,"successful" : 1,"skipped" : 0,"failed" : 0
},"hits" : {
"total" : {
"value" : 2,"relation" : "eq"
},"max_score" : null,"hits" : [ ]
},"aggregations" : {
"groups" : {
"doc_count_error_upper_bound" : 0,"sum_other_doc_count" : 0,"buckets" : [
{
"key" : "TEST_01","doc_count" : 2,"topics" : {
"doc_count" : 4,"topic_names" : {
"doc_count_error_upper_bound" : 0,"sum_other_doc_count" : 0,"buckets" : [
{
"key" : "topic_01","doc_count" : 2,"topic_avg" : {
"value" : 3.0
}
},{
"key" : "topic_02","doc_count" : 2,"topic_avg" : {
"value" : 6.0
}
}
}
]
}
}
}
]
}
}
}
我现在无法访问 Java DSL,但查询应如下所示:
// Java equivalent of the JSON query: terms(groupId) -> nested(topics) -> terms(topic name) -> avg(valore).
// The nested aggregation has no terms() method of its own; the per-topic terms
// aggregation must be attached to it via subAggregation(...), otherwise the
// chain does not compile.
TermsAggregationBuilder aggregation = AggregationBuilders
    .terms("groupId")
    .field("groupId.keyword")
    .subAggregation(AggregationBuilders
        // Step into the nested "topics" objects so each name stays paired with its own valore.
        .nested("agg", "topics")
        .subAggregation(AggregationBuilders
            .terms("topic_names")
            .field("topics.name.keyword")
            .subAggregation(AggregationBuilders
                .avg("avg").field("topics.valore"))));