问题描述
我有一个用例：需要从 Elasticsearch 中获取所有唯一的用户 ID，并按时间戳对其排序。
我目前使用的是带子聚合的 composite terms 聚合，该子聚合会返回最新的时间戳。
（我无法在客户端进行排序，因为那样会拖慢脚本的运行速度。）
Elasticsearch 中的示例数据：
{
"_index": "logstash-2020.10.29","_type": "doc","_id": "L0Urc3UBttS_uoEtubDk","_version": 1,"_score": null,"_source": {
"@version": "1","@timestamp": "2020-10-29T06:56:00.000Z","timestamp_string": "1603954560","search_query": "example 3","user_uuid": "asdfrghcwehf","browsing_url": "https://www.google.com/search?q=example+3"},"fields": {
"@timestamp": [
"2020-10-29T06:56:00.000Z"
]
},"sort": [
1603954560000
]
}
预期输出:
[
{
"key" : "bjvexyducsls","doc_count" : 846,"1" : {
"value" : 1.603948557E12,"value_as_string" : "2020-10-29T05:15:57.000Z"
}
},{
"key" : "lhmsbq2osski","doc_count" : 420,"1" : {
"value" : 1.6039476E12,"value_as_string" : "2020-10-29T05:00:00.000Z"
}
},{
"key" : "m2wiaufcbvvi","doc_count" : 1,"1" : {
"value" : 1.603893635E12,"value_as_string" : "2020-10-28T14:00:35.000Z"
}
},{
"key" : "rrm3vd5ovqwg","1" : {
"value" : 1.60389362E12,"value_as_string" : "2020-10-28T14:00:20.000Z"
}
},{
"key" : "x42lk4t3frfc","doc_count" : 72,"1" : {
"value" : 1.60389318E12,"value_as_string" : "2020-10-28T13:53:00.000Z"
}
}
]
解决方法
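下面给出一个可运行的示例，包含索引映射、索引数据、搜索查询和搜索结果。思路是使用 terms 聚合，并通过 order 按子聚合（此处为 date 字段的 min）的值对桶进行排序；如果需要按每个用户最新的时间戳降序排列，可将 min 换成 max，并将 asc 换成 desc。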
索引映射:
{
"mappings":{
"properties":{
"user":{
"type":"keyword"
},"date":{
"type":"date"
}
}
}
}
索引数据:
{
"date": "2015-01-01","user": "user1"
}
{
"date": "2014-01-01","user": "user2"
}
{
"date": "2015-01-11","user": "user3"
}
搜索查询:
{
"size": 0,"aggs": {
"user_id": {
"terms": {
"field": "user","order": {
"sort_user": "asc"
}
},"aggs": {
"sort_user": {
"min": {
"field": "date"
}
}
}
}
}
}
搜索结果:
"aggregations": {
"user_id": {
"doc_count_error_upper_bound": 0,"sum_other_doc_count": 0,"buckets": [
{
"key": "user2","doc_count": 1,"sort_user": {
"value": 1.3885344E12,"value_as_string": "2014-01-01T00:00:00.000Z"
}
},{
"key": "user1","doc_count": 1,"sort_user": {
"value": 1.4200704E12,"value_as_string": "2015-01-01T00:00:00.000Z"
}
},{
"key": "user3","doc_count": 1,"sort_user": {
"value": 1.4209344E12,"value_as_string": "2015-01-11T00:00:00.000Z"
}
}
]
}