将 Java Map 的字符串表示(toString() 输出)转换为 Python 字典

问题描述

我正在从数据源获取 Java 映射字符串。

{0={_shards={total=1,failed=0,successful=1,skipped=0}, hits={hits=[{_index=filebeat-7.10.0-2021.02.02-000001,_type=_doc,_source={input={type=log},agent={hostname=ubuntu_fresh,name=ubuntu_fresh,id=879f36f2-4ade-47b6-a7b9-7972634c7b8c,type=filebeat,ephemeral_id=5676523f-bc61-4c12-b319-8b463348ba63, version=7.10.0},@timestamp=2021-02-04T12:36:33.475Z, ecs={version=1.6.0},log={file={path=/var/log/auth.log},offset=46607},service={type=system},host={hostname=ubuntu_fresh, os={kernel=4.15.0-135-generic,codename=bionic,name=Ubuntu, family=debian,version=18.04.1 LTS (Bionic Beaver),platform=ubuntu}, containerized=false,ip=[10.0.2.15,fe80::a00:27ff:fe82:f598, 192.168.56.22,fe80::a00:27ff:fe32:fab0],name=ubuntu_fresh,id=cdfcdf6a39d44b98b2aa51700134f415,mac=[08:00:27:82:f5:98, 08:00:27:32:fa:b0],architecture=x86_64},fileset={name=auth}, message=Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2,error={message=Provided Grok expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2]},event={ingested=2021-02-04T12:36:39.482598548Z, timezone=+00:00,module=system,dataset=system.auth}}, _id=nNALbXcBbfKg8Fh6Zci7,_score=25.188179}],total={value=1,relation=eq},max_score=25.188179},took=1,timed_out=false}}

我没有权限在 Java 端对它进行转换。我有一个 Python 应用程序,希望以 Python 字典的形式访问这些数据,因此想把这个字符串转换成 Python 字典。

解决方法

Java 集合(Map、List 等)的 .toString() 输出是有损的,因为它不会消除分隔符的歧义。因此,无法从 Map.toString() 的输出中 100% 可靠地重建数据结构。但是,如果对问题加上一些约束:

  1. 键和值不包含某些字符(大致为 {}=[],")
  2. 数组不包含原始值和对象/数组的混合

然后我们可以在某种程度上可靠地将 toString() 的输出转换为 JSON,然后将 JSON 解析为 Python 数据结构。我不会在生产中使用此代码,但只要您知道它可能会损坏,它在某些情况下可能很有用:

# Sample Java Map.toString()-style string used as the demo input for
# javastr_to_jsonstr. Entries are separated by "," with no following space.
TEST_VALUE = "{0={_shards={total=1,failed=0,successful=1,skipped=0},hits={hits=[{_index=filebeat-7.10.0-2021.02.02-000001,_type=_doc,_source={input={type=log},agent={hostname=ubuntu_fresh,name=ubuntu_fresh,id=879f36f2-4ade-47b6-a7b9-7972634c7b8c,type=filebeat,ephemeral_id=5676523f-bc61-4c12-b319-8b463348ba63,version=7.10.0},@timestamp=2021-02-04T12:36:33.475Z,ecs={version=1.6.0},log={file={path=/var/log/auth.log},offset=46607},service={type=system},host={hostname=ubuntu_fresh,os={kernel=4.15.0-135-generic,codename=bionic,name=Ubuntu,family=debian,version=18.04.1 LTS (Bionic Beaver),platform=ubuntu},containerized=false,ip=[10.0.2.15,fe80::a00:27ff:fe82:f598,192.168.56.22,fe80::a00:27ff:fe32:fab0],id=cdfcdf6a39d44b98b2aa51700134f415,mac=[08:00:27:82:f5:98,08:00:27:32:fa:b0],architecture=x86_64},fileset={name=auth},message=Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2,error={message=Provided Grok expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2]},event={ingested=2021-02-04T12:36:39.482598548Z,timezone=+00:00,module=system,dataset=system.auth}},_id=nNALbXcBbfKg8Fh6Zci7,_score=25.188179}],total={value=1,relation=eq},max_score=25.188179},took=1,timed_out=false}}"

def quote_value_array_values(match):
    """Return the matched comma-separated text with every item double-quoted.

    Intended as a ``re.sub`` replacement callback: ``match`` is a regex match
    whose full text is a list such as ``a,b,c``; the result is ``"a","b","c"``.
    Items are wrapped verbatim -- no trimming and no escaping is performed.
    """
    items = match.group(0).split(",")
    return ",".join('"' + item + '"' for item in items)

def javastr_to_jsonstr(s):
    """Convert Java ``Map.toString()``-style text into a JSON string.

    This is a best-effort, regex-based rewrite, not a real parser. It only
    works when keys and values contain none of the characters ``{}=[],"``
    in a position that looks like structure, and when arrays do not mix
    scalars with objects/arrays. Keys are recognised only when they start
    immediately after ``{`` or ``,`` (no whitespace in between).

    Args:
        s: The Java ``toString()`` text, e.g. ``{a=1,b={c=[x,y]}}``.

    Returns:
        The rewritten text, e.g. ``{"a":"1","b":{"c":["x","y"]}}``. Every
        scalar value is emitted as a JSON string; no numeric or boolean
        conversion is attempted.
    """
    import re

    # 1. Quote the items of arrays that hold only scalars: =[a,b] -> =["a","b"].
    #    Arrays starting with "{" or containing brackets are left alone.
    s = re.sub(r"(?<==\[)[^{\[\]]+(?=\])", quote_value_array_values, s)

    # 2./3. Quote key=value pairs whose value is a scalar (i.e. the value does
    #    not start with "{" or "["), first directly after "{", then after ",".
    s = re.sub(r'(?<={)([^"=]+)[=:](?!{|\[)([^,}]+)', r'"\1":"\2"', s)
    s = re.sub(r'(?<=,)([^"=]+)[=:](?!{|\[)([^,}]+)', r'"\1":"\2"', s)

    # 4./5. Quote the remaining keys -- those whose value is an object or an
    #    array -- again after "{" and then after ",".
    s = re.sub(r'(?<={)([^"=]+)=(?!")', r'"\1":', s)
    s = re.sub(r'(?<=,)([^"=]+)=(?!")', r'"\1":', s)
    return s

import json
# Demo: rewrite the sample Java map string as JSON text, ...
json_str = javastr_to_jsonstr(TEST_VALUE)
# ... parse that text into Python objects, ...
json_obj = json.loads(json_str)
# ... and print it back out as JSON indented by one space per level.
print(json.dumps(json_obj,indent=1))

输出:

{
 "0": {
  "_shards": {
   "total": "1","failed": "0","successful": "1","skipped": "0"
  },"hits": {
   "hits": [
    {
     "_index": "filebeat-7.10.0-2021.02.02-000001","_type": "_doc","_source": {
      "input": {
       "type": "log"
      },"agent": {
       "hostname": "ubuntu_fresh","name": "ubuntu_fresh","id": "879f36f2-4ade-47b6-a7b9-7972634c7b8c","type": "filebeat","ephemeral_id": "5676523f-bc61-4c12-b319-8b463348ba63","version": "7.10.0"
      },"@timestamp": "2021-02-04T12:36:33.475Z","ecs": {
       "version": "1.6.0"
      },"log": {
       "file": {
        "path": "/var/log/auth.log"
       },"offset": "46607"
      },"service": {
       "type": "system"
      },"host": {
       "hostname": "ubuntu_fresh","os": {
        "kernel": "4.15.0-135-generic","codename": "bionic","name": "Ubuntu","family": "debian","version": "18.04.1 LTS (Bionic Beaver)","platform": "ubuntu"
       },"containerized": "false","ip": [
        "10.0.2.15","fe80::a00:27ff:fe82:f598","192.168.56.22","fe80::a00:27ff:fe32:fab0"
       ],"id": "cdfcdf6a39d44b98b2aa51700134f415","mac": [
        "08:00:27:82:f5:98","08:00:27:32:fa:b0"
       ],"architecture": "x86_64"
      },"fileset": {
       "name": "auth"
      },"message": "Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2","error": {
       "message": "Provided Grok expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2]"
      },"event": {
       "ingested": "2021-02-04T12:36:39.482598548Z","timezone": "+00:00","module": "system","dataset": "system.auth"
      }
     },"_id": "nNALbXcBbfKg8Fh6Zci7","_score": "25.188179"
    }
   ],"total": {
    "value": "1","relation": "eq"
   },"max_score": "25.188179"
  },"took": "1","timed_out": "false"
 }
}