Error running Apache PredictionIO with HBase

Problem description

I am trying to develop a project based on Apache PredictionIO, and I followed this guide to dockerize PredictionIO. There is a problem: I want to use HBase together with Hadoop as the event data store for PIO. So, inside the docker directory (in the repository linked above), I created an hbase directory, and in it I use these docker-compose files.
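
For reference, the directory layout implied by the env_file entries below and the docker-compose -f flags used later is roughly:

docker/
├── docker-compose.yml
├── docker-compose.spark.yml
├── elasticsearch/
│   ├── docker-compose.base.yml
│   └── docker-compose.meta.yml
├── localfs/
│   └── docker-compose.model.yml
└── hbase/
    ├── docker-compose.base.yml
    ├── docker-compose.event.yml
    ├── hadoop.env
    └── hbase-distributed-local.env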

docker-compose.base.yml


version: "3"
services:
  namenode:
    image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
    container_name: namenode
    volumes:
      - hadoop_namenode:/hadoop/dfs/name
    environment:
      - CLUSTER_NAME=test
    env_file:
      - hbase/hadoop.env
    ports:
      - 50070:50070

  datanode:
    image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
    container_name: datanode
    volumes:
      - hadoop_datanode:/hadoop/dfs/data
    environment:
      SERVICE_PRECONDITION: "namenode:50070"
    env_file:
      - hbase/hadoop.env
    ports:
      - 50075:50075

  resourcemanager:
    image: bde2020/hadoop-resourcemanager:2.0.0-hadoop2.7.4-java8
    container_name: resourcemanager
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075"
    env_file:
      - hbase/hadoop.env
    ports:
      - 8088:8088

  nodemanager1:
    image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8
    container_name: nodemanager
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088"
    env_file:
      - hbase/hadoop.env
    ports:
      - 8042:8042

  historyserver:
    image: bde2020/hadoop-historyserver:2.0.0-hadoop2.7.4-java8
    container_name: historyserver
    volumes:
      - hadoop_historyserver:/hadoop/yarn/timeline
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088"
    env_file:
      - hbase/hadoop.env
    ports:
      - 8188:8188
  
  zoo:
    image: zookeeper:3.4.10
    container_name: zoo
    hostname: zoo
    restart: always
    environment:
      ZOO_MY_ID: 1
      ZOO_SERVERS: server.1=0.0.0.0:2888:3888
    ports:
      - 2181:2181

  hbase-master:
    image: bde2020/hbase-master:1.0.0-hbase1.2.6
    container_name: hbase-master
    hostname: hbase-master
    depends_on:
      - zoo
    env_file:
      - hbase/hbase-distributed-local.env
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 zoo:2181"
    ports:
      - 16010:16010

  hbase-region:
    image: bde2020/hbase-regionserver:1.0.0-hbase1.2.6
    container_name: hbase-regionserver
    hostname: hbase-regionserver
    env_file:
      - hbase/hbase-distributed-local.env
    environment:
      HBASE_CONF_hbase_regionserver_hostname: hbase-region
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 zoo:2181 hbase-master:16010"
    ports:
      - 16030:16030

  pio:
    depends_on:
      - hbase-master
    environment:
      PIO_STORAGE_SOURCES_HBASE_TYPE: hbase
      PIO_STORAGE_SOURCES_HBASE_HOSTS: hbase-master

volumes:
  hadoop_namenode:
  hadoop_datanode:
  hadoop_historyserver:

and docker-compose.event.yml:

version: "3"
services:
  pio:
    environment:
        PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME: pio_event
        PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE: HBASE

I also have environment files, such as hbase-distributed-local.env:

HBASE_CONF_hbase_rootdir=hdfs://namenode:9000/hbase
HBASE_CONF_hbase_cluster_distributed=true
HBASE_CONF_hbase_zookeeper_quorum=zoo
HBASE_CONF_hbase_zookeeper_property_clientPort=2181
HBASE_CONF_hbase_cluster_distributed=true
HBASE_CONF_hbase_master=hbase-master:16000
HBASE_CONF_hbase_master_hostname=hbase-master
HBASE_CONF_hbase_master_port=16000
HBASE_CONF_hbase_master_info_port=16010
HBASE_CONF_hbase_regionserver_port=16020
HBASE_CONF_hbase_regionserver_info_port=16030

HBASE_MANAGES_ZK=false

and hadoop.env:

CORE_CONF_fs_defaultFS=hdfs://namenode:9000
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*
CORE_CONF_io_compression_codecs=org.apache.hadoop.io.compress.SnappyCodec

HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false

YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_scheduler_class=org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler
YARN_CONF_yarn_scheduler_capacity_root_default_maximum___allocation___mb=8192
YARN_CONF_yarn_scheduler_capacity_root_default_maximum___allocation___vcores=4
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_mapreduce_map_output_compress=true
YARN_CONF_mapred_map_output_compress_codec=org.apache.hadoop.io.compress.SnappyCodec
YARN_CONF_yarn_nodemanager_resource_memory___mb=16384
YARN_CONF_yarn_nodemanager_resource_cpu___vcores=8
YARN_CONF_yarn_nodemanager_disk___health___checker_max___disk___utilization___per___disk___percentage=98.5
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_nodemanager_aux___services=mapreduce_shuffle

MAPRED_CONF_mapreduce_framework_name=yarn
MAPRED_CONF_mapred_child_java_opts=-Xmx4096m
MAPRED_CONF_mapreduce_map_memory_mb=4096
MAPRED_CONF_mapreduce_reduce_memory_mb=8192
MAPRED_CONF_mapreduce_map_java_opts=-Xmx3072m
MAPRED_CONF_mapreduce_reduce_java_opts=-Xmx6144m
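
As background on how these env files work: the bde2020 images translate each PREFIX_CONF_name=value variable into a property in the corresponding XML config file (CORE_CONF_* goes to core-site.xml, HDFS_CONF_* to hdfs-site.xml, HBASE_CONF_* to hbase-site.xml, and so on). As far as I understand the entrypoint convention, a triple underscore in the name becomes a hyphen, a double underscore becomes a literal underscore, and the remaining single underscores become dots. A minimal bash sketch of that mapping (the helper name is mine, not from the images):

# Sketch of the bde2020 entrypoint naming convention:
#   "___" -> "-",  "__" -> "_",  remaining "_" -> "."
env_to_property() {
  local name="${1#*_CONF_}"   # drop the HBASE_CONF_/YARN_CONF_/... prefix
  name="${name//___/-}"       # triple underscore becomes a hyphen
  name="${name//__/@}"        # shield double underscores with a placeholder
  name="${name//_/.}"         # single underscores become dots
  echo "${name//@/_}"         # restore the shielded underscores
}

env_to_property HBASE_CONF_hbase_zookeeper_property_clientPort
# -> hbase.zookeeper.property.clientPort
env_to_property YARN_CONF_yarn_log___aggregation___enable
# -> yarn.log-aggregation-enable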

So, again following the documentation, I try to bring everything up with this command:

docker-compose -f docker-compose.yml \
  -f docker-compose.spark.yml \
  -f elasticsearch/docker-compose.base.yml \
  -f elasticsearch/docker-compose.meta.yml \
  -f hbase/docker-compose.base.yml \
  -f hbase/docker-compose.event.yml \
  -f localfs/docker-compose.model.yml \
  up
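
For what it's worth, running the same set of -f flags with the config subcommand prints the merged compose file without starting anything, which is a quick way to confirm that the pio service ends up with PIO_STORAGE_SOURCES_HBASE_HOSTS=hbase-master after all the overrides:

docker-compose -f docker-compose.yml \
  -f docker-compose.spark.yml \
  -f elasticsearch/docker-compose.base.yml \
  -f elasticsearch/docker-compose.meta.yml \
  -f hbase/docker-compose.base.yml \
  -f hbase/docker-compose.event.yml \
  -f localfs/docker-compose.model.yml \
  config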

Here is the full error:

pio_1              | [INFO] [Management$] Inspecting PredictionIO...
pio_1              | [INFO] [Management$] PredictionIO 0.13.0 is installed at /usr/share/predictionio
pio_1              | [INFO] [Management$] Inspecting Apache Spark...
pio_1              | [INFO] [Management$] Apache Spark is installed at /usr/share/spark-2.2.2-bin-hadoop2.7
pio_1              | [INFO] [Management$] Apache Spark 2.2.2 detected (meets minimum requirement of 1.6.3)
pio_1              | [INFO] [Management$] Inspecting storage backend connections...
pio_1              | [INFO] [Storage$] Verifying Meta Data Backend (Source: ELASTICSEARCH)...
pio_1              | [INFO] [Storage$] Verifying Model Data Backend (Source: LOCALFS)...
pio_1              | [INFO] [Storage$] Verifying Event Data Backend (Source: HBASE)...
pio_1              | [ERROR] [RecoverableZooKeeper] ZooKeeper exists failed after 1 attempts
pio_1              | [ERROR] [ZooKeeperWatcher] hconnection-0x78de58ea, quorum=localhost:2181, baseZNode=/hbase Received unexpected KeeperException, re-throwing exception
pio_1              | [WARN] [ZooKeeperRegistry] Can't retrieve clusterId from Zookeeper
pio_1              | [ERROR] [StorageClient] Cannot connect to ZooKeeper (ZooKeeper ensemble: localhost). Please make sure that the configuration is pointing at the correct ZooKeeper ensemble. By default, HBase manages its own ZooKeeper, so if you have not configured HBase to use an external ZooKeeper, that means your HBase is not started or configured properly.
pio_1              | [ERROR] [Storage$] Error initializing storage client for source HBASE.
pio_1              | org.apache.hadoop.hbase.ZooKeeperConnectionException: Can't connect to ZooKeeper
pio_1              |    at org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable(HBaseAdmin.java:2358)
pio_1              |    at org.apache.predictionio.data.storage.hbase.StorageClient.<init>(StorageClient.scala:53)
pio_1              |    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
pio_1              |    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
pio_1              |    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
pio_1              |    at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getClient(Storage.scala:252)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.org$apache$predictionio$data$storage$Storage$$updateS2CM(Storage.scala:283)
pio_1              |    at org.apache.predictionio.data.storage.Storage$$anonfun$sourcesToClientMeta$1.apply(Storage.scala:244)
pio_1              |    at org.apache.predictionio.data.storage.Storage$$anonfun$sourcesToClientMeta$1.apply(Storage.scala:244)
pio_1              |    at scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:79)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.sourcesToClientMeta(Storage.scala:244)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getDataObject(Storage.scala:315)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getDataObjectFromRepo(Storage.scala:300)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getLEvents(Storage.scala:448)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.verifyAllDataObjects(Storage.scala:384)
pio_1              |    at org.apache.predictionio.tools.commands.Management$.status(Management.scala:156)
pio_1              |    at org.apache.predictionio.tools.console.Pio$.status(Pio.scala:155)
pio_1              |    at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:721)
pio_1              |    at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:656)
pio_1              |    at scala.Option.map(Option.scala:146)
pio_1              |    at org.apache.predictionio.tools.console.Console$.main(Console.scala:656)
pio_1              |    at org.apache.predictionio.tools.console.Console.main(Console.scala)
pio_1              | Caused by: org.apache.zookeeper.KeeperException$ConnectionLossException: KeeperErrorCode = ConnectionLoss for /hbase
pio_1              |    at org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
pio_1              |    at org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
pio_1              |    at org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1045)
pio_1              |    at org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1073)
pio_1              |    at org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable(HBaseAdmin.java:2349)
pio_1              |    ... 22 more
pio_1              | 
pio_1              | 
pio_1              | 
pio_1              | [ERROR] [Management$] Unable to connect to all storage backends successfully.
pio_1              | The following shows the error message from the storage backend.
pio_1              | 
pio_1              | Data source HBASE was not properly initialized. (org.apache.predictionio.data.storage.StorageClientException)
pio_1              | 
pio_1              | Dumping configuration of initialized storage backend sources.
pio_1              | Please make sure they are correct.
pio_1              | 
pio_1              | Source Name: ELASTICSEARCH; Type: elasticsearch; Configuration: HOSTS -> elasticsearch, TYPE -> elasticsearch, SCHEMES -> http, PORTS -> 9200
pio_1              | Source Name: LOCALFS; Type: localfs; Configuration: PATH -> /work/pio_store/models, TYPE -> localfs
pio_1              | Source Name: HBASE; Type: (error); Configuration: (error)
pio_1              | [INFO] [Management$] Creating Event Server at 0.0.0.0:7070
pio_1              | [ERROR] [RecoverableZooKeeper] ZooKeeper exists failed after 1 attempts
pio_1              | [ERROR] [ZooKeeperWatcher] hconnection-0x159a48a6, quorum=localhost:2181, baseZNode=/hbase Received unexpected KeeperException, re-throwing exception
pio_1              | [WARN] [ZooKeeperRegistry] Can't retrieve clusterId from Zookeeper
pio_1              | [ERROR] [StorageClient] Cannot connect to ZooKeeper (ZooKeeper ensemble: localhost). Please make sure that the configuration is pointing at the correct ZooKeeper ensemble. By default, HBase manages its own ZooKeeper, so if you have not configured HBase to use an external ZooKeeper, that means your HBase is not started or configured properly.
pio_1              | [ERROR] [Storage$] Error initializing storage client for source HBASE.
pio_1              | org.apache.hadoop.hbase.ZooKeeperConnectionException: Can't connect to ZooKeeper
pio_1              |    at org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable(HBaseAdmin.java:2358)
pio_1              |    at org.apache.predictionio.data.storage.hbase.StorageClient.<init>(StorageClient.scala:53)
pio_1              |    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
pio_1              |    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
pio_1              |    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
pio_1              |    at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getClient(Storage.scala:252)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.org$apache$predictionio$data$storage$Storage$$updateS2CM(Storage.scala:283)
pio_1              |    at org.apache.predictionio.data.storage.Storage$$anonfun$sourcesToClientMeta$1.apply(Storage.scala:244)
pio_1              |    at org.apache.predictionio.data.storage.Storage$$anonfun$sourcesToClientMeta$1.apply(Storage.scala:244)
pio_1              |    at scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:79)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.sourcesToClientMeta(Storage.scala:244)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getDataObject(Storage.scala:315)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getDataObjectFromRepo(Storage.scala:300)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getLEvents(Storage.scala:448)
pio_1              |    at org.apache.predictionio.data.api.EventServer$.createEventServer(EventServer.scala:636)
pio_1              |    at org.apache.predictionio.tools.commands.Management$.eventserver(Management.scala:77)
pio_1              |    at org.apache.predictionio.tools.console.Pio$.eventserver(Pio.scala:124)
pio_1              |    at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:708)
pio_1              |    at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:656)
pio_1              |    at scala.Option.map(Option.scala:146)
pio_1              |    at org.apache.predictionio.tools.console.Console$.main(Console.scala:656)
pio_1              |    at org.apache.predictionio.tools.console.Console.main(Console.scala)
pio_1              | Caused by: org.apache.zookeeper.KeeperException$ConnectionLossException: KeeperErrorCode = ConnectionLoss for /hbase
pio_1              |    at org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
pio_1              |    at org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
pio_1              |    at org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1045)
pio_1              |    at org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1073)
pio_1              |    at org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable(HBaseAdmin.java:2349)
pio_1              |    ... 22 more
pio_1              | 
pio_1              | 
pio_1              | 
pio_1              | Exception in thread "main" org.apache.predictionio.data.storage.StorageClientException: Data source HBASE was not properly initialized.
pio_1              |    at org.apache.predictionio.data.storage.Storage$$anonfun$10.apply(Storage.scala:316)
pio_1              |    at org.apache.predictionio.data.storage.Storage$$anonfun$10.apply(Storage.scala:316)
pio_1              |    at scala.Option.getOrElse(Option.scala:121)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getDataObject(Storage.scala:315)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getDataObjectFromRepo(Storage.scala:300)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getLEvents(Storage.scala:448)
pio_1              |    at org.apache.predictionio.data.api.EventServer$.createEventServer(EventServer.scala:636)
pio_1              |    at org.apache.predictionio.tools.commands.Management$.eventserver(Management.scala:77)
pio_1              |    at org.apache.predictionio.tools.console.Pio$.eventserver(Pio.scala:124)
pio_1              |    at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:708)
pio_1              |    at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:656)
pio_1              |    at scala.Option.map(Option.scala:146)
pio_1              |    at org.apache.predictionio.tools.console.Console$.main(Console.scala:656)
pio_1              |    at org.apache.predictionio.tools.console.Console.main(Console.scala)

I exec'd into every container and checked each host:port pair with /dev/tcp; all the ports appear to be open, but there is something here I cannot figure out.
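
The checks were along these lines (bash's /dev/tcp pseudo-device, run from inside the pio container; zoo and hbase-master are the compose service names):

# Exit code 0 means the TCP port is reachable from this container.
timeout 3 bash -c 'cat < /dev/null > /dev/tcp/zoo/2181' && echo 'zoo:2181 open'
timeout 3 bash -c 'cat < /dev/null > /dev/tcp/hbase-master/16000' && echo 'hbase-master:16000 open'

# ZooKeeper should also answer the "ruok" four-letter-word probe with "imok":
exec 3<>/dev/tcp/zoo/2181 && printf 'ruok' >&3 && cat <&3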

PIO version: 0.13.0
ES version: 5.5.2
HBase: 1.2.6
Hadoop: 2.7.7

Any ideas?

Edit: after hitting this error, I ran the jps -l command inside the pio container; here is the output, in case it helps.

root@67662213df1e:/usr/share/predictionio# jps -l
1120 sun.tools.jps.Jps
926 org.apache.predictionio.tools.console.Console
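
For comparison, the same command can be run in the HBase containers to confirm that the master and regionserver JVMs are actually up (assuming jps is available in those images):

docker exec -it hbase-master jps -l          # expect org.apache.hadoop.hbase.master.HMaster
docker exec -it hbase-regionserver jps -l    # expect org.apache.hadoop.hbase.regionserver.HRegionServer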
