Problem description
I am trying to develop a project based on Apache PredictionIO, and I followed this guide to dockerize PredictionIO. There is one problem: I want to use HBase together with Hadoop for PIO's event data. So, inside the docker directory (in the repository linked above), I created an hbase directory, and in it I use the following docker-compose files.
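For context, this is the layout I am assuming inside the docker directory (the file names match the ones used in the docker-compose command further below; adjust if your checkout differs):

docker/
├── docker-compose.yml
├── docker-compose.spark.yml
├── elasticsearch/
│   ├── docker-compose.base.yml
│   └── docker-compose.meta.yml
├── localfs/
│   └── docker-compose.model.yml
└── hbase/
    ├── docker-compose.base.yml
    ├── docker-compose.event.yml
    ├── hadoop.env
    └── hbase-distributed-local.env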
docker-compose.base.yml
version: "3"
services:
  namenode:
    image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
    container_name: namenode
    volumes:
      - hadoop_namenode:/hadoop/dfs/name
    environment:
      - CLUSTER_NAME=test
    env_file:
      - hbase/hadoop.env
    ports:
      - 50070:50070

  datanode:
    image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
    container_name: datanode
    volumes:
      - hadoop_datanode:/hadoop/dfs/data
    environment:
      SERVICE_PRECONDITION: "namenode:50070"
    env_file:
      - hbase/hadoop.env
    ports:
      - 50075:50075

  resourcemanager:
    image: bde2020/hadoop-resourcemanager:2.0.0-hadoop2.7.4-java8
    container_name: resourcemanager
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075"
    env_file:
      - hbase/hadoop.env
    ports:
      - 8088:8088

  nodemanager1:
    image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8
    container_name: nodemanager
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088"
    env_file:
      - hbase/hadoop.env
    ports:
      - 8042:8042

  historyserver:
    image: bde2020/hadoop-historyserver:2.0.0-hadoop2.7.4-java8
    container_name: historyserver
    volumes:
      - hadoop_historyserver:/hadoop/yarn/timeline
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088"
    env_file:
      - hbase/hadoop.env
    ports:
      - 8188:8188

  zoo:
    image: zookeeper:3.4.10
    container_name: zoo
    hostname: zoo
    restart: always
    environment:
      ZOO_MY_ID: 1
      ZOO_SERVERS: server.1=0.0.0.0:2888:3888
    ports:
      - 2181:2181

  hbase-master:
    image: bde2020/hbase-master:1.0.0-hbase1.2.6
    container_name: hbase-master
    hostname: hbase-master
    depends_on:
      - zoo
    env_file:
      - hbase/hbase-distributed-local.env
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 zoo:2181"
    ports:
      - 16010:16010

  hbase-region:
    image: bde2020/hbase-regionserver:1.0.0-hbase1.2.6
    container_name: hbase-regionserver
    hostname: hbase-regionserver
    env_file:
      - hbase/hbase-distributed-local.env
    environment:
      HBASE_CONF_hbase_regionserver_hostname: hbase-region
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 zoo:2181 hbase-master:16010"
    ports:
      - 16030:16030

  pio:
    depends_on:
      - hbase-master
    environment:
      PIO_STORAGE_SOURCES_HBASE_TYPE: hbase
      PIO_STORAGE_SOURCES_HBASE_HOSTS: hbase-master

volumes:
  hadoop_namenode:
  hadoop_datanode:
  hadoop_historyserver:
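Note that the pio service above does not define an image; it only adds settings on top of the pio service from the top-level docker-compose.yml, relying on Compose's multi-file merging. A quick way to sanity-check the merged result (standard docker-compose behavior; add the remaining -f files from the full command below for the complete picture):

docker-compose -f docker-compose.yml \
    -f hbase/docker-compose.base.yml \
    -f hbase/docker-compose.event.yml \
    config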
and docker-compose.event.yml:
version: "3"
services:
  pio:
    environment:
      PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME: pio_event
      PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE: HBASE
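As far as I understand the PIO image (an assumption on my part, not something I verified in its entrypoint), these PIO_STORAGE_* variables take the place of the usual pio-env.sh entries, so the effective event-store wiring can be inspected from the running container:

# run with the same -f files as the `up` command below
docker-compose exec pio env | grep -E 'PIO_STORAGE_(REPOSITORIES_EVENTDATA|SOURCES_HBASE)'
# expected:
#   PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME=pio_event
#   PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=HBASE
#   PIO_STORAGE_SOURCES_HBASE_TYPE=hbase
#   PIO_STORAGE_SOURCES_HBASE_HOSTS=hbase-master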
I also have environment files, such as hbase-distributed-local.env:
HBASE_CONF_hbase_rootdir=hdfs://namenode:9000/hbase
HBASE_CONF_hbase_cluster_distributed=true
HBASE_CONF_hbase_zookeeper_quorum=zoo
HBASE_CONF_hbase_zookeeper_property_clientPort=2181
HBASE_CONF_hbase_cluster_distributed=true
HBASE_CONF_hbase_master=hbase-master:16000
HBASE_CONF_hbase_master_hostname=hbase-master
HBASE_CONF_hbase_master_port=16000
HBASE_CONF_hbase_master_info_port=16010
HBASE_CONF_hbase_regionserver_port=16020
HBASE_CONF_hbase_regionserver_info_port=16030
HBASE_MANAGES_ZK=false
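If I read the bde2020 hbase images correctly (an assumption based on their entrypoint convention, worth double-checking), each HBASE_CONF_* variable above is rewritten into a property in the container's hbase-site.xml, so hbase.zookeeper.quorum should come out as zoo. One way to confirm from the host (the find is there because I am not sure of the exact conf path inside the image):

docker exec hbase-master bash -c \
  'grep -A1 zookeeper.quorum "$(find / -name hbase-site.xml 2>/dev/null | head -1)"'
# expected to contain:
#   <name>hbase.zookeeper.quorum</name>
#   <value>zoo</value>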
and hadoop.env:
CORE_CONF_fs_defaultFS=hdfs://namenode:9000
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*
CORE_CONF_io_compression_codecs=org.apache.hadoop.io.compress.SnappyCodec
HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_scheduler_class=org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler
YARN_CONF_yarn_scheduler_capacity_root_default_maximum___allocation___mb=8192
YARN_CONF_yarn_scheduler_capacity_root_default_maximum___allocation___vcores=4
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_mapreduce_map_output_compress=true
YARN_CONF_mapred_map_output_compress_codec=org.apache.hadoop.io.compress.SnappyCodec
YARN_CONF_yarn_nodemanager_resource_memory___mb=16384
YARN_CONF_yarn_nodemanager_resource_cpu___vcores=8
YARN_CONF_yarn_nodemanager_disk___health___checker_max___disk___utilization___per___disk___percentage=98.5
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_nodemanager_aux___services=mapreduce_shuffle
MAPRED_CONF_mapreduce_framework_name=yarn
MAPRED_CONF_mapred_child_java_opts=-Xmx4096m
MAPRED_CONF_mapreduce_map_memory_mb=4096
MAPRED_CONF_mapreduce_reduce_memory_mb=8192
MAPRED_CONF_mapreduce_map_java_opts=-Xmx3072m
MAPRED_CONF_mapreduce_reduce_java_opts=-Xmx6144m
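The doubled and tripled underscores above are not typos; as far as I can tell from the bde2020 entrypoint scripts (again an assumption, not verified line by line), variable names are translated into Hadoop property names with ___ becoming -, __ becoming _, and _ becoming . before being written into the XML configs. A one-liner to preview that mapping:

echo 'yarn_log___aggregation___enable' \
  | perl -pe 's/___/-/g; s/__/@/g; s/_/./g; s/@/_/g;'
# -> yarn.log-aggregation-enable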
I start the whole stack with the following command:
docker-compose -f docker-compose.yml \
-f docker-compose.spark.yml \
-f elasticsearch/docker-compose.base.yml \
-f elasticsearch/docker-compose.meta.yml \
-f hbase/docker-compose.base.yml \
-f hbase/docker-compose.event.yml \
-f localfs/docker-compose.model.yml \
up
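After up, a host-side sanity check of the published ports before pio runs its storage checks (this assumes the ports above are mapped and free on the host):

docker-compose ps                 # every service should show State: Up
curl -sf http://localhost:50070 >/dev/null && echo 'namenode UI up'
curl -sf http://localhost:16010 >/dev/null && echo 'hbase-master UI up'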
Here is the full error:
pio_1 | [INFO] [Management$] Inspecting predictionio...
pio_1 | [INFO] [Management$] predictionio 0.13.0 is installed at /usr/share/predictionio
pio_1 | [INFO] [Management$] Inspecting Apache Spark...
pio_1 | [INFO] [Management$] Apache Spark is installed at /usr/share/spark-2.2.2-bin-hadoop2.7
pio_1 | [INFO] [Management$] Apache Spark 2.2.2 detected (meets minimum requirement of 1.6.3)
pio_1 | [INFO] [Management$] Inspecting storage backend connections...
pio_1 | [INFO] [Storage$] Verifying Meta Data Backend (Source: ELASTICSEARCH)...
pio_1 | [INFO] [Storage$] Verifying Model Data Backend (Source: LOCALFS)...
pio_1 | [INFO] [Storage$] Verifying Event Data Backend (Source: HBASE)...
pio_1 | [ERROR] [RecoverableZooKeeper] ZooKeeper exists Failed after 1 attempts
pio_1 | [ERROR] [ZooKeeperWatcher] hconnection-0x78de58ea, quorum=localhost:2181, baseZNode=/hbase Received unexpected KeeperException, re-throwing exception
pio_1 | [WARN] [ZooKeeperRegistry] Can't retrieve clusterId from Zookeeper
pio_1 | [ERROR] [StorageClient] Cannot connect to ZooKeeper (ZooKeeper ensemble: localhost). Please make sure that the configuration is pointing at the correct ZooKeeper ensemble. By default, HBase manages its own ZooKeeper, so if you have not configured HBase to use an external ZooKeeper, that means your HBase is not started or configured properly.
pio_1 | [ERROR] [Storage$] Error initializing storage client for source HBASE.
pio_1 | org.apache.hadoop.hbase.ZooKeeperConnectionException: Can't connect to ZooKeeper
pio_1 | at org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable(HBaseAdmin.java:2358)
pio_1 | at org.apache.predictionio.data.storage.hbase.StorageClient.<init>(StorageClient.scala:53)
pio_1 | at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
pio_1 | at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
pio_1 | at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
pio_1 | at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getClient(Storage.scala:252)
pio_1 | at org.apache.predictionio.data.storage.Storage$.org$apache$predictionio$data$storage$Storage$$updateS2CM(Storage.scala:283)
pio_1 | at org.apache.predictionio.data.storage.Storage$$anonfun$sourcesToClientMeta$1.apply(Storage.scala:244)
pio_1 | at org.apache.predictionio.data.storage.Storage$$anonfun$sourcesToClientMeta$1.apply(Storage.scala:244)
pio_1 | at scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:79)
pio_1 | at org.apache.predictionio.data.storage.Storage$.sourcesToClientMeta(Storage.scala:244)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getDataObject(Storage.scala:315)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getDataObjectFromRepo(Storage.scala:300)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getLEvents(Storage.scala:448)
pio_1 | at org.apache.predictionio.data.storage.Storage$.verifyAllDataObjects(Storage.scala:384)
pio_1 | at org.apache.predictionio.tools.commands.Management$.status(Management.scala:156)
pio_1 | at org.apache.predictionio.tools.console.Pio$.status(Pio.scala:155)
pio_1 | at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:721)
pio_1 | at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:656)
pio_1 | at scala.Option.map(Option.scala:146)
pio_1 | at org.apache.predictionio.tools.console.Console$.main(Console.scala:656)
pio_1 | at org.apache.predictionio.tools.console.Console.main(Console.scala)
pio_1 | Caused by: org.apache.zookeeper.KeeperException$ConnectionLossException: KeeperErrorCode = ConnectionLoss for /hbase
pio_1 | at org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
pio_1 | at org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
pio_1 | at org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1045)
pio_1 | at org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1073)
pio_1 | at org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable(HBaseAdmin.java:2349)
pio_1 | ... 22 more
pio_1 |
pio_1 |
pio_1 |
pio_1 | [ERROR] [Management$] Unable to connect to all storage backends successfully.
pio_1 | The following shows the error message from the storage backend.
pio_1 |
pio_1 | Data source HBASE was not properly initialized. (org.apache.predictionio.data.storage.StorageClientException)
pio_1 |
pio_1 | Dumping configuration of initialized storage backend sources.
pio_1 | Please make sure they are correct.
pio_1 |
pio_1 | Source Name: ELASTICSEARCH; Type: elasticsearch; Configuration: HOSTS -> elasticsearch, TYPE -> elasticsearch, SCHEMES -> http, PORTS -> 9200
pio_1 | Source Name: LOCALFS; Type: localfs; Configuration: PATH -> /work/pio_store/models, TYPE -> localfs
pio_1 | Source Name: HBASE; Type: (error); Configuration: (error)
pio_1 | [INFO] [Management$] Creating Event Server at 0.0.0.0:7070
pio_1 | [ERROR] [RecoverableZooKeeper] ZooKeeper exists Failed after 1 attempts
pio_1 | [ERROR] [ZooKeeperWatcher] hconnection-0x159a48a6, quorum=localhost:2181, baseZNode=/hbase Received unexpected KeeperException, re-throwing exception
pio_1 | [WARN] [ZooKeeperRegistry] Can't retrieve clusterId from Zookeeper
pio_1 | [ERROR] [StorageClient] Cannot connect to ZooKeeper (ZooKeeper ensemble: localhost). Please make sure that the configuration is pointing at the correct ZooKeeper ensemble. By default, HBase manages its own ZooKeeper, so if you have not configured HBase to use an external ZooKeeper, that means your HBase is not started or configured properly.
pio_1 | [ERROR] [Storage$] Error initializing storage client for source HBASE.
pio_1 | org.apache.hadoop.hbase.ZooKeeperConnectionException: Can't connect to ZooKeeper
pio_1 | at org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable(HBaseAdmin.java:2358)
pio_1 | at org.apache.predictionio.data.storage.hbase.StorageClient.<init>(StorageClient.scala:53)
pio_1 | at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
pio_1 | at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
pio_1 | at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
pio_1 | at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getClient(Storage.scala:252)
pio_1 | at org.apache.predictionio.data.storage.Storage$.org$apache$predictionio$data$storage$Storage$$updateS2CM(Storage.scala:283)
pio_1 | at org.apache.predictionio.data.storage.Storage$$anonfun$sourcesToClientMeta$1.apply(Storage.scala:244)
pio_1 | at org.apache.predictionio.data.storage.Storage$$anonfun$sourcesToClientMeta$1.apply(Storage.scala:244)
pio_1 | at scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:79)
pio_1 | at org.apache.predictionio.data.storage.Storage$.sourcesToClientMeta(Storage.scala:244)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getDataObject(Storage.scala:315)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getDataObjectFromRepo(Storage.scala:300)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getLEvents(Storage.scala:448)
pio_1 | at org.apache.predictionio.data.api.EventServer$.createEventServer(EventServer.scala:636)
pio_1 | at org.apache.predictionio.tools.commands.Management$.eventserver(Management.scala:77)
pio_1 | at org.apache.predictionio.tools.console.Pio$.eventserver(Pio.scala:124)
pio_1 | at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:708)
pio_1 | at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:656)
pio_1 | at scala.Option.map(Option.scala:146)
pio_1 | at org.apache.predictionio.tools.console.Console$.main(Console.scala:656)
pio_1 | at org.apache.predictionio.tools.console.Console.main(Console.scala)
pio_1 | Caused by: org.apache.zookeeper.KeeperException$ConnectionLossException: KeeperErrorCode = ConnectionLoss for /hbase
pio_1 | at org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
pio_1 | at org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
pio_1 | at org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1045)
pio_1 | at org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1073)
pio_1 | at org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable(HBaseAdmin.java:2349)
pio_1 | ... 22 more
pio_1 |
pio_1 |
pio_1 |
pio_1 | Exception in thread "main" org.apache.predictionio.data.storage.StorageClientException: Data source HBASE was not properly initialized.
pio_1 | at org.apache.predictionio.data.storage.Storage$$anonfun$10.apply(Storage.scala:316)
pio_1 | at org.apache.predictionio.data.storage.Storage$$anonfun$10.apply(Storage.scala:316)
pio_1 | at scala.Option.getOrElse(Option.scala:121)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getDataObject(Storage.scala:315)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getDataObjectFromrepo(Storage.scala:300)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getLEvents(Storage.scala:448)
pio_1 | at org.apache.predictionio.data.api.EventServer$.createEventServer(EventServer.scala:636)
pio_1 | at org.apache.predictionio.tools.commands.Management$.eventserver(Management.scala:77)
pio_1 | at org.apache.predictionio.tools.console.Pio$.eventserver(Pio.scala:124)
pio_1 | at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:708)
pio_1 | at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:656)
pio_1 | at scala.Option.map(Option.scala:146)
pio_1 | at org.apache.predictionio.tools.console.Console$.main(Console.scala:656)
pio_1 | at org.apache.predictionio.tools.console.Console.main(Console.scala)
I exec'd into all of the containers and checked each host:port with bash's /dev/tcp; every port appears to be open, but there is something here I cannot figure out.
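For reference, this is roughly the probe I ran inside each container (bash's built-in /dev/tcp, so it works even where nc is not installed):

for hp in zoo:2181 hbase-master:16010 namenode:50070; do
  (echo > "/dev/tcp/${hp%%:*}/${hp##*:}") 2>/dev/null \
    && echo "$hp open" || echo "$hp closed"
done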
PIO version: 0.13.0
ES version: 5.5.2
HBase version: 1.2.6
Hadoop version: 2.7.7
Any ideas?
Edit: after hitting this error, I ran jps -l inside the pio container; here is the output, in case it helps.
root@67662213df1e:/usr/share/predictionio# jps -l
1120 sun.tools.jps.Jps
926 org.apache.predictionio.tools.console.Console