问题描述
简介
我正在尝试将Zookeeper与SolrCloud搭配使用。 我知道SolrCloud有自己的内置Zookeeper,但是由于不建议在生产环境中使用该设置,因此我模拟了(或者至少希望如此)外部Zookeeper与Solr Cloud集成的设置(3个ZK节点,2个Solr节点)。
为方便起见,我创建了以下docker-compose.yml:
# Docker Compose stack: a three-node ZooKeeper ensemble (zoo1..zoo3) and two
# SolrCloud nodes (solr1, solr2), all attached to the same "solr" network.
version: '3.8'
services:
  zoo1:
    image: library/zookeeper:3.5.7
    container_name: zoo1
    restart: always
    hostname: zoo1
    ports:
      - "8184:8080"
    environment:
      TZ: Europe/Paris
      ZOO_MY_ID: 1
      # NOTE(review): this node lists itself as 0.0.0.0. The Solr log below
      # shows "Failed talking to zookeeper 0.0.0.0:2181", i.e. that address
      # is what Solr ends up trying to reach for the ZK status page.
      ZOO_SERVERS: server.1=0.0.0.0:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181
    networks:
      - solr
    # Sets the container timezone from TZ, switches autopurge.purgeInterval
    # from 0 to 1 in /conf/zoo.cfg, whitelists the four-letter-word commands
    # mntr/conf/ruok, then runs ZooKeeper in the foreground.
    # "$$" is Compose's escape for a literal "$": it makes the shell inside
    # the container expand TZ, instead of Compose substituting the host's
    # (usually unset) TZ while parsing this file.
    command: >
      sh -c "ln -snf /usr/share/zoneinfo/$$TZ /etc/localtime &&
      echo $$TZ > /etc/timezone &&
      sed -i 's/autopurge.purgeInterval=0/autopurge.purgeInterval=1/g' /conf/zoo.cfg &&
      echo 4lw.commands.whitelist=mntr,conf,ruok >> /conf/zoo.cfg &&
      exec zkServer.sh start-foreground"
  zoo2:
    image: library/zookeeper:3.5.7
    container_name: zoo2
    restart: always
    hostname: zoo2
    ports:
      - "8284:8080"
    environment:
      TZ: Europe/Paris
      ZOO_MY_ID: 2
      # NOTE(review): same self-reference via 0.0.0.0 as on zoo1.
      ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=0.0.0.0:2888:3888;2181 server.3=zoo3:2888:3888;2181
    networks:
      - solr
    # Same start-up sequence as zoo1. The whitelist includes "conf" so that
    # all three ensemble members accept the same four-letter-word commands.
    command: >
      sh -c "ln -snf /usr/share/zoneinfo/$$TZ /etc/localtime &&
      echo $$TZ > /etc/timezone &&
      sed -i 's/autopurge.purgeInterval=0/autopurge.purgeInterval=1/g' /conf/zoo.cfg &&
      echo 4lw.commands.whitelist=mntr,conf,ruok >> /conf/zoo.cfg &&
      exec zkServer.sh start-foreground"
  zoo3:
    image: library/zookeeper:3.5.7
    container_name: zoo3
    restart: always
    hostname: zoo3
    ports:
      - "8384:8080"
    environment:
      TZ: Europe/Paris
      ZOO_MY_ID: 3
      # NOTE(review): same self-reference via 0.0.0.0 as on zoo1.
      ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=0.0.0.0:2888:3888;2181
    networks:
      - solr
    # Same start-up sequence as zoo1. The whitelist includes "conf" so that
    # all three ensemble members accept the same four-letter-word commands.
    command: >
      sh -c "ln -snf /usr/share/zoneinfo/$$TZ /etc/localtime &&
      echo $$TZ > /etc/timezone &&
      sed -i 's/autopurge.purgeInterval=0/autopurge.purgeInterval=1/g' /conf/zoo.cfg &&
      echo 4lw.commands.whitelist=mntr,conf,ruok >> /conf/zoo.cfg &&
      exec zkServer.sh start-foreground"
  solr1:
    image: library/solr:8.6.3
    container_name: solr1
    ports:
      - "8981:8983"
    environment:
      # Connection string listing every member of the ZooKeeper ensemble.
      ZK_HOST: zoo1:2181,zoo2:2181,zoo3:2181
    networks:
      - solr
    depends_on:
      - zoo1
      - zoo2
      - zoo3
  solr2:
    image: library/solr:8.6.3
    container_name: solr2
    ports:
      - "8982:8983"
    environment:
      # Lists all three ZooKeeper nodes, matching solr1, so this node can
      # still connect when any single ensemble member is down.
      ZK_HOST: zoo1:2181,zoo2:2181,zoo3:2181
    networks:
      - solr
    depends_on:
      - zoo1
      - zoo2
      - zoo3
networks:
  solr:
    name: solr_zookeeper_cluster
因此,使用此文件,一切都会轻松愉快地启动。 我实际上有3个ZK节点,其中一个是领导者,还有2个Solr节点...
问题
但是(这是我的实际问题)Solr UI在显示ZK状态时表现有点怪异。
在zkStatus中,我总是有2个ZK实例没有问题,但是有一个显示“不正常”...
大多数时候,两个Solr节点都是在同一个Zookeeper节点上报告问题,但是一旦我开始做一些操作(例如:停止领导者以触发领导者选举,然后重新启动该节点),出问题的节点就变得非常随机。
初始启动后的屏幕截图:
触发领导人选举后的屏幕截图
某些节点日志
2020-10-14 09:31:18.597 INFO (main) [ ] o.e.j.s.Server Started @7571ms
2020-10-14 09:32:20.539 INFO (qtp247162961-18) [ ] o.a.s.c.TransientSolrCoreCacheDefault Allocating transient cache for 2147483647 transient cores
2020-10-14 09:32:20.540 INFO (qtp247162961-18) [ ] o.a.s.s.HttpSolrCall [admin] webapp=null path=/admin/cores params={indexInfo=false&wt=json&_=1602667940461} status=0 QTime=6
2020-10-14 09:32:20.552 WARN (qtp247162961-17) [ ] o.a.s.h.a.ZookeeperStatusHandler Failed talking to zookeeper 0.0.0.0:2181 => org.apache.solr.common.SolrException: Failed talking to Zookeeper 0.0.0.0:2181
at org.apache.solr.handler.admin.ZookeeperStatusHandler.getZkRawResponse(ZookeeperStatusHandler.java:294)
org.apache.solr.common.SolrException: Failed talking to Zookeeper 0.0.0.0:2181
at org.apache.solr.handler.admin.ZookeeperStatusHandler.getZkRawResponse(ZookeeperStatusHandler.java:294) ~[?:?]
at org.apache.solr.handler.admin.ZookeeperStatusHandler.monitorZookeeper(ZookeeperStatusHandler.java:238) ~[?:?]
at org.apache.solr.handler.admin.ZookeeperStatusHandler.getZkStatus(ZookeeperStatusHandler.java:144) ~[?:?]
at org.apache.solr.handler.admin.ZookeeperStatusHandler.handleRequestBody(ZookeeperStatusHandler.java:84) ~[?:?]
at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:214) ~[?:?]
at org.apache.solr.servlet.HttpSolrCall.handleAdmin(HttpSolrCall.java:857) ~[?:?]
at org.apache.solr.servlet.HttpSolrCall.handleAdminRequest(HttpSolrCall.java:821) ~[?:?]
at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:566) ~[?:?]
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:415) ~[?:?]
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:345) ~[?:?]
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1596) ~[jetty-servlet-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:545) ~[jetty-servlet-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:590) ~[jetty-security-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:235) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1610) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:233) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1300) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:188) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:485) ~[jetty-servlet-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1580) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:186) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1215) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:221) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.handler.InetAccessHandler.handle(InetAccessHandler.java:177) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:146) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:322) ~[jetty-rewrite-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.Server.handle(Server.java:500) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:383) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:547) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:375) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:273) ~[jetty-server-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311) ~[jetty-io-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:103) ~[jetty-io-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:117) ~[jetty-io-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:336) ~[jetty-util-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:313) ~[jetty-util-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:171) ~[jetty-util-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:129) ~[jetty-util-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:375) ~[jetty-util-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:806) ~[jetty-util-9.4.27.v20200227.jar:9.4.27.v20200227]
at org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:938) ~[jetty-util-9.4.27.v20200227.jar:9.4.27.v20200227]
at java.lang.Thread.run(Unknown Source) [?:?]
Caused by: java.net.ConnectException: Connection refused (Connection refused)
at java.net.PlainSocketImpl.socketConnect(Native Method) ~[?:?]
at java.net.AbstractPlainSocketImpl.doConnect(Unknown Source) ~[?:?]
at java.net.AbstractPlainSocketImpl.connectToAddress(Unknown Source) ~[?:?]
at java.net.AbstractPlainSocketImpl.connect(Unknown Source) ~[?:?]
at java.net.SocksSocketImpl.connect(Unknown Source) ~[?:?]
at java.net.Socket.connect(Unknown Source) ~[?:?]
at java.net.Socket.connect(Unknown Source) ~[?:?]
at java.net.Socket.<init>(Unknown Source) ~[?:?]
at java.net.Socket.<init>(Unknown Source) ~[?:?]
at org.apache.solr.handler.admin.ZookeeperStatusHandler.getZkRawResponse(ZookeeperStatusHandler.java:285) ~[?:?]
... 46 more
2020-10-14 09:32:20.564 INFO (qtp247162961-22) [ ] o.a.s.s.HttpSolrCall [admin] webapp=null path=/admin/info/system params={wt=json&_=1602667940462} status=0 QTime=29
2020-10-14 09:32:20.573 INFO (qtp247162961-17) [ ] o.a.s.s.HttpSolrCall [admin] webapp=null path=/admin/zookeeper/status params={wt=json&_=1602667940521} status=0 QTime=39
2020-10-14 09:32:20.589 INFO (qtp247162961-20) [ ] o.a.s.h.a.CollectionsHandler Invoked Collection Action :list with params action=LIST&wt=json&_=1602667940462 and sendToOCPQueue=true
2020-10-14 09:32:20.589 INFO (qtp247162961-20) [ ] o.a.s.s.HttpSolrCall [admin] webapp=null path=/admin/collections params={action=LIST&wt=json&_=1602667940462} status=0 QTime=0
2020-10-14 09:32:20.612 INFO (qtp247162961-18) [ ] o.a.s.h.a.CollectionsHandler Invoked Collection Action :listaliases with params action=LISTALIASES&wt=json&_=1602667940462 and sendToOCPQueue=true
2020-10-14 09:32:20.615 INFO (qtp247162961-18) [ ] o.a.s.s.HttpSolrCall [admin] webapp=null path=/admin/collections params={action=LISTALIASES&wt=json&_=1602667940462} status=0 QTime=2
解决方法
您不应使用0.0.0.0,而应使用在docker-compose文件中定义的主机名。因此,在zoo1的配置中,server.1应该是zoo1;在zoo2的配置中,server.2应该是zoo2;在zoo3的配置中,server.3应该是zoo3。