问题描述
我遇到来自 Varnish (6.5) 的针对特定路由的间歇性空回复或连接重置错误。这是有问题的 VCL(更大的 VCL 集的一部分,但这是路由使用的):
vcl 4.1;
import gcis;
import std;
include "/etc/varnish/conf.d/backend_default.vcl";
include "/etc/varnish/conf.d/acl.vcl";
sub vcl_init {
new idmap = gcis.idmap();
}
sub vcl_recv {
if (!client.ip ~ internal_acl) {
return (synth (
405,"Client IP " + client.ip +
" is not allowed to access this service."));
}
}
sub vcl_deliver {
// If the request was for reloading the ACL,reload the list here.
if (req.url == "/svc/reload_acl") {
std.log(
idmap.load(std.getenv("GCISWG_ACL_FILE"))
+ " restricted identifiers reloaded from database."
);
ban ("req.url ~ ^/iiif/");
ban ("req.url == /svc/dump_acl");
}
}
如果我重复发送以下请求:
curl -iXPOST varnish/svc/reload_acl
有时我会得到预期的响应;有时我得到 curl: (52) Empty reply from server
,有时我得到 curl: (56) Recv failure: Connection reset by peer
。
空回复的 varnishlog 条目如下所示:
varnish_1 | * << BeReq >> 9
varnish_1 | - Begin bereq 8 pass
varnish_1 | - VCL_use svc
varnish_1 | - Timestamp Start: 1623449680.866799 0.000000 0.000000
varnish_1 | - BereqMethod POST
varnish_1 | - BereqURL /svc/reload_acl
varnish_1 | - BereqProtocol HTTP/1.1
varnish_1 | - BereqHeader User-Agent: curl/7.64.0
varnish_1 | - BereqHeader Accept: */*
varnish_1 | - BereqHeader X-Forwarded-For: 172.18.0.5
varnish_1 | - BereqHeader host: varnish
varnish_1 | - BereqHeader X-Varnish: 9
varnish_1 | - VCL_call BACKEND_FETCH
varnish_1 | - VCL_return fetch
varnish_1 | - BackendOpen 32 default 172.18.0.8 8080 172.18.0.9 34554 reuse
varnish_1 | - Timestamp Bereq: 1623449680.866890 0.000091 0.000091
varnish_1 | - Timestamp Beresp: 1623449680.878314 0.011515 0.011424
varnish_1 | - BerespProtocol HTTP/1.1
varnish_1 | - BerespStatus 201
varnish_1 | - BerespReason CREATED
varnish_1 | - BerespHeader Server: Nginx/1.18.0
varnish_1 | - BerespHeader Date: Fri,11 Jun 2021 22:14:40 GMT
varnish_1 | - BerespHeader Content-Type: text/html; charset=utf-8
varnish_1 | - BerespHeader Content-Length: 31
varnish_1 | - BerespHeader Connection: keep-alive
varnish_1 | - VCL_call BACKEND_RESPONSE
varnish_1 | - VCL_return deliver
varnish_1 | - Filters
varnish_1 | - Storage malloc Transient
varnish_1 | - Fetch_Body 3 length stream
varnish_1 | - BackendClose 32 default recycle
varnish_1 | - Timestamp BerespBody: 1623449680.878392 0.011593 0.000078
varnish_1 | - Length 31
varnish_1 | - BereqAcct 129 0 129 167 31 198
varnish_1 | - End
varnish_1 |
varnish_1 | * << Session >> 7
varnish_1 | - Begin sess 0 HTTP/1
varnish_1 | - SessOpen 172.18.0.5 57306 nossl 172.18.0.9 80 1623449680.866658 30
varnish_1 | - Link req 8 rxreq
varnish_1 | - VSL flush
varnish_1 | - End synth
varnish_1 |
varnish_1 | * << Request >> 8
varnish_1 | - Begin req 7 rxreq
varnish_1 | - Timestamp Start: 1623449680.866744 0.000000 0.000000
varnish_1 | - Timestamp Req: 1623449680.866744 0.000000 0.000000
varnish_1 | - VCL_use main
varnish_1 | - ReqStart 172.18.0.5 57306 nossl
varnish_1 | - ReqMethod POST
varnish_1 | - ReqURL /svc/reload_acl
varnish_1 | - ReqProtocol HTTP/1.1
varnish_1 | - ReqHeader Host: varnish
varnish_1 | - ReqHeader User-Agent: curl/7.64.0
varnish_1 | - ReqHeader Accept: */*
varnish_1 | - ReqHeader X-Forwarded-For: 172.18.0.5
varnish_1 | - VCL_call RECV
varnish_1 | - VCL_use svc via svc_vcl
varnish_1 | - VCL_return vcl
varnish_1 | - ReqHeader X-Forwarded-For: 172.18.0.5
varnish_1 | - VCL_call RECV
varnish_1 | - VCL_acl MATCH internal_acl "172.0.0.0/8"
varnish_1 | - ReqUnset Host: varnish
varnish_1 | - ReqHeader host: varnish
varnish_1 | - VCL_return pass
varnish_1 | Log abandoned (vsl)
varnish_1 | Log reacquired
varnish_1 | - VCL_call HASH
varnish_1 | - VCL_return lookup
varnish_1 | - VCL_call PASS
varnish_1 | - VCL_return fetch
varnish_1 | - Link bereq 9 pass
varnish_1 | - Timestamp Fetch: 1623449680.878425 0.011680 0.011680
varnish_1 | - RespProtocol HTTP/1.1
varnish_1 | - RespStatus 201
varnish_1 | - RespReason CREATED
varnish_1 | - RespHeader Server: Nginx/1.18.0
varnish_1 | - RespHeader Date: Fri,11 Jun 2021 22:14:40 GMT
varnish_1 | - RespHeader Content-Type: text/html; charset=utf-8
varnish_1 | - RespHeader Content-Length: 31
varnish_1 | - RespHeader X-Varnish: 8
varnish_1 | - RespHeader Age: 0
varnish_1 | - RespHeader Via: 1.1 varnish (Varnish/6.5)
varnish_1 | - VCL_call DELIVER
varnish_1 | - VSL flush
varnish_1 | - End synth
会发生什么?竞争条件?我在交付子文件中遗漏了什么吗?顺便说一句,我也注意到了这些没有禁止命令的问题。
感谢您的帮助。
更新:按照评论者的建议,我检查了恐慌转储:
Panic at: Mon,14 Jun 2021 17:25:36 GMT
Wrong turn at cache/cache_main.c:287:
Signal 11 (Segmentation fault) received at 0 si_code 128
version = varnish-6.5.1 revision 1dae23376bb5ea7a6b8e9e4b9ed95cdc9469fb64,vrt api = 12.0
ident = Linux,5.12.6-arch1-1,x86_64,-junix,-sdefault,-sfile,-hcritbit,epoll
Now = 148500.767810 (mono),1623691535.535737 (real)
Backtrace:
ip=0x55c79cbc5b19,sp=0x7f941dbf8460 <PAN_already+0x1304>
ip=0x55c79cc1c8de,sp=0x7f941dbf85a0 <VAS_Fail+0x22>
ip=0x55c79cbc1cb2,sp=0x7f941dbf85e0 <THR_GetRequest+0x14a>
ip=0x7f941dd75c4b,sp=0x7f941dbf8ac0 <sigwaitinfo+0x8>
pthread.self = 0x7f9417c63b38
pthread.name = (cache-worker)
pthread.attr = {
guard = 8192,stack_bottom = 0x7f9417c55000,stack_top = 0x7f9417c63b08,stack_size = 60168,}
thr.req = 0x7f93f736eb20 {
vxid = 32770,transport = HTTP/1 {
state = HTTP1::Proc
}
step = R_STP_DELIVER,req_body = none,restarts = 0,esi_level = 0,sp = 0x7f93f737f030 {
fd = 28,vxid = 32769,t_open = 1623691535.525822,t_idle = 1623691535.525822,ws = 0x7f93f737f070 {
id = \"ses\",{s,f,r,e} = {0x7f93f737f0c8,+96,+576},},transport = HTTP/1 {
state = HTTP1::Proc
}
client = 172.18.0.5 57692 :80,worker = 0x7f9417c63970 {
ws = 0x7f9417c63a18 {
id = \"wrk\",e} = {0x7f9417c62f50,+0,+2040},VCL::method = inside DELIVER,VCL::return = 0x0,VCL::methods = {RECV,PASS,HASH,DELIVER},ws = 0x7f93f736ec70 {
id = \"req\",e} = {0x7f93f7370bd0,+168,+57128},http_conn = 0x7f93f7370b48 {
fd = 28 (@0x7f93f737f054),doclose = NULL,ws = 0x7f93f736ec70 {
[Already dumped,see above]
},{rxbuf_b,rxbuf_e} = {0x7f93f7370bd0,0x7f93f7370c26},{pipeline_b,pipeline_e} = {0,0},content_length = -1,body_status = none,first_byte_timeout = 0.000000,between_bytes_timeout = 0.000000,http[req] = 0x7f93f736ed10 {
ws = 0x7f93f736ec70 {
[Already dumped,hdrs {
\"POST\",\"/svc/reload_acl\",\"HTTP/1.1\",\"User-Agent: curl/7.64.0\",\"Accept: */*\",\"X-Forwarded-For: 172.18.0.5\",\"host: varnish\",http[resp] = 0x7f93f736f600 {
ws = 0x7f93f736ec70 {
[Already dumped,hdrs {
\"HTTP/1.1\",\"201\",\"CREATED\",\"Server: Nginx/1.18.0\",\"Date: Mon,14 Jun 2021 17:25:35 GMT\",\"Content-Type: text/html; charset=utf-8\",\"Content-Length: 31\",\"X-Varnish: 32770\",\"Age: 0\",\"Via: 1.1 varnish (Varnish/6.5)\",vdc = 0x7f93f7370b28 {
nxt = 0,retval = 0,vcl = {
name = \"svc\",busy = 1,discard = 0,state = auto,temp = warm,conf = {
Syntax = \"41\",srcname = {
\"/etc/varnish/conf.d/svc.vcl\",\"Builtin\",\"/etc/varnish/conf.d/backend_default.vcl\",\"/etc/varnish/conf.d/acl.vcl\",instances = {
\"idmap\" = 0x7f93f75fe070,objcore[REQ] = 0x7f93f75d51f0 {
refcnt = 1,flags = {hfm,private},exp_flags = {},exp = {1623691535.535603,-1.000000,0.000000,0.000000},objhead = 0x7f941dc16890,stevedore = 0x55c79cc9dee0 (malloc Transient) {
Simple = 0x7f93f75d58d0,Obj = 0x7f93f771ad98 {priv=0x7f93f771ad90,ptr=0x7f93f75d58d0,len=264,space=264},LEN = 0x000000000000001f,VXID = 0x00008003,FLAGS = 0x00,GZIPBITS = 0x0...0,LASTMODIFIED = 0x41d831e443c00000,vary = {len=0,ptr=0},HEADERS = {len=144,ptr=0x7f93f75d5948},Body = 0x7f93f7593818 {priv=0x7f93f7593810,ptr=0x7f93f75d5c90,len=31,space=31},flags = {
},privs = 0x7f93f736ed00 {
},top = 0x7f93f7370bb0 {
req = 0x7f93f736eb20 {
[Already dumped,privs = 0x7f93f7370bc8 {
},vcl0 = {
name = \"main\",conf = {
Syntax = \"41\",srcname = {
\"/etc/varnish/conf.d/main.vcl\",\"/etc/varnish/conf.d/synth.vcl\",instances = {
\"idmap\" = 0x7f93f75fe070,thr.busyobj = 0 {
},vmods = {
blob = {0x7f941860edb0,Varnish 6.5.1 1dae23376bb5ea7a6b8e9e4b9ed95cdc9469fb64,0.0},blobdigest = {0x7f941860ee20,curl = {0x7f941860ef00,12.0},frozen = {0x7f93f7625020,gcis = {0x7f93f7625090,std = {0x7f93f7625100,
我注意到,如果在我构建的自定义 VMOD 中删除对函数的调用,则不会发生恐慌。这是有问题的函数:
VCL_INT
vmod_idmap_load (VRT_CTX,IDMap *idmap,VCL_STRING fpath)
{
int i = 0;
char *line = NULL;
FILE *fh = NULL;
idmap_clear (idmap);
LOG ("opening file %s",fpath);
fh = fopen (fpath,"r");
if (!fh) {
VRT_fail (ctx,"Error opening %s",fpath);
i = -1;
goto finally;
}
size_t size = 0;
int ret;
while (getline (&line,&size,fh) >= 0) {
char *id = strndup (line,UUID_STR_LEN - 1);
//LOG ("UUID: %s",id);
khint_t cur = kh_put (uuid_ht,idmap->ht,id,&ret);
if (cur != kh_end (idmap->ht)) i++;
else LOG ("Duplicate ID: %s",id);
line = NULL;
size = 0;
};
LOG ("%d identifiers loaded.",i);
finally:
free (line);
if (fh) fclose (fh);
idmap->ct = i;
return i;
}
不幸的是,我无法从转储中分辨出它的段错误究竟在哪里。我可以使用调试符号编译 Varnish 以了解更多信息吗?
解决方法
我敢打赌 vmod 代码破坏了堆栈,搞乱了恐慌,因为堆栈跟踪毫无意义。
如果您不确定哪里出了问题,最好的办法是撒一些 VSLb(ctx,VCL_Error,"at line %d",__LINE__)
以查看使用 varnishlog
的效果。
不过,请注意,您没有测试很多输入和返回,我建议断言/检查更多。例如:https://github.com/varnish/varnish-modules/blob/master/src/vmod_str.c#L15
另一个例子是 strndup()
不应该失败,所以你应该在它后面有一个 AN(id)
。
希望有帮助