清漆丢失和重新获取日志 - 连接断开

问题描述

我遇到来自 Varnish (6.5) 的针对特定路由的间歇性空回复或连接重置错误。这是有问题的 VCL(更大的 VCL 集的一部分,但这是路由使用的):

vcl 4.1;

import gcis;
import std;

include "/etc/varnish/conf.d/backend_default.vcl";
include "/etc/varnish/conf.d/acl.vcl";


sub vcl_init {
    new idmap = gcis.idmap();
}


sub vcl_recv {
    if (!client.ip ~ internal_acl) {
        return (synth (
                405,"Client IP " + client.ip +
                " is not allowed to access this service."));
    }
}


sub vcl_deliver {
    // If the request was for reloading the ACL,reload the list here.
    if (req.url == "/svc/reload_acl") {
        std.log(
            idmap.load(std.getenv("GCISWG_ACL_FILE"))
            + " restricted identifiers reloaded from database."
        );

        ban ("req.url ~ ^/iiif/");
        ban ("req.url == /svc/dump_acl");
    }
}

如果我重复发送以下请求:

curl -iXPOST varnish/svc/reload_acl

有时我会得到预期的响应;有时我得到 curl: (52) Empty reply from server,有时我得到 curl: (56) Recv failure: Connection reset by peer

回复的 varnishlog 条目如下所示:

varnish_1      | *   << BeReq    >> 9         
varnish_1      | -   Begin          bereq 8 pass
varnish_1      | -   VCL_use        svc
varnish_1      | -   Timestamp      Start: 1623449680.866799 0.000000 0.000000
varnish_1      | -   BereqMethod    POST
varnish_1      | -   BereqURL       /svc/reload_acl
varnish_1      | -   BereqProtocol  HTTP/1.1
varnish_1      | -   BereqHeader    User-Agent: curl/7.64.0
varnish_1      | -   BereqHeader    Accept: */*
varnish_1      | -   BereqHeader    X-Forwarded-For: 172.18.0.5
varnish_1      | -   BereqHeader    host: varnish
varnish_1      | -   BereqHeader    X-Varnish: 9
varnish_1      | -   VCL_call       BACKEND_FETCH
varnish_1      | -   VCL_return     fetch
varnish_1      | -   BackendOpen    32 default 172.18.0.8 8080 172.18.0.9 34554 reuse
varnish_1      | -   Timestamp      Bereq: 1623449680.866890 0.000091 0.000091
varnish_1      | -   Timestamp      Beresp: 1623449680.878314 0.011515 0.011424
varnish_1      | -   BerespProtocol HTTP/1.1
varnish_1      | -   BerespStatus   201
varnish_1      | -   BerespReason   CREATED
varnish_1      | -   BerespHeader   Server: Nginx/1.18.0
varnish_1      | -   BerespHeader   Date: Fri,11 Jun 2021 22:14:40 GMT
varnish_1      | -   BerespHeader   Content-Type: text/html; charset=utf-8
varnish_1      | -   BerespHeader   Content-Length: 31
varnish_1      | -   BerespHeader   Connection: keep-alive
varnish_1      | -   VCL_call       BACKEND_RESPONSE
varnish_1      | -   VCL_return     deliver
varnish_1      | -   Filters        
varnish_1      | -   Storage        malloc Transient
varnish_1      | -   Fetch_Body     3 length stream
varnish_1      | -   BackendClose   32 default recycle
varnish_1      | -   Timestamp      BerespBody: 1623449680.878392 0.011593 0.000078
varnish_1      | -   Length         31
varnish_1      | -   BereqAcct      129 0 129 167 31 198
varnish_1      | -   End            
varnish_1      | 
varnish_1      | *   << Session  >> 7         
varnish_1      | -   Begin          sess 0 HTTP/1
varnish_1      | -   SessOpen       172.18.0.5 57306 nossl 172.18.0.9 80 1623449680.866658 30
varnish_1      | -   Link           req 8 rxreq
varnish_1      | -   VSL            flush
varnish_1      | -   End            synth
varnish_1      | 
varnish_1      | *   << Request  >> 8         
varnish_1      | -   Begin          req 7 rxreq
varnish_1      | -   Timestamp      Start: 1623449680.866744 0.000000 0.000000
varnish_1      | -   Timestamp      Req: 1623449680.866744 0.000000 0.000000
varnish_1      | -   VCL_use        main
varnish_1      | -   ReqStart       172.18.0.5 57306 nossl
varnish_1      | -   ReqMethod      POST
varnish_1      | -   ReqURL         /svc/reload_acl
varnish_1      | -   ReqProtocol    HTTP/1.1
varnish_1      | -   ReqHeader      Host: varnish
varnish_1      | -   ReqHeader      User-Agent: curl/7.64.0
varnish_1      | -   ReqHeader      Accept: */*
varnish_1      | -   ReqHeader      X-Forwarded-For: 172.18.0.5
varnish_1      | -   VCL_call       RECV
varnish_1      | -   VCL_use        svc via svc_vcl
varnish_1      | -   VCL_return     vcl
varnish_1      | -   ReqHeader      X-Forwarded-For: 172.18.0.5
varnish_1      | -   VCL_call       RECV
varnish_1      | -   VCL_acl        MATCH internal_acl "172.0.0.0/8"
varnish_1      | -   ReqUnset       Host: varnish
varnish_1      | -   ReqHeader      host: varnish
varnish_1      | -   VCL_return     pass
varnish_1      | Log abandoned (vsl)
varnish_1      | Log reacquired
varnish_1      | -   VCL_call       HASH
varnish_1      | -   VCL_return     lookup
varnish_1      | -   VCL_call       PASS
varnish_1      | -   VCL_return     fetch
varnish_1      | -   Link           bereq 9 pass
varnish_1      | -   Timestamp      Fetch: 1623449680.878425 0.011680 0.011680
varnish_1      | -   RespProtocol   HTTP/1.1
varnish_1      | -   RespStatus     201
varnish_1      | -   RespReason     CREATED
varnish_1      | -   RespHeader     Server: Nginx/1.18.0
varnish_1      | -   RespHeader     Date: Fri,11 Jun 2021 22:14:40 GMT
varnish_1      | -   RespHeader     Content-Type: text/html; charset=utf-8
varnish_1      | -   RespHeader     Content-Length: 31
varnish_1      | -   RespHeader     X-Varnish: 8
varnish_1      | -   RespHeader     Age: 0
varnish_1      | -   RespHeader     Via: 1.1 varnish (Varnish/6.5)
varnish_1      | -   VCL_call       DELIVER
varnish_1      | -   VSL            flush
varnish_1      | -   End            synth

会发生什么?竞争条件?我在交付子文件中遗漏了什么吗?顺便说一句,我也注意到了这些没有禁止命令的问题。

感谢您的帮助。

更新:按照评论者的建议,我检查了恐慌转储:

Panic at: Mon,14 Jun 2021 17:25:36 GMT
Wrong turn at cache/cache_main.c:287:
Signal 11 (Segmentation fault) received at 0 si_code 128
version = varnish-6.5.1 revision 1dae23376bb5ea7a6b8e9e4b9ed95cdc9469fb64,vrt api = 12.0
ident = Linux,5.12.6-arch1-1,x86_64,-junix,-sdefault,-sfile,-hcritbit,epoll
Now = 148500.767810 (mono),1623691535.535737 (real)
Backtrace:
  ip=0x55c79cbc5b19,sp=0x7f941dbf8460 <PAN_already+0x1304>
  ip=0x55c79cc1c8de,sp=0x7f941dbf85a0 <VAS_Fail+0x22>
  ip=0x55c79cbc1cb2,sp=0x7f941dbf85e0 <THR_GetRequest+0x14a>
  ip=0x7f941dd75c4b,sp=0x7f941dbf8ac0 <sigwaitinfo+0x8>
pthread.self = 0x7f9417c63b38
pthread.name = (cache-worker)
pthread.attr = {
  guard = 8192,stack_bottom = 0x7f9417c55000,stack_top = 0x7f9417c63b08,stack_size = 60168,}
thr.req = 0x7f93f736eb20 {
  vxid = 32770,transport = HTTP/1 {
    state = HTTP1::Proc
  }
  step = R_STP_DELIVER,req_body = none,restarts = 0,esi_level = 0,sp = 0x7f93f737f030 {
    fd = 28,vxid = 32769,t_open = 1623691535.525822,t_idle = 1623691535.525822,ws = 0x7f93f737f070 {
      id = \"ses\",{s,f,r,e} = {0x7f93f737f0c8,+96,+576},},transport = HTTP/1 {
      state = HTTP1::Proc
    }
    client = 172.18.0.5 57692 :80,worker = 0x7f9417c63970 {
    ws = 0x7f9417c63a18 {
      id = \"wrk\",e} = {0x7f9417c62f50,+0,+2040},VCL::method = inside DELIVER,VCL::return = 0x0,VCL::methods = {RECV,PASS,HASH,DELIVER},ws = 0x7f93f736ec70 {
    id = \"req\",e} = {0x7f93f7370bd0,+168,+57128},http_conn = 0x7f93f7370b48 {
    fd = 28 (@0x7f93f737f054),doclose = NULL,ws = 0x7f93f736ec70 {
      [Already dumped,see above]
    },{rxbuf_b,rxbuf_e} = {0x7f93f7370bd0,0x7f93f7370c26},{pipeline_b,pipeline_e} = {0,0},content_length = -1,body_status = none,first_byte_timeout = 0.000000,between_bytes_timeout = 0.000000,http[req] = 0x7f93f736ed10 {
    ws = 0x7f93f736ec70 {
      [Already dumped,hdrs {
      \"POST\",\"/svc/reload_acl\",\"HTTP/1.1\",\"User-Agent: curl/7.64.0\",\"Accept: */*\",\"X-Forwarded-For: 172.18.0.5\",\"host: varnish\",http[resp] = 0x7f93f736f600 {
    ws = 0x7f93f736ec70 {
      [Already dumped,hdrs {
      \"HTTP/1.1\",\"201\",\"CREATED\",\"Server: Nginx/1.18.0\",\"Date: Mon,14 Jun 2021 17:25:35 GMT\",\"Content-Type: text/html; charset=utf-8\",\"Content-Length: 31\",\"X-Varnish: 32770\",\"Age: 0\",\"Via: 1.1 varnish (Varnish/6.5)\",vdc = 0x7f93f7370b28 {
    nxt = 0,retval = 0,vcl = {
    name = \"svc\",busy = 1,discard = 0,state = auto,temp = warm,conf = {
      Syntax = \"41\",srcname = {
        \"/etc/varnish/conf.d/svc.vcl\",\"Builtin\",\"/etc/varnish/conf.d/backend_default.vcl\",\"/etc/varnish/conf.d/acl.vcl\",instances = {
        \"idmap\" = 0x7f93f75fe070,objcore[REQ] = 0x7f93f75d51f0 {
    refcnt = 1,flags = {hfm,private},exp_flags = {},exp = {1623691535.535603,-1.000000,0.000000,0.000000},objhead = 0x7f941dc16890,stevedore = 0x55c79cc9dee0 (malloc Transient) {
      Simple = 0x7f93f75d58d0,Obj = 0x7f93f771ad98 {priv=0x7f93f771ad90,ptr=0x7f93f75d58d0,len=264,space=264},LEN = 0x000000000000001f,VXID = 0x00008003,FLAGS = 0x00,GZIPBITS = 0x0...0,LASTMODIFIED = 0x41d831e443c00000,vary = {len=0,ptr=0},HEADERS = {len=144,ptr=0x7f93f75d5948},Body = 0x7f93f7593818 {priv=0x7f93f7593810,ptr=0x7f93f75d5c90,len=31,space=31},flags = {
  },privs = 0x7f93f736ed00 {
  },top = 0x7f93f7370bb0 {
    req = 0x7f93f736eb20 {
      [Already dumped,privs = 0x7f93f7370bc8 {
    },vcl0 = {
      name = \"main\",conf = {
        Syntax = \"41\",srcname = {
          \"/etc/varnish/conf.d/main.vcl\",\"/etc/varnish/conf.d/synth.vcl\",instances = {
          \"idmap\" = 0x7f93f75fe070,thr.busyobj = 0 {
},vmods = {
  blob = {0x7f941860edb0,Varnish 6.5.1 1dae23376bb5ea7a6b8e9e4b9ed95cdc9469fb64,0.0},blobdigest = {0x7f941860ee20,curl = {0x7f941860ef00,12.0},frozen = {0x7f93f7625020,gcis = {0x7f93f7625090,std = {0x7f93f7625100,

我注意到,如果在我构建的自定义 VMOD 中删除函数调用,则不会发生恐慌。这是有问题的函数

VCL_INT
vmod_idmap_load (VRT_CTX,IDMap *idmap,VCL_STRING fpath)
{
    int i = 0;
    char *line = NULL;
    FILE *fh = NULL;

    idmap_clear (idmap);

    LOG ("opening file %s",fpath);
    fh = fopen (fpath,"r");
    if (!fh) {
        VRT_fail (ctx,"Error opening %s",fpath);
        i = -1;
        goto finally;
    }

    size_t size = 0;
    int ret;

    while (getline (&line,&size,fh) >= 0) {
        char *id = strndup (line,UUID_STR_LEN - 1);
        //LOG ("UUID: %s",id);
        khint_t cur = kh_put (uuid_ht,idmap->ht,id,&ret);
        if (cur != kh_end (idmap->ht)) i++;
        else LOG ("Duplicate ID: %s",id);

        line = NULL;
        size = 0;
    };
    LOG ("%d identifiers loaded.",i);

finally:
    free (line);
    if (fh) fclose (fh);

    idmap->ct = i;

    return i;
}

不幸的是,我无法从转储中分辨出它的段错误究竟在哪里。我可以使用调试符号编译 Varnish 以了解更多信息吗?

解决方法

我敢打赌 vmod 代码破坏了堆栈,搞乱了恐慌,因为堆栈跟踪毫无意义。

如果您不确定哪里出了问题,最好的办法是撒一些 VSLb(ctx,VCL_Error,"at line %d",__LINE__) 以查看使用 varnishlog 的效果。

不过,请注意,您没有测试很多输入和返回,我建议断言/检查更多。例如:https://github.com/varnish/varnish-modules/blob/master/src/vmod_str.c#L15 另一个例子是 strndup() 不应该失败,所以你应该在它后面有一个 AN(id)

希望有帮助