问题描述
像 Google Breakpad 这样的崩溃报告工具,主要用途是让已剥离符号(stripped)的二进制文件在崩溃时生成核心转储或小型转储(minidump)文件,以便事后结合调试符号进行分析。这些二进制文件通常是启用了编译器优化、并且去除了符号的发布版本。
在 Linux 上重现问题:
1.构建+安装google breakpad:
git clone https://chromium.googlesource.com/breakpad/breakpad && cd breakpad
git clone https://chromium.googlesource.com/linux-syscall-support src/third_party/lss
./configure --prefix=/usr/local
make -j$(nproc) && sudo make install
2.代码:
CMakeLists.txt:
# Build script for the BreakPadTest crash reproducer.
cmake_minimum_required(VERSION 3.10)
project(BreakPadTest)

# Require C++17. CMake variable names are case-sensitive, so the suffix must be
# spelled REQUIRED in upper case; otherwise the setting is silently ignored.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_VERBOSE_MAKEFILE TRUE)

# Header location used by `make install` with --prefix=/usr/local.
set(BREAKPAD_DIR "/usr/local/include/breakpad")

# Opt-in switch that appends -O1 (keeping frame pointers) to the compiler flags.
option(OPTION_WITH_O1 "With -O1" OFF)
if(OPTION_WITH_O1)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O1 -fno-omit-frame-pointer")
endif()

include_directories(
  ${BREAKPAD_DIR}
)

add_executable(${PROJECT_NAME} "main.cc")

# stdc++fs provides std::experimental::filesystem used by main.cc.
target_link_libraries(${PROJECT_NAME}
  -lstdc++fs
  -pthread
  libbreakpad_client.a
)
main.cc:
#include <thread>
#include <experimental/filesystem>
#include <client/linux/handler/exception_handler.h>
namespace breakpad = google_breakpad;
namespace filesystem = std::experimental::filesystem;
// Callback handed to the Breakpad ExceptionHandler created in main().
// The descriptor and the user context are not needed here, so both are
// explicitly discarded; the incoming `success` flag is returned unchanged.
static bool DumpCallBack(const breakpad::MinidumpDescriptor& md,
                         void* context,
                         bool success) {
  static_cast<void>(md);
  static_cast<void>(context);
  return success;
}
// Deliberately crashes the process after sleeping for `after` seconds.
// The crash is produced by deleting a std::string through a bogus, hard-coded
// address (0xFEE1DEAD), so the fault is raised while the string destructor runs.
static void fault(unsigned after) {
  const std::chrono::seconds delay{after};
  std::this_thread::sleep_for(delay);

  auto* const bogus = reinterpret_cast<std::string*>(0xFEE1DEAD);
  delete bogus;  // intentional invalid access: this is the crash under test
}
// Entry point of the reproducer: installs a Breakpad exception handler that
// writes minidumps into "<current working directory>/dumps", then triggers a
// deliberate crash through fault().
//
// The standard requires main to return `int`, so the return type is spelled
// that way instead of relying on the int32_t typedef.
int main(int argc, char** argv) {
  (void)argc;
  (void)argv;

  const auto dumpDir = filesystem::current_path().string() + "/dumps";
  filesystem::create_directory(dumpDir);

  breakpad::MinidumpDescriptor md(dumpDir);

  // Keep the handler in automatic storage rather than leaking it via a raw
  // `new`; it stays alive for the whole of main(), which covers the crash.
  breakpad::ExceptionHandler handler(
      md, /* FilterCallback */ nullptr, DumpCallBack,
      /* callback_context */ nullptr, /* install_handler */ true,
      /* server_fd */ -1);

  fault(1U);
  return EXIT_SUCCESS;
}
dump.sh:
#!/bin/bash
#
# Dump the symbols of a binary and run minidump_stackwalk on every *.dmp file
# found in the dumps directory, writing one <name>.txt report per minidump.
#
# Usage: ./dump.sh <binary> [dumps_dir] [dump_syms] [minidump_stackwalk]
# e.g ./dump.sh ./exec $PWD/dumps
#
set -e
set -u
# All path expansions below are quoted so that paths containing spaces or
# glob characters are passed through as a single argument.
DBG_INFO=$(realpath "${1}")
DUMPS_DIR=$(realpath "${2:-$PWD/dumps}")
DUMP_SYMS=${3:-~/WorkSpace/libraries/breakpad/src/tools/linux/dump_syms/dump_syms}
STAK_WALK=${4:-~/WorkSpace/libraries/breakpad/src/processor/minidump_stackwalk}
#
# Generate debug symbols
#
base=$(basename -- "$DBG_INFO")
"$DUMP_SYMS" "$DBG_INFO" > "$DUMPS_DIR/$base.sym"
#
# Create dump dir structure
#
# Split the first line of the symbol file into words; the fourth word is used
# as the directory name (the module id in Breakpad's "MODULE" record).
# `read -a` avoids the pathname expansion an unquoted $(...) array would do.
read -r -a list < "$DUMPS_DIR/$base.sym"
hash=${list[3]}
mkdir -p "$DUMPS_DIR/symbols/$base/$hash"
mv "$DUMPS_DIR/$base.sym" "$DUMPS_DIR/symbols/$base/$hash"
#
# Produce stack trace
#
RED='\033[0;36m' # highlight colour (SGR code 36 is cyan, despite the name)
NC='\033[0m' # No Color
tree "$DUMPS_DIR"
for dmp in "$DUMPS_DIR"/*.dmp ; do
    # When nothing matches, the pattern is left unexpanded; skip it instead of
    # handing a non-existent file to the stack walker (fatal under `set -e`).
    [ -e "$dmp" ] || continue
    filename=$(basename -- "${dmp}")
    filename="${filename%.*}"
    echo -e "generating stack trace for -> ${RED}${dmp}${NC}"
    "$STAK_WALK" "${dmp}" "$DUMPS_DIR/symbols" > "$DUMPS_DIR/${filename}.txt" 2>/dev/null
done
3.运行普通 Debug 版本:
cmake -DCMAKE_BUILD_TYPE=Debug . && make
./BreakPadTest
4.处理第 3 阶段生成的小型转储:
./dump.sh ./BreakPadTest ./dumps
stackwalk 输出:
Operating system: Linux
0.0.0 Linux 4.19.0-16-amd64 #1 SMP Debian 4.19.181-1 (2021-03-19) x86_64
cpu: amd64
family 6 model 58 stepping 9
1 cpu
GPU: UNKNOWN
Crash reason: SIGSEGV /SEGV_MAPERR
Crash address: 0xfee1dead
Process uptime: not available
Thread 0 (crashed)
0 BreakPadTest!std::__cxx11::basic_string<char,std::char_traits<char>,std::allocator<char> >::_M_data() const [basic_string.h : 176 + 0x4]
rax = 0x00000000fee1dead rdx = 0x00007ffcfccfcb90
rcx = 0x00007f5b0cbb4bc1 rbx = 0x0000557d577eb8e0
rsi = 0x00007ffcfccfcb90 rdi = 0x00000000fee1dead
rbp = 0x00007ffcfccfcb50 rsp = 0x00007ffcfccfcb50
r8 = 0x0000000000000000 r9 = 0x0000557d577efaf8
r10 = 0xfffffffffffff60b r11 = 0x0000000000000246
r12 = 0x0000557d56d3d2c0 r13 = 0x00007ffcfccfce50
r14 = 0x0000000000000000 r15 = 0x0000000000000000
rip = 0x0000557d56d3dfda
Found by: given as instruction pointer in context
1 BreakPadTest!std::__cxx11::basic_string<char,std::char_traits<char>,std::allocator<char> >::_M_is_local() const [basic_string.h : 211 + 0xc]
rbx = 0x0000557d577eb8e0 rbp = 0x00007ffcfccfcb80
rsp = 0x00007ffcfccfcb60 r12 = 0x0000557d56d3d2c0
r13 = 0x00007ffcfccfce50 r14 = 0x0000000000000000
r15 = 0x0000000000000000 rip = 0x0000557d56d3e2c5
Found by: call frame info
2 BreakPadTest!std::__cxx11::basic_string<char,std::char_traits<char>,std::allocator<char> >::_M_dispose() [basic_string.h : 220 + 0xc]
rbx = 0x0000557d577eb8e0 rbp = 0x00007ffcfccfcba0
rsp = 0x00007ffcfccfcb90 r12 = 0x0000557d56d3d2c0
r13 = 0x00007ffcfccfce50 r14 = 0x0000000000000000
r15 = 0x0000000000000000 rip = 0x0000557d56d3dff8
Found by: call frame info
3 BreakPadTest!std::__cxx11::basic_string<char,std::char_traits<char>,std::allocator<char> >::~basic_string() [basic_string.h : 657 + 0xc]
rbx = 0x0000557d577eb8e0 rbp = 0x00007ffcfccfcbc0
rsp = 0x00007ffcfccfcbb0 r12 = 0x0000557d56d3d2c0
r13 = 0x00007ffcfccfce50 r14 = 0x0000000000000000
r15 = 0x0000000000000000 rip = 0x0000557d56d3d930
Found by: call frame info
4 BreakPadTest!fault [main.cc : 19 + 0xa]
rbx = 0x0000557d577eb8e0 rbp = 0x00007ffcfccfcbf0
rsp = 0x00007ffcfccfcbd0 r12 = 0x0000557d56d3d2c0
r13 = 0x00007ffcfccfce50 r14 = 0x0000000000000000
r15 = 0x0000000000000000 rip = 0x0000557d56d3d3f0
Found by: call frame info
5 BreakPadTest!main [main.cc : 39 + 0xa]
rbx = 0x0000557d577eb8e0 rbp = 0x00007ffcfccfcd70
rsp = 0x00007ffcfccfcc00 r12 = 0x0000557d56d3d2c0
r13 = 0x00007ffcfccfce50 r14 = 0x0000000000000000
r15 = 0x0000000000000000 rip = 0x0000557d56d3d4fd
Found by: call frame info
6 libc.so.6 + 0x2409b
rbx = 0x0000000000000000 rbp = 0x0000557d56d78b80
rsp = 0x00007ffcfccfcd80 r12 = 0x0000557d56d3d2c0
r13 = 0x00007ffcfccfce50 r14 = 0x0000000000000000
r15 = 0x0000000000000000 rip = 0x00007f5b0ca0609b
Found by: call frame info
7 BreakPadTest!fault [main.cc : 20 + 0x3]
rsp = 0x00007ffcfccfcda0 rip = 0x0000557d56d3d402
Found by: stack scanning
8 ld-linux-x86-64.so.2 + 0xf476
rsp = 0x00007ffcfccfce10 rip = 0x00007f5b0cf1c476
Found by: stack scanning
9 BreakPadTest!_start + 0x2a
rsp = 0x00007ffcfccfce40 rip = 0x0000557d56d3d2ea
Found by: stack scanning
10 0x7ffcfccfce48
rsp = 0x00007ffcfccfce48 rip = 0x00007ffcfccfce48
Found by: call frame info
Loaded modules:
0x557d56d34000 - 0x557d56d78fff BreakPadTest ??? (main)
0x7f5b0c9e2000 - 0x7f5b0cb4bfff libc.so.6 ??? (WARNING: No symbols,libc.so.6,A8A9B91823C5CFE5E5B5D946D605D0920)
0x7f5b0cba3000 - 0x7f5b0cbb7fff libpthread.so.0 ???
0x7f5b0cbc4000 - 0x7f5b0cbd7fff libgcc_s.so.1 ???
0x7f5b0cbde000 - 0x7f5b0cc89fff libm.so.6 ???
0x7f5b0cd61000 - 0x7f5b0ce95fff libstdc++.so.6 ???
0x7f5b0cf0d000 - 0x7f5b0cf2bfff ld-linux-x86-64.so.2 ??? (WARNING: No symbols,ld-linux-x86-64.so.2,7BFD5DF2BE95A34B86FD71080ACCAE8C0)
0x7ffcfcdc5000 - 0x7ffcfcdc6fff linux-gate.so ???
5.启用 -O1 后重新执行第 3 步:
cmake -DCMAKE_BUILD_TYPE=Debug -DOPTION_WITH_O1=ON . && make
./BreakPadTest
6.像第 4 阶段一样处理小型转储:
stackwalk 输出:
Operating system: Linux
0.0.0 Linux 4.19.0-16-amd64 #1 SMP Debian 4.19.181-1 (2021-03-19) x86_64
cpu: amd64
family 6 model 58 stepping 9
1 cpu
GPU: UNKNOWN
Crash reason: SIGSEGV /SEGV_MAPERR
Crash address: 0xfee1dead
Process uptime: not available
Thread 0 (crashed)
0 BreakPadTest!main [basic_string.h : 176 + 0x0]
rax = 0x0000000000000000 rdx = 0x000055bd46f66a40
rcx = 0x00007f7633ea8bc1 rbx = 0x00007ffde4cc7c40
rsi = 0x00007ffde4cc7c40 rdi = 0x00007ffde4cc7c40
rbp = 0x00007ffde4cc7d90 rsp = 0x00007ffde4cc7c20
r8 = 0x0000000000000000 r9 = 0x000055bd474caaf8
r10 = 0x0000000000000000 r11 = 0x0000000000000246
r12 = 0x000055bd474c64f0 r13 = 0x000055bd474c64f0
r14 = 0x0000000000000000 r15 = 0x0000000000000000
rip = 0x000055bd46f1b8dd
Found by: given as instruction pointer in context
1 libc.so.6 + 0x2409b
rbx = 0x0000000000000000 rbp = 0x000055bd46f555e0
rsp = 0x00007ffde4cc7da0 r12 = 0x000055bd46f1b270
r13 = 0x00007ffde4cc7e70 r14 = 0x0000000000000000
r15 = 0x0000000000000000 rip = 0x00007f7633cfa09b
Found by: call frame info
2 BreakPadTest!DumpCallBack [main.cc : 15 + 0x3]
rsp = 0x00007ffde4cc7dc0 rip = 0x000055bd46f1b358
Found by: stack scanning
3 ld-linux-x86-64.so.2 + 0xf476
rsp = 0x00007ffde4cc7e30 rip = 0x00007f7634210476
Found by: stack scanning
4 BreakPadTest!_start + 0x2a
rsp = 0x00007ffde4cc7e60 rip = 0x000055bd46f1b29a
Found by: stack scanning
5 0x7ffde4cc7e68
rsp = 0x00007ffde4cc7e68 rip = 0x00007ffde4cc7e68
Found by: call frame info
Loaded modules:
0x55bd46f14000 - 0x55bd46f55fff BreakPadTest ??? (main)
0x7f7633cd6000 - 0x7f7633e3ffff libc.so.6 ??? (WARNING: No symbols,libc.so.6,A8A9B91823C5CFE5E5B5D946D605D0920)
0x7f7633e97000 - 0x7f7633eabfff libpthread.so.0 ???
0x7f7633eb8000 - 0x7f7633ecbfff libgcc_s.so.1 ???
0x7f7633ed2000 - 0x7f7633f7dfff libm.so.6 ???
0x7f7634055000 - 0x7f7634189fff libstdc++.so.6 ???
0x7f7634201000 - 0x7f763421ffff ld-linux-x86-64.so.2 ??? (WARNING: No symbols,ld-linux-x86-64.so.2,7BFD5DF2BE95A34B86FD71080ACCAE8C0)
0x7ffde4d9a000 - 0x7ffde4d9bfff linux-gate.so ???
正如我们所看到的,正确的符号从第 6 阶段的堆栈遍历中消失了。
而在 GDB 等其他工具中,即使第 5 步使用了 -O1,得到的回溯仍然能正确指向出错的位置:
Program received signal SIGSEGV,Segmentation fault.
fault (after=1) at /home/iman/WorkSpace/projects/BreakPadTest/src/main.cc:26
26 delete reinterpret_cast<std::string*>(0xFEE1DEAD);
或者在其他工具中,例如 backward-cpp :
Stack trace (most recent call last):
#3 Object "",at 0xffffffffffffffff,in
#2 Object "/home/iman/WorkSpace/projects/build-CrashReporter-Desktop_Qt_5_11_3_GCC-Debug/CrashReporter",at 0x55f32a66b579,in _start
#1 Source "/build/glibc-vjB4T1/glibc-2.28/csu/../csu/libc-start.c",line 308,in __libc_start_main [0x7f56288be09a]
#0 | Source "/home/iman/WorkSpace/projects/BreakPadTest/src/main.cc",line 48,in main
| 46: #endif // WITH_BREAKPAD
| 47:
| > 48: fault(1U);
| 49:
| 50: return EXIT_SUCCESS;
| Source "/home/iman/WorkSpace/projects/BreakPadTest/src/main.cc",line 26,in fault
| 24: static void fault(unsigned after) {
| 25: std::this_thread::sleep_for(std::chrono::seconds{after});
| > 26: delete reinterpret_cast<std::string*>(0xFEE1DEAD);
| 27: }
| Source "/usr/include/c++/8/bits/basic_string.h",line 657,in
| 655: */
| 656: ~basic_string()
| > 657: { _M_dispose(); }
| 658:
| 659: /**
| Source "/usr/include/c++/8/bits/basic_string.h",line 220,in
| 218: _M_dispose()
| 219: {
| > 220: if (!_M_is_local())
| 221: _M_destroy(_M_allocated_capacity);
| 222: }
| Source "/usr/include/c++/8/bits/basic_string.h",line 211,in
| 209: bool
| 210: _M_is_local() const
| > 211: { return _M_data() == _M_local_data(); }
| 212:
| 213: // Create & Destroy
Source "/usr/include/c++/8/bits/basic_string.h",line 176,in main [0x55f32a66b66c]
174: pointer
175: _M_data() const
> 176: { return _M_dataplus._M_p; }
177:
178: pointer
179: _M_local_data()
Segmentation fault (Address not mapped to object [0xfee1dead])
Segmentation fault
有什么想法或提示吗?
解决方法
启用 -O1 优化后,几乎所有 std::basic_string 的成员函数都会被内联。之所以会被内联,是因为 std::basic_string 是一个模板,其实现定义在头文件(header)中。
google breakpad 处理器子系统在处理内联函数或从 Linux 上的 DWARF 调试信息中提取内联函数元数据时存在一个古老而活跃的问题,如以下主题所述:
- https://bugzilla.mozilla.org/show_bug.cgi?id=524410
- https://bugzilla.mozilla.org/show_bug.cgi?id=563776
- https://bugzilla.mozilla.org/show_bug.cgi?id=1665367
- https://bugzilla.mozilla.org/show_bug.cgi?id=1398533
- https://bugzilla.mozilla.org/show_bug.cgi?id=1636194
- https://groups.google.com/g/google-breakpad-discuss/c/ZQOSZBbdF7U/m/58tpSnM1EBsJ
要彻底解决这个问题,需要修改 breakpad 内部的符号表示和堆栈遍历器。不过作为一种变通办法,您可以生成供 GDB 使用的核心转储,并用它来查看回溯。为此,breakpad 代码库中有一个名为 minidump-2-core 的实用程序(位于 src/tools/linux/md2core),因此在使用调试信息构建发布二进制文件后: