Skip to content

Commit 6f68505

Browse files
committed
crashtracking custom unwinding
Re-Add the capability to emit symbols in process
1 parent e2f184c commit 6f68505

File tree

10 files changed

+351
-74
lines changed

10 files changed

+351
-74
lines changed

crashtracker/build.rs

+29-8
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,34 @@
1-
fn main() {
2-
println!("cargo:rerun-if-changed=build.rs");
1+
use std::env;
32

4-
// Link the system `libunwind`
5-
println!("cargo:rustc-link-lib=unwind");
3+
// Inspired by libunwind-sys
64

7-
// Link `libgcc_s` (needed for `_Unwind` symbols)
8-
println!("cargo:rustc-link-lib=dylib=gcc_s");
9-
// todo: avoid hard coding these paths
10-
// Specify library search paths
5+
fn main() {
6+
println!("cargo:rerun-if-changed=build.rs");
7+
// Choose build.
8+
let target = env::var("TARGET").unwrap();
9+
let link_lib_arch = match target.as_str() {
10+
"x86_64-unknown-linux-gnu"| "x86_64-unknown-linux-musl" => "x86_64",
11+
"i686-unknown-linux-gnu"|"i586-unknown-linux-gnu" => "x86",
12+
"arm-unknown-linux-gnueabihf" => "arm",
13+
_ => ""
14+
};
15+
if link_lib_arch.is_empty() {
16+
println!("cargo:warning=target {} is unsupported",target);
17+
return;
18+
}
19+
1120
println!("cargo:rustc-link-search=native=/usr/local/lib");
1221
println!("cargo:rustc-link-search=native=/usr/lib");
22+
#[cfg(target_arch = "x86_64")]
23+
println!("cargo:rustc-link-search=native=/usr/lib/x86_64-linux-gnu");
24+
if target.contains("musl") {
25+
// Symbol tables may be LZMA-compressed. Do we really need it?
26+
println!("cargo:rustc-link-lib=static=lzma");
27+
println!("cargo:rustc-link-lib=static=unwind-{}", link_lib_arch);
28+
println!("cargo:rustc-link-lib=static=unwind");
29+
}
30+
else {
31+
println!("cargo:rustc-link-lib=static=unwind-{}", link_lib_arch);
32+
println!("cargo:rustc-link-lib=static=unwind");
33+
}
1334
}

crashtracker/src/collector/emitters.rs

+60-32
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
use crate::collector::additional_tags::consume_and_emit_additional_tags;
55
use crate::collector::counters::emit_counters;
66
use crate::collector::spans::{emit_spans, emit_traces};
7+
use crate::shared::configuration::StackTraceUnwinding;
78
use crate::shared::constants::*;
89
use crate::CrashtrackerConfiguration;
910
use crate::StacktraceCollection;
@@ -14,6 +15,16 @@ use std::{
1415
fs::File,
1516
io::{Read, Write},
1617
};
18+
use std::ffi::CStr;
19+
20+
#[cfg(target_arch = "x86_64")]
21+
use crate::collector::libunwind_x86_64 as libunwind;
22+
23+
#[cfg(target_arch = "arm")]
24+
use crate::collector::libunwind_arm as libunwind;
25+
26+
use libunwind::{UnwCursor, UnwContext, unw_init_local, unw_step, unw_get_reg, unw_get_proc_name, UNW_REG_IP, UNW_REG_SP};
27+
1728

1829
/// Emit a stacktrace onto the given handle as formatted json.
1930
/// SAFETY:
@@ -82,50 +93,65 @@ unsafe fn emit_backtrace_by_frames(
8293
}
8394

8495

85-
// libunwind structures and functions
86-
#[repr(C)]
87-
struct UnwContext([u8; 1024]); // Placeholder size for unw_context_t
88-
89-
#[repr(C)]
90-
struct UnwCursor([u8; 1024]); // Placeholder size for unw_cursor_t
91-
92-
extern "C" {
93-
fn _ULx86_64_init_local(cursor: *mut UnwCursor, context: *mut UnwContext) -> i32;
94-
fn _ULx86_64_step(cursor: *mut UnwCursor) -> i32;
95-
fn _ULx86_64_get_reg(cursor: *mut UnwCursor, reg: i32, valp: *mut u64) -> i32;
96-
}
97-
98-
const UNW_REG_IP: i32 = 16; // Register number for Instruction Pointer
99-
const UNW_REG_SP: i32 = 17; // Register number for Stack Pointer
100-
101-
fn emit_backtrace_libuwnind(w: &mut impl Write, ucontext: *const ucontext_t) -> anyhow::Result<()> {
96+
fn emit_backtrace_libuwnind(
97+
w: &mut impl Write,
98+
ucontext: *const ucontext_t,
99+
resolve_symbols: bool,
100+
) -> anyhow::Result<()> {
102101
unsafe {
103-
let mut context = UnwContext([0; 1024]);
104-
let mut cursor = UnwCursor([0; 1024]);
102+
let mut cursor = UnwCursor([0; std::mem::size_of::<UnwCursor>()]);
103+
let context = ucontext as *mut UnwContext; // Reinterpret as UnwContext
105104

106-
if _ULx86_64_init_local(&mut cursor, &mut context) != 0 {
107-
writeln!(w, "Failed to initialize libunwind cursor")?;
108-
return Ok(());
105+
writeln!(w, "{DD_CRASHTRACK_BEGIN_STACKTRACE}")?;
106+
107+
if unw_init_local(&mut cursor, context) != 0 {
108+
writeln!(w, "{{ \"error\": \"Failed to initialize cursor from context\" }}")?;
109+
writeln!(w, "{DD_CRASHTRACK_END_STACKTRACE}")?;
110+
w.flush()?;
111+
return Err(anyhow::anyhow!("Failed to initialize cursor from context"));
109112
}
110113

111-
writeln!(w, "{DD_CRASHTRACK_BEGIN_STACKTRACE}")?;
112-
while _ULx86_64_step(&mut cursor) > 0 {
114+
let mut name_buf = vec![0 as libc::c_char; 256]; // Buffer for function names
115+
let mut offset: u64 = 0;
116+
117+
loop {
113118
let mut ip: u64 = 0;
114119
let mut sp: u64 = 0;
115-
if _ULx86_64_get_reg(&mut cursor, UNW_REG_IP, &mut ip) == 0 &&
116-
_ULx86_64_get_reg(&mut cursor, UNW_REG_SP, &mut sp) == 0 {
117-
writeln!(w, "{{ \"ip\": \"{:x}\", \"sp\": \"{:x}\" }}", ip, sp)?;
120+
121+
if unw_get_reg(&mut cursor, UNW_REG_IP, &mut ip) != 0 ||
122+
unw_get_reg(&mut cursor, UNW_REG_SP, &mut sp) != 0 {
123+
writeln!(w, "{{ \"error\": \"Failed to retrieve registers during stack unwinding\" }}")?;
124+
break;
125+
}
126+
127+
if resolve_symbols {
128+
let mut func_name = "<unknown>".to_string();
129+
let mut symbol_address: u64 = 0;
130+
131+
// Attempt to retrieve the function name & offset
132+
if unw_get_proc_name(&mut cursor, name_buf.as_mut_ptr(), name_buf.len(), &mut offset) == 0 {
133+
func_name = CStr::from_ptr(name_buf.as_ptr()).to_string_lossy().into_owned();
134+
symbol_address = ip - offset;
135+
}
136+
// we could also retrieve the elf offsets using the libunwind APIs if needed.
137+
writeln!(w, "{{ \"ip\": \"{:#x}\", \"sp\": \"{:#x}\", \"symbol_address\": \"{:#x}\", \"function\": \"{}\", \"offset\": \"0x{:x}\" }}",
138+
ip, sp, symbol_address, func_name, offset)?;
118139
} else {
119-
writeln!(w, "Failed to get registers")?;
140+
writeln!(w, "{{ \"ip\": \"{:#x}\", \"sp\": \"{:#x}\" }}", ip, sp)?;
141+
}
142+
143+
if unw_step(&mut cursor) <= 0 {
120144
break;
121145
}
122146
}
147+
123148
writeln!(w, "{DD_CRASHTRACK_END_STACKTRACE}")?;
124149
}
150+
151+
w.flush()?;
125152
Ok(())
126153
}
127154

128-
129155
pub(crate) fn emit_crashreport(
130156
pipe: &mut impl Write,
131157
config: &CrashtrackerConfiguration,
@@ -154,9 +180,11 @@ pub(crate) fn emit_crashreport(
154180
// https://doc.rust-lang.org/src/std/backtrace.rs.html#332
155181
// Do this last, so even if it crashes, we still get the other info.
156182
if config.resolve_frames != StacktraceCollection::Disabled {
157-
// todo: add a switch between musl / glibc
158-
// unsafe { emit_backtrace_by_frames(pipe, config.resolve_frames)? };
159-
emit_backtrace_libuwnind(pipe, ucontext)?;
183+
if config.stacktrace_unwinding == StackTraceUnwinding::Backtrace {
184+
unsafe { emit_backtrace_by_frames(pipe, config.resolve_frames)? };
185+
} else {
186+
emit_backtrace_libuwnind(pipe, ucontext, config.resolve_frames == StacktraceCollection::EnabledWithInprocessSymbols)?;
187+
}
160188
}
161189
writeln!(pipe, "{DD_CRASHTRACK_DONE}")?;
162190
pipe.flush()?;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// libunwind_arm.rs
2+
3+
// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
4+
// SPDX-License-Identifier: Apache-2.0
5+
6+
use libc;
7+
8+
#[repr(C)]
9+
pub struct UnwContext(pub [u8; 512]); // Placeholder size for ARM unw_context_t
10+
11+
#[repr(C)]
12+
pub struct UnwCursor(pub [u8; 512]); // Placeholder size for ARM unw_cursor_t
13+
14+
// This is a subset of the libunwind API. A more complete binding can be generated using bindgen.
15+
extern "C" {
16+
#[link_name = "_Uarm_init_local"]
17+
pub fn unw_init_local(cursor: *mut UnwCursor, context: *mut UnwContext) -> i32;
18+
#[link_name = "_Uarm_step"]
19+
pub fn unw_step(cursor: *mut UnwCursor) -> i32;
20+
#[link_name = "_Uarm_get_reg"]
21+
pub fn unw_get_reg(cursor: *mut UnwCursor, reg: i32, valp: *mut u64) -> i32;
22+
#[link_name = "_Uarm_get_proc_name"]
23+
pub fn unw_get_proc_name(
24+
cursor: *mut UnwCursor,
25+
name: *mut libc::c_char,
26+
len: usize,
27+
offset: *mut u64,
28+
) -> i32;
29+
}
30+
31+
// ARM register definitions for libunwind
32+
pub const UNW_REG_IP: i32 = 12; // ARM's Instruction Pointer (PC)
33+
pub const UNW_REG_SP: i32 = 13; // ARM's Stack Pointer (SP)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// libunwind_x86_64.rs
2+
3+
// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
4+
// SPDX-License-Identifier: Apache-2.0
5+
6+
use libc;
7+
8+
#[repr(C)]
9+
pub struct UnwContext(pub [u8; 1024]); // Placeholder size for x86_64 unw_context_t
10+
11+
#[repr(C)]
12+
pub struct UnwCursor(pub [u8; 1024]); // Placeholder size for x86_64 unw_cursor_t
13+
14+
// This is a subset of the libunwind API. A more complete binding can be generated using bindgen.
15+
16+
extern "C" {
17+
#[link_name = "_Ux86_64_init_local"]
18+
pub fn unw_init_local(cursor: *mut UnwCursor, context: *mut UnwContext) -> i32;
19+
#[link_name = "_Ux86_64_step"]
20+
pub fn unw_step(cursor: *mut UnwCursor) -> i32;
21+
#[link_name = "_Ux86_64_get_reg"]
22+
pub fn unw_get_reg(cursor: *mut UnwCursor, reg: i32, valp: *mut u64) -> i32;
23+
#[link_name = "_Ux86_64_get_proc_name"]
24+
pub fn unw_get_proc_name(
25+
cursor: *mut UnwCursor,
26+
name: *mut libc::c_char,
27+
len: usize,
28+
offset: *mut u64,
29+
) -> i32;
30+
}
31+
32+
// x86_64 register definitions for libunwind
33+
pub const UNW_REG_IP: i32 = 16; // Instruction Pointer
34+
pub const UNW_REG_SP: i32 = 17; // Stack Pointer

crashtracker/src/collector/mod.rs

+7
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,13 @@ pub use additional_tags::{
1414
clear_additional_tags, consume_and_emit_additional_tags, insert_additional_tag,
1515
remove_additional_tag,
1616
};
17+
18+
#[cfg(target_arch = "x86_64")]
19+
pub mod libunwind_x86_64;
20+
21+
#[cfg(target_arch = "arm")]
22+
pub mod libunwind_arm;
23+
1724
pub use api::*;
1825
pub use counters::{begin_op, end_op, reset_counters, OpTypes};
1926
pub use crash_handler::{update_config, update_metadata};

crashtracker/src/shared/configuration.rs

+29
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
use crate::shared::constants;
44
use ddcommon::Endpoint;
55
use serde::{Deserialize, Serialize};
6+
use std::env;
67

78
/// Stacktrace collection occurs in the context of a crashing process.
89
/// If the stack is sufficiently corrupted, it is possible (but unlikely),
@@ -19,6 +20,27 @@ pub enum StacktraceCollection {
1920
EnabledWithSymbolsInReceiver,
2021
}
2122

23+
#[repr(C)]
24+
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
25+
pub enum StackTraceUnwinding {
26+
Backtrace,
27+
Libunwind,
28+
}
29+
30+
impl StackTraceUnwinding {
31+
/// Determines the unwinding strategy based on the environment variable `DD_CRASHTRACK_UNWINDING`.
32+
pub fn from_env() -> Self {
33+
match env::var("DD_CRASHTRACK_UNWINDING")
34+
.unwrap_or_else(|_| "libunwind".to_string())
35+
.to_lowercase()
36+
.as_str()
37+
{
38+
"backtrace" => StackTraceUnwinding::Backtrace,
39+
_ => StackTraceUnwinding::Libunwind, // Default to libunwind
40+
}
41+
}
42+
}
43+
2244
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
2345
pub struct CrashtrackerConfiguration {
2446
// Paths to any additional files to track, if any
@@ -27,6 +49,7 @@ pub struct CrashtrackerConfiguration {
2749
pub use_alt_stack: bool,
2850
pub endpoint: Option<Endpoint>,
2951
pub resolve_frames: StacktraceCollection,
52+
pub stacktrace_unwinding: StackTraceUnwinding,
3053
pub timeout_ms: u32,
3154
pub unix_socket_path: Option<String>,
3255
}
@@ -72,6 +95,7 @@ impl CrashtrackerConfiguration {
7295
use_alt_stack: bool,
7396
endpoint: Option<Endpoint>,
7497
resolve_frames: StacktraceCollection,
98+
7599
timeout_ms: u32,
76100
unix_socket_path: Option<String>,
77101
) -> anyhow::Result<Self> {
@@ -87,6 +111,10 @@ impl CrashtrackerConfiguration {
87111
} else {
88112
timeout_ms
89113
};
114+
115+
// Read stack unwinding strategy from environment
116+
let stacktrace_unwinding = StackTraceUnwinding::from_env();
117+
90118
// Note: don't check the receiver socket upfront, since a configuration can be interned
91119
// before the receiver is started when using an async-receiver.
92120
Ok(Self {
@@ -95,6 +123,7 @@ impl CrashtrackerConfiguration {
95123
use_alt_stack,
96124
endpoint,
97125
resolve_frames,
126+
stacktrace_unwinding,
98127
timeout_ms,
99128
unix_socket_path,
100129
})

docs/RFCs/0008-custom-unwinder.md

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# RFC 0008: Crashtracker unwinding (Version 0.1).
2+
3+
## Context
4+
5+
In the scope of [incident 34148](https://dd.enterprise.slack.com/archives/C088R4S25M5), we have incomplete unwinding on musl. As a first priority we should be able to build a version for PHP that allows unwinding on musl.
6+
I recommend reading [this issue](https://github.com/rust-lang/backtrace-rs/issues/698) for more context on the problem.
7+
8+
This is only an issue for the languages that do not have a runtime-specific unwinding tool.
9+
- Ruby
10+
- .NET
11+
- Python
12+
13+
Other languages have their unwinding mechanisms.
14+
15+
## Solution proposed
16+
17+
Unwinding from the context of the signal handler allows us to get the stacktrace beyond the signal handler. The issue above details some of the experiments I have performed.
18+
19+
### Unwinding libraries
20+
21+
Using [libunwind](https://github.com/libunwind/libunwind/) is not mandatory. A full rust solution can be considered using framehop (which is built on top of Gimli). We have experience using libunwind and debugging it. libunwind is the unwinder for the .NET profiler.
22+
When swapping for a different library we should consider maintenance, internal knowledge and the redundancy of what we are shipping.
23+
24+
### Packaging of libunwind
25+
26+
As this is a C library, we cannot use the C header directly from Rust.
27+
We need to declare the functions we use in libdatadog for the different architectures. This requires some adjustments, as the function names are architecture-specific.
28+
29+
We can rely on bindgen to generate the bindings. However, as this adds complexity to the builds, I favoured declaring the minimal set of functions.
30+
The libunwind-sys crate did not work correctly, though it would be a great starting point.
31+
32+
We should statically link libunwind and make symbols invisible to our users.
33+
Linking libunwind requires `libgcc_s.so.1`. This does not change anything, as we already needed this dependency (we are using backtrace mechanisms).
34+
35+
Size impacts looking at libdatadog_profiling.so
36+
- 9 Megs
37+
- +1.3 Meg
38+
TODO: check when compiling with PHP if this is acceptable.
39+
40+
### Deployment
41+
42+
We propose to deploy the feature OFF by default. We can then check with the customer to enable this and get the musl crash locations.
43+
If this is a success, we can progressively roll out the change.
44+
45+
### Out of scope
46+
47+
Signal safety is not discussed.
48+
The current implementation is not signal safe. We have more work to improve this.
49+
50+
Shipping libunwind so that .NET folks can reuse it.
51+
This should come in a second phase.
52+
53+
Fixing backtrace-rs
54+
The ideal solution would be to solve the upstream issue. Unwinding from signals in musl is crucial.
55+
I currently do not see an obvious way to do this with the [gcc_s functions](https://refspecs.linuxfoundation.org/LSB_4.1.0/LSB-Core-generic/LSB-Core-generic/libgcc-sman.html).

0 commit comments

Comments
 (0)