Files
SpacetimeDB/crates/runtime/src/sim_std.rs
Shubham Mishra 0be66e3e3d Deterministic runtime crate (#5016)
# Description of Changes.

Introduces deterministic runtime crate.
Integrate it with RelationalDB.

I think the best steps to review are:
- Read the
[README](https://github.com/clockworklabs/SpacetimeDB/blob/shub/sim/crates/runtime/README.md)
of runtime crate.
- Look at the integration with existing crates - `durability`, `core`,
`snapshot`, etc.
- Read runtime crate's code.

Draft branch to Test code -
https://github.com/clockworklabs/SpacetimeDB/pull/5019

# API and ABI breaking changes
NA

# Expected complexity level and risk
This change is not intended to alter any production functionality, but it
is a large amount of code.

# Testing

- new crate contains unit and integration tests.
- Existing tests should work for production.

---------

Signed-off-by: Shubham Mishra <shivam828787@gmail.com>
Co-authored-by: Zeke Foppa <196249+bfops@users.noreply.github.com>
2026-06-04 14:59:37 +00:00

208 lines
7.1 KiB
Rust

//! Std-hosted entry points for running the deterministic simulator in tests.
//!
//! The portable simulator lives in [`crate::sim`]. This module is deliberately
//! host-specific: it installs thread-local context while a simulation is
//! running, checks determinism by replaying a seed in fresh OS threads, and
//! intercepts a few libc calls so std code cannot silently escape determinism.
#![allow(clippy::disallowed_macros)]
use alloc::boxed::Box;
use core::{cell::Cell, future::Future};
use std::sync::OnceLock;
use crate::sim;
// Public entry points.
/// Drive `future` to completion on `runtime` with the std-hosted determinism
/// guard active.
///
/// This is a thin wrapper around [`sim::Runtime::block_on`] and the usual
/// entry point for DST tests running inside a hosted process. For the
/// duration of the call the current OS thread is flagged as being inside a
/// simulation, which lets the thread-creation hook reject OS thread spawns.
pub fn block_on<F>(runtime: &mut sim::Runtime, future: F) -> F::Output
where
    F: Future,
{
    // Bound to a named variable so the guard lives until this function
    // returns; its `Drop` impl then restores the previous flag value.
    let _simulation_guard = enter_simulation_thread();
    runtime.block_on(future)
}
/// Run the future produced by `make_future` twice with the same `seed` and
/// assert that both runs produce the same determinism trace.
///
/// The first pass records a determinism log; the second pass replays against
/// that log and returns the future's output. Each pass runs on its own freshly
/// spawned OS thread so thread-local std state is not shared between the
/// recording and the replay.
///
/// # Panics
///
/// Panics if either pass panics (after printing a note naming `seed`), or if
/// the replay pass reports a determinism mismatch.
pub fn check_determinism<M, F>(seed: u64, make_future: M) -> F::Output
where
    M: Fn() -> F + Clone + Send + 'static,
    F: Future + 'static,
    F::Output: Send + 'static,
{
    // Pass 1: record. The output of this run is discarded; only the log is kept.
    let recording_factory = make_future.clone();
    let recording = std::thread::spawn(move || {
        let mut runtime = sim::Runtime::new(seed);
        runtime.enable_determinism_log();
        block_on(&mut runtime, recording_factory());
        runtime
            .take_determinism_log()
            .expect("determinism log should be enabled")
    });
    let recorded_log = match recording.join() {
        Ok(log) => log,
        Err(payload) => panic_with_seed(seed, payload),
    };

    // Pass 2: replay against the recorded log and hand back the output.
    let replay = std::thread::spawn(move || {
        let mut runtime = sim::Runtime::new(seed);
        runtime.enable_determinism_check(recorded_log);
        let output = block_on(&mut runtime, make_future());
        if let Err(err) = runtime.finish_determinism_check() {
            panic!("{err}");
        }
        output
    });
    match replay.join() {
        Ok(output) => output,
        Err(payload) => panic_with_seed(seed, payload),
    }
}
/// Print a reproduction hint naming `seed` to stderr, then resume unwinding
/// with the original panic `payload`.
///
/// Never returns: the payload is re-raised via `resume_unwind`, so the
/// caller observes the same panic value that the worker thread produced.
fn panic_with_seed(seed: u64, payload: Box<dyn core::any::Any + Send>) -> ! {
    use std::panic::resume_unwind;

    eprintln!("note: run with --seed {seed} to reproduce this error");
    resume_unwind(payload)
}
// Simulation thread context.
// Ambient state used only while `sim_std::block_on` is driving a simulation.
//
// The simulator itself stays explicit-handle based. This thread-local only
// marks whether the current OS thread is owned by a running simulation so
// host thread creation can be rejected.
thread_local! {
    // `true` while the current OS thread is marked as simulation-owned, so
    // thread creation hooks can reject accidental escapes to the host
    // scheduler. Starts out `false` on every thread.
    static IN_SIMULATION: Cell<bool> = const { Cell::new(false) };
}

/// Scope guard for the simulation-thread marking.
///
/// Remembers the value `IN_SIMULATION` held before the guard was created and
/// writes it back on drop, so nested guards unwind in the expected order.
#[must_use = "the simulation-thread marking ends as soon as the guard is dropped"]
struct SimulationThreadGuard {
    /// Value of `IN_SIMULATION` before this guard set it to `true`.
    previous: bool,
}

/// Mark the current OS thread as inside a simulation.
///
/// Returns a guard that restores the previous marking when dropped; keep it
/// bound to a variable for as long as the marking should stay active.
fn enter_simulation_thread() -> SimulationThreadGuard {
    let previous = IN_SIMULATION.with(|state| state.replace(true));
    SimulationThreadGuard { previous }
}

/// Report whether the current OS thread is marked as inside a simulation.
fn in_simulation() -> bool {
    IN_SIMULATION.with(Cell::get)
}

impl Drop for SimulationThreadGuard {
    /// Put back whatever marking was active before this guard was created.
    fn drop(&mut self) {
        IN_SIMULATION.with(|state| state.set(self.previous));
    }
}
// Thread hook.
// Interpose `pthread_attr_init` to intercept Unix thread creation.
//
// `std::thread::Builder::spawn` initializes pthread attributes before creating
// the thread, so failing this call while a simulation is active stops hidden
// OS thread creation early, before host scheduling can affect replay. When no
// simulation is active the call is forwarded to the next definition of the
// symbol, looked up through `RTLD_NEXT`.
#[cfg(unix)]
#[unsafe(no_mangle)]
#[inline(never)]
unsafe extern "C" fn pthread_attr_init(attr: *mut libc::pthread_attr_t) -> libc::c_int {
    type PthreadAttrInit = unsafe extern "C" fn(*mut libc::pthread_attr_t) -> libc::c_int;
    // Resolved lazily on the first call made outside a simulation, then reused.
    static PTHREAD_ATTR_INIT: OnceLock<PthreadAttrInit> = OnceLock::new();

    // Inside a simulation: report the attempt and fail with a nonzero status
    // without touching `attr`.
    if in_simulation() {
        eprintln!("attempt to spawn a system thread in simulation.");
        eprintln!("note: use simulator tasks instead.");
        return -1;
    }

    let real_init = *PTHREAD_ATTR_INIT.get_or_init(|| {
        // `RTLD_NEXT` skips this interposed function and finds the
        // implementation that would have been called without the simulator.
        // SAFETY: the symbol name is a valid NUL-terminated C string.
        let symbol = unsafe { libc::dlsym(libc::RTLD_NEXT, c"pthread_attr_init".as_ptr().cast()) };
        assert!(!symbol.is_null(), "failed to resolve original pthread_attr_init");
        // SAFETY: the non-null symbol is the address of `pthread_attr_init`,
        // whose C signature matches `PthreadAttrInit`.
        unsafe { std::mem::transmute::<*mut libc::c_void, PthreadAttrInit>(symbol) }
    });

    // SAFETY: `attr` is forwarded unchanged; the caller's obligations are the
    // same as for the real `pthread_attr_init`.
    unsafe { real_init(attr) }
}
// Randomness syscall hooks.
// Interpose `getrandom` to observe OS randomness requests.
//
// This crate no longer tries to make host randomness deterministic. A request
// made while a simulation is active is reported on stderr together with a
// backtrace, and every request is then delegated to the host OS.
#[unsafe(no_mangle)]
#[inline(never)]
unsafe extern "C" fn getrandom(buf: *mut u8, buflen: usize, flags: u32) -> isize {
    if in_simulation() {
        eprintln!("warning: randomness requested; delegating to host OS");
        let backtrace = std::backtrace::Backtrace::force_capture();
        eprintln!("{}", backtrace);
    }

    let host_getrandom = real_getrandom();
    // SAFETY: the arguments are forwarded unchanged, so the caller's
    // obligations are the same as for the host `getrandom`.
    unsafe { host_getrandom(buf, buflen, flags) }
}
/// Return the host `getrandom` implementation that the interposed hook
/// shadows.
///
/// The symbol is looked up through `RTLD_NEXT` on first use and cached for
/// later calls.
///
/// # Panics
///
/// Panics if the lookup yields a null pointer.
#[cfg(target_os = "linux")]
fn real_getrandom() -> unsafe extern "C" fn(*mut u8, usize, u32) -> isize {
    type GetrandomFn = unsafe extern "C" fn(*mut u8, usize, u32) -> isize;
    static GETRANDOM: OnceLock<GetrandomFn> = OnceLock::new();

    let resolve = || {
        // SAFETY: the symbol name is a valid NUL-terminated C string.
        let symbol = unsafe { libc::dlsym(libc::RTLD_NEXT, c"getrandom".as_ptr().cast()) };
        assert!(!symbol.is_null(), "failed to resolve original getrandom");
        // SAFETY: the non-null symbol is the address of `getrandom`, whose C
        // signature matches `GetrandomFn`.
        unsafe { std::mem::transmute::<*mut libc::c_void, GetrandomFn>(symbol) }
    };
    *GETRANDOM.get_or_init(resolve)
}
/// Non-Linux counterpart of `real_getrandom`.
///
/// No resolver for the host `getrandom` is provided on these targets, so the
/// build is failed with an explicit message instead of compiling a hook that
/// has nothing to delegate to.
#[cfg(not(target_os = "linux"))]
fn real_getrandom() -> unsafe extern "C" fn(*mut u8, usize, u32) -> isize {
compile_error!("unsupported OS for DST getrandom override");
}
// Hook `getentropy` and forward it to the interposed `getrandom`, so both
// entry points share the same warn-and-delegate behaviour.
//
// The 256-byte limit is part of the getentropy contract. Keeping this wrapper
// small means all entropy decisions stay centralized in `getrandom`.
//
// Returns 0 only when `getrandom` reports that all `buflen` bytes were
// written; an oversized request, a `getrandom` failure, or a short read all
// yield -1. This wrapper does not set `errno` itself.
#[unsafe(no_mangle)]
#[inline(never)]
unsafe extern "C" fn getentropy(buf: *mut u8, buflen: usize) -> i32 {
    /// Largest request size accepted by `getentropy`.
    const GETENTROPY_MAX: usize = 256;

    if buflen > GETENTROPY_MAX {
        return -1;
    }

    // SAFETY: `buf` and `buflen` are forwarded unchanged, so the caller's
    // obligations are the same as for `getrandom`.
    let filled = unsafe { getrandom(buf, buflen, 0) };

    // `buflen` is at most 256 here, so the cast to `isize` is lossless.
    // Anything other than a complete fill (including -1) is a failure:
    // reporting success for a partially written buffer would hand the caller
    // bytes that were never randomized.
    if filled == buflen as isize {
        0
    } else {
        -1
    }
}
#[cfg(test)]
mod tests {
    use super::getentropy;
    use crate::sim;

    /// Attempting to spawn an OS thread while a simulation is running panics.
    #[test]
    #[cfg(unix)]
    fn runtime_forbids_system_thread_spawn() {
        let mut runtime = sim::Runtime::new(200);
        super::block_on(&mut runtime, async {
            let spawn_attempt = || std::thread::Builder::new().spawn(|| {});
            let outcome = std::panic::catch_unwind(spawn_attempt);
            assert!(outcome.is_err());
        });
    }

    /// Outside a simulation, `getentropy` succeeds for a small buffer.
    #[test]
    fn getentropy_delegates_to_host_randomness_outside_simulation() {
        let mut buffer = [0u8; 24];
        // SAFETY: the pointer and length describe the live local `buffer`.
        let status = unsafe { getentropy(buffer.as_mut_ptr(), buffer.len()) };
        assert_eq!(status, 0);
    }
}