commitlog: Resumption of sealed commitlog (#4650)

The commitlog so far assumed that the latest segment is never compressed
and can be opened for writing (if it is intact).

However, restoring the entire commitlog from cold storage results in all
segments being compressed. Make it so the resumption logic reads the
metadata from the potentially compressed last segment, and starts a new
segment for writing if the latest one was indeed compressed.

# Expected complexity level and risk

1.5

# Testing

Added a test.
This commit is contained in:
Kim Altintop
2026-03-17 13:02:17 +01:00
committed by GitHub
parent 68022eb2d6
commit 7cd72b8adf
6 changed files with 201 additions and 45 deletions
+48 -4
View File
@@ -10,7 +10,7 @@ use tempfile::NamedTempFile;
use crate::segment::FileLike;
use super::{Repo, SegmentLen, TxOffset, TxOffsetIndex, TxOffsetIndexMut};
use super::{Repo, SegmentLen, SegmentReader, TxOffset, TxOffsetIndex, TxOffsetIndexMut};
const SEGMENT_FILE_EXT: &str = ".stdb.log";
@@ -154,9 +154,52 @@ impl FileLike for NamedTempFile {
}
}
/// A file-backed, read-only segment.
///
/// Transparently handles reading compressed segments.
/// [Self::sealed] returns `true` if the segment is compressed.
pub struct ReadOnlySegment {
inner: CompressReader,
}
impl SegmentReader for ReadOnlySegment {
#[inline]
fn sealed(&self) -> bool {
self.inner.is_compressed()
}
}
impl io::Read for ReadOnlySegment {
#[inline]
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
self.inner.read(buf)
}
}
impl io::BufRead for ReadOnlySegment {
#[inline]
fn fill_buf(&mut self) -> io::Result<&[u8]> {
self.inner.fill_buf()
}
#[inline]
fn consume(&mut self, amount: usize) {
self.inner.consume(amount);
}
}
impl io::Seek for ReadOnlySegment {
#[inline]
fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
self.inner.seek(pos)
}
}
impl SegmentLen for ReadOnlySegment {}
impl Repo for Fs {
type SegmentWriter = File;
type SegmentReader = CompressReader;
type SegmentReader = ReadOnlySegment;
fn create_segment(&self, offset: u64) -> io::Result<Self::SegmentWriter> {
File::options()
@@ -198,8 +241,9 @@ impl Repo for Fs {
}
fn open_segment_reader(&self, offset: u64) -> io::Result<Self::SegmentReader> {
debug!("fs: open segment at {}", self.segment_path(offset).display());
let file = File::open(self.segment_path(offset))?;
CompressReader::new(file)
CompressReader::new(file).map(|inner| ReadOnlySegment { inner })
}
fn remove_segment(&self, offset: u64) -> io::Result<()> {
@@ -215,7 +259,7 @@ impl Repo for Fs {
fn compress_segment(&self, offset: u64) -> io::Result<()> {
let src = self.open_segment_reader(offset)?;
// if it's already compressed, leave it be
let CompressReader::None(mut src) = src else {
let CompressReader::None(mut src) = src.inner else {
return Ok(());
};
+3 -3
View File
@@ -10,7 +10,7 @@ use crate::repo::{
};
mod segment;
pub use segment::Segment;
pub use segment::{ReadOnlySegment, Segment};
pub const PAGE_SIZE: usize = 4096;
@@ -52,7 +52,7 @@ impl fmt::Display for Memory {
impl Repo for Memory {
type SegmentWriter = Segment;
type SegmentReader = io::BufReader<Segment>;
type SegmentReader = ReadOnlySegment;
fn create_segment(&self, offset: u64) -> io::Result<Self::SegmentWriter> {
let mut inner = self.segments.write().unwrap();
@@ -88,7 +88,7 @@ impl Repo for Memory {
}
fn open_segment_reader(&self, offset: u64) -> io::Result<Self::SegmentReader> {
self.open_segment_writer(offset).map(io::BufReader::new)
self.open_segment_writer(offset).map(Into::into)
}
fn remove_segment(&self, offset: u64) -> io::Result<()> {
+48 -1
View File
@@ -7,7 +7,7 @@ use std::{
use crate::{
repo::{
mem::{SpaceOnDevice, PAGE_SIZE},
SegmentLen,
SegmentLen, SegmentReader,
},
segment::FileLike,
};
@@ -318,3 +318,50 @@ mod async_impls {
}
}
}
pub struct ReadOnlySegment {
inner: io::BufReader<Segment>,
}
impl From<Segment> for ReadOnlySegment {
fn from(inner: Segment) -> Self {
Self {
inner: io::BufReader::new(inner),
}
}
}
impl SegmentReader for ReadOnlySegment {
    /// Memory segments don't support compression, so they are never sealed.
fn sealed(&self) -> bool {
false
}
}
impl io::Read for ReadOnlySegment {
#[inline]
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
self.inner.read(buf)
}
}
impl io::BufRead for ReadOnlySegment {
#[inline]
fn fill_buf(&mut self) -> io::Result<&[u8]> {
self.inner.fill_buf()
}
#[inline]
fn consume(&mut self, amount: usize) {
self.inner.consume(amount);
}
}
impl io::Seek for ReadOnlySegment {
#[inline]
fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
self.inner.seek(pos)
}
}
impl SegmentLen for ReadOnlySegment {}
+50 -32
View File
@@ -1,4 +1,7 @@
use std::{fmt, io};
use std::{
fmt,
io::{self, Seek},
};
use log::{debug, warn};
@@ -52,8 +55,14 @@ pub trait SegmentLen: io::Seek {
}
}
pub trait SegmentReader: io::BufRead + SegmentLen + Send + Sync {}
impl<T: io::BufRead + SegmentLen + Send + Sync> SegmentReader for T {}
pub trait SegmentReader: io::BufRead + SegmentLen + Send + Sync {
/// Whether the segment is considered immutable.
///
/// Currently, this is true when the segment is compressed.
/// [resume_segment_writer] uses this method to indicate that a new segment
/// should be created when opening a commitlog.
fn sealed(&self) -> bool;
}
pub trait SegmentWriter: FileLike + io::Read + io::Write + SegmentLen + Send + Sync {}
impl<T: FileLike + io::Read + io::Write + SegmentLen + Send + Sync> SegmentWriter for T {}
@@ -243,21 +252,9 @@ pub fn resume_segment_writer<R: Repo>(
opts: Options,
offset: u64,
) -> io::Result<Result<Writer<R::SegmentWriter>, Metadata>> {
let mut storage = repo.open_segment_writer(offset)?;
// Ensure we have enough space for this segment.
// The segment could have been created without the `fallocate` feature
// enabled, so we call this here again to ensure writes can't fail due to
// ENOSPC.
fallocate(&mut storage, &opts)?;
let mut reader = repo.open_segment_reader(offset)?;
let offset_index = repo.get_offset_index(offset).ok();
let Metadata {
header,
tx_range,
size_in_bytes,
max_epoch,
max_commit_offset: _,
max_commit: _,
} = match Metadata::extract(offset, &mut storage, offset_index.as_ref()) {
let meta = match Metadata::extract(offset, &mut reader, offset_index.as_ref()) {
Err(error::SegmentMetadata::InvalidCommit { sofar, source }) => {
warn!("invalid commit in segment {offset}: {source}");
debug!("sofar={sofar:?}");
@@ -266,34 +263,55 @@ pub fn resume_segment_writer<R: Repo>(
Err(error::SegmentMetadata::Io(e)) => return Err(e),
Ok(meta) => meta,
};
header
meta.header
.ensure_compatible(opts.log_format_version, Commit::CHECKSUM_ALGORITHM)
.map_err(|msg| io::Error::new(io::ErrorKind::InvalidData, msg))?;
// When resuming, the log format version must be equal.
if header.log_format_version != opts.log_format_version {
if meta.header.log_format_version != opts.log_format_version {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
format!(
"log format version mismatch: current={} segment={}",
opts.log_format_version, header.log_format_version
opts.log_format_version, meta.header.log_format_version
),
));
}
Ok(Ok(Writer {
commit: Commit {
min_tx_offset: tx_range.end,
n: 0,
records: Vec::new(),
epoch: max_epoch,
},
inner: io::BufWriter::new(storage),
if reader.sealed() {
Ok(Err(meta))
} else {
let Metadata {
header: _,
tx_range,
size_in_bytes,
max_epoch,
max_commit_offset: _,
max_commit: _,
} = meta;
let mut writer = repo.open_segment_writer(offset)?;
// Ensure we have enough space for this segment.
// The segment could have been created without the `fallocate` feature
// enabled, so we call this here again to ensure writes can't fail due
// to ENOSPC.
fallocate(&mut writer, &opts)?;
// We use `O_APPEND`, but make the file offset consistent regardless.
writer.seek(io::SeekFrom::End(0))?;
min_tx_offset: tx_range.start,
bytes_written: size_in_bytes,
Ok(Ok(Writer {
commit: Commit {
min_tx_offset: tx_range.end,
n: 0,
records: Vec::new(),
epoch: max_epoch,
},
inner: io::BufWriter::new(writer),
offset_index_head: create_offset_index_writer(repo, offset, opts),
}))
min_tx_offset: tx_range.start,
bytes_written: size_in_bytes,
offset_index_head: create_offset_index_writer(repo, offset, opts),
}))
}
}
/// Open the existing segment at `offset` for reading.
+1 -1
View File
@@ -271,7 +271,7 @@ impl fmt::Display for ShortMem {
impl Repo for ShortMem {
type SegmentWriter = ShortSegment;
type SegmentReader = io::BufReader<repo::mem::Segment>;
type SegmentReader = repo::mem::ReadOnlySegment;
fn create_segment(&self, offset: u64) -> io::Result<Self::SegmentWriter> {
self.inner.create_segment(offset).map(|inner| ShortSegment {
+51 -4
View File
@@ -1,6 +1,7 @@
use log::info;
use spacetimedb_commitlog::repo::Repo;
use spacetimedb_commitlog::tests::helpers::enable_logging;
use spacetimedb_commitlog::{payload, Commitlog, Options};
use spacetimedb_commitlog::{commitlog, payload, repo, Commitlog, Options};
use spacetimedb_paths::server::CommitLogDir;
use spacetimedb_paths::FromPathUnchecked;
use tempfile::tempdir;
@@ -75,6 +76,12 @@ fn resets() {
}
}
/// Try to generate commitlogs that will be amenable to compression -
/// random data doesn't compress well, so try to have there be repetition
fn compressible_payloads() -> impl Iterator<Item = [u8; 256]> {
(0..4).map(|_| gen_payload()).cycle()
}
#[test]
fn compression() {
enable_logging();
@@ -90,9 +97,7 @@ fn compression() {
)
.unwrap();
// try to generate commitlogs that will be amenable to compression -
// random data doesn't compress well, so try and have there be repetition
let payloads = (0..4).map(|_| gen_payload()).cycle().take(1024).collect::<Vec<_>>();
let payloads = compressible_payloads().take(1024).collect::<Vec<_>>();
for (i, payload) in payloads.iter().enumerate() {
clog.commit([(i as u64, *payload)]).unwrap();
}
@@ -114,3 +119,45 @@ fn compression() {
.enumerate()
.all(|(i, x)| x.offset == i as u64 && x.txdata == payloads[i]));
}
/// When restoring an archived commitlog, all segments are compressed and should
/// remain immutable.
///
/// Tests that this is upheld, i.e. a fresh segment is created when resuming
/// writes.
#[test]
fn all_segments_sealed() {
enable_logging();
let root = tempdir().unwrap();
let path = CommitLogDir::from_path_unchecked(root.path());
let opts = Options {
max_segment_size: 64 * 1024,
..<_>::default()
};
let num_commits = 1024;
let repo = repo::Fs::new(path, None).unwrap();
{
let mut clog = commitlog::Generic::open(&repo, opts).unwrap();
for (i, payload) in compressible_payloads().take(num_commits).enumerate() {
clog.commit([(i as u64, payload)]).unwrap();
}
clog.flush().unwrap();
clog.sync();
}
let segments = repo.existing_offsets().unwrap();
let num_segments = segments.len();
// Compress all segments via the `repo`,
// to not trigger the assert that the head segment cannot be compressed.
for segment in segments {
repo.compress_segment(segment).unwrap();
}
// Re-opening the commitlog should create a fresh segment at offset `num_commits`.
let _ = commitlog::Generic::<_, [u8; 256]>::open(&repo, opts).unwrap();
let segments = repo.existing_offsets().unwrap();
assert_eq!(num_segments + 1, segments.len());
assert_eq!(segments.last().copied(), Some(num_commits as u64));
}