mirror of
https://github.com/clockworklabs/SpacetimeDB.git
synced 2026-05-06 07:26:43 -04:00
d639be0af6
# Description of Changes First two commits are code motion. The second commit fixes a mistake I made in a previous PR that made us use potentially several `ReplayCommittedState`s per `datastore.replay(..)`. More to come in terms of PRs; stay tuned. # API and ABI breaking changes None # Expected complexity level and risk 3
875 lines
36 KiB
Rust
875 lines
36 KiB
Rust
use super::committed_state::CommittedState;
|
|
use super::datastore::{Locking, Result};
|
|
use crate::db_metrics::DB_METRICS;
|
|
use crate::error::{DatastoreError, IndexError, TableError};
|
|
use crate::locking_tx_datastore::state_view::{iter_st_column_for_table, StateView};
|
|
use crate::system_tables::{
|
|
is_built_in_meta_row, StColumnRow, StFields as _, StTableFields, StTableRow, ST_COLUMN_ID, ST_TABLE_ID,
|
|
};
|
|
use anyhow::{anyhow, Context};
|
|
use core::cell::RefMut;
|
|
use core::ops::{Deref, DerefMut, RangeBounds};
|
|
use parking_lot::RwLockWriteGuard;
|
|
use prometheus::core::{AtomicF64, GenericGauge};
|
|
use prometheus::IntGauge;
|
|
use spacetimedb_commitlog::payload::txdata;
|
|
use spacetimedb_data_structures::map::{HashSet, IntMap, IntSet};
|
|
use spacetimedb_durability::History;
|
|
use spacetimedb_durability::Txdata;
|
|
use spacetimedb_lib::Identity;
|
|
use spacetimedb_primitives::{ColId, ColList, TableId};
|
|
use spacetimedb_sats::algebraic_value::de::ValueDeserializer;
|
|
use spacetimedb_sats::buffer::BufReader;
|
|
use spacetimedb_sats::{bsatn, AlgebraicValue, Deserialize, ProductValue};
|
|
use spacetimedb_schema::schema::{ColumnSchema, TableSchema};
|
|
use spacetimedb_schema::table_name::TableName;
|
|
use spacetimedb_table::indexes::RowPointer;
|
|
use spacetimedb_table::table::{InsertError, RowRef};
|
|
use std::cell::RefCell;
|
|
use std::sync::Arc;
|
|
use thiserror::Error;
|
|
|
|
pub fn apply_history(
|
|
datastore: &Locking,
|
|
database_identity: Identity,
|
|
history: impl History<TxData = Txdata<ProductValue>>,
|
|
counters: ApplyHistoryCounters,
|
|
) -> Result<()> {
|
|
log::info!("[{database_identity}] DATABASE: applying transaction history...");
|
|
|
|
// TODO: Revisit once we actually replay history suffixes, ie. starting
|
|
// from an offset larger than the history's min offset.
|
|
// TODO: We may want to require that a `tokio::runtime::Handle` is
|
|
// always supplied when constructing a `RelationalDB`. This would allow
|
|
// to spawn a timer task here which just prints the progress periodically
|
|
// in case the history is finite but very long.
|
|
let (_, max_tx_offset) = history.tx_range_hint();
|
|
let mut last_logged_percentage = 0;
|
|
let progress = |tx_offset: u64| {
|
|
if let Some(max_tx_offset) = max_tx_offset {
|
|
let percentage = f64::floor((tx_offset as f64 / max_tx_offset as f64) * 100.0) as i32;
|
|
if percentage > last_logged_percentage && percentage % 10 == 0 {
|
|
log::info!("[{database_identity}] Loaded {percentage}% ({tx_offset}/{max_tx_offset})");
|
|
last_logged_percentage = percentage;
|
|
}
|
|
// Print _something_ even if we don't know what's still ahead.
|
|
} else if tx_offset.is_multiple_of(10_000) {
|
|
log::info!("[{database_identity}] Loading transaction {tx_offset}");
|
|
}
|
|
};
|
|
|
|
let time_before = std::time::Instant::now();
|
|
|
|
let mut replay = datastore.replay(
|
|
progress,
|
|
// We don't want to instantiate an incorrect state;
|
|
// if the commitlog contains an inconsistency we'd rather get a hard error than showing customers incorrect data.
|
|
ErrorBehavior::FailFast,
|
|
);
|
|
let start_tx_offset = replay.next_tx_offset();
|
|
history
|
|
.fold_transactions_from(start_tx_offset, &mut replay)
|
|
.map_err(anyhow::Error::from)?;
|
|
|
|
let time_elapsed = time_before.elapsed();
|
|
counters.replay_commitlog_time_seconds.set(time_elapsed.as_secs_f64());
|
|
|
|
let end_tx_offset = replay.next_tx_offset();
|
|
counters
|
|
.replay_commitlog_num_commits
|
|
.set((end_tx_offset - start_tx_offset) as _);
|
|
|
|
log::info!("[{database_identity}] DATABASE: applied transaction history");
|
|
drop(replay); // Neccessary to avoid a deadlock.
|
|
datastore.rebuild_state_after_replay()?;
|
|
log::info!("[{database_identity}] DATABASE: rebuilt state after replay");
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub struct ApplyHistoryCounters {
|
|
pub replay_commitlog_time_seconds: GenericGauge<AtomicF64>,
|
|
pub replay_commitlog_num_commits: IntGauge,
|
|
}
|
|
|
|
#[derive(Debug, Error)]
|
|
pub enum ReplayError {
|
|
#[error("Expected tx offset {expected}, encountered {encountered}")]
|
|
InvalidOffset { expected: u64, encountered: u64 },
|
|
#[error(transparent)]
|
|
Decode(#[from] bsatn::DecodeError),
|
|
#[error(transparent)]
|
|
Db(#[from] DatastoreError),
|
|
#[error(transparent)]
|
|
Any(#[from] anyhow::Error),
|
|
}
|
|
|
|
/// A [`spacetimedb_commitlog::Decoder`] suitable for replaying a transaction
|
|
/// history into the database state.
|
|
pub struct Replay<'a, F> {
|
|
database_identity: Identity,
|
|
committed_state: RefCell<ReplayCommittedState<'a>>,
|
|
progress: RefCell<F>,
|
|
error_behavior: ErrorBehavior,
|
|
}
|
|
|
|
impl<'a, F> Replay<'a, F> {
|
|
pub fn new(
|
|
database_identity: Identity,
|
|
committed_state: RwLockWriteGuard<'a, CommittedState>,
|
|
progress: F,
|
|
error_behavior: ErrorBehavior,
|
|
) -> Self {
|
|
Self {
|
|
database_identity,
|
|
committed_state: RefCell::new(ReplayCommittedState::new(committed_state)),
|
|
progress: RefCell::new(progress),
|
|
error_behavior,
|
|
}
|
|
}
|
|
|
|
fn using_visitor<T>(&self, f: impl FnOnce(&mut ReplayVisitor<'_, '_, F>) -> T) -> T {
|
|
let mut visitor = ReplayVisitor {
|
|
database_identity: &self.database_identity,
|
|
committed_state: &mut self.committed_state.borrow_mut(),
|
|
progress: &mut *self.progress.borrow_mut(),
|
|
dropped_table_names: IntMap::default(),
|
|
error_behavior: self.error_behavior,
|
|
};
|
|
f(&mut visitor)
|
|
}
|
|
|
|
pub fn next_tx_offset(&self) -> u64 {
|
|
self.committed_state.borrow().next_tx_offset
|
|
}
|
|
|
|
// NOTE: This is not unused.
|
|
pub fn committed_state(&self) -> RefMut<'_, ReplayCommittedState<'a>> {
|
|
self.committed_state.borrow_mut()
|
|
}
|
|
}
|
|
|
|
impl<F: FnMut(u64)> spacetimedb_commitlog::Decoder for &mut Replay<'_, F> {
|
|
type Record = txdata::Txdata<ProductValue>;
|
|
type Error = txdata::DecoderError<ReplayError>;
|
|
|
|
fn decode_record<'a, R: BufReader<'a>>(
|
|
&self,
|
|
version: u8,
|
|
tx_offset: u64,
|
|
reader: &mut R,
|
|
) -> std::result::Result<Self::Record, Self::Error> {
|
|
self.using_visitor(|visitor| txdata::decode_record_fn(visitor, version, tx_offset, reader))
|
|
}
|
|
|
|
fn consume_record<'a, R: BufReader<'a>>(
|
|
&self,
|
|
version: u8,
|
|
tx_offset: u64,
|
|
reader: &mut R,
|
|
) -> std::result::Result<(), Self::Error> {
|
|
self.using_visitor(|visitor| txdata::consume_record_fn(visitor, version, tx_offset, reader))
|
|
}
|
|
|
|
fn skip_record<'a, R: BufReader<'a>>(
|
|
&self,
|
|
version: u8,
|
|
_tx_offset: u64,
|
|
reader: &mut R,
|
|
) -> std::result::Result<(), Self::Error> {
|
|
self.using_visitor(|visitor| txdata::skip_record_fn(visitor, version, reader))
|
|
}
|
|
}
|
|
|
|
// n.b. (Tyler) We actually **do not** want to check constraints at replay
|
|
// time because not only is it a pain, but actually **subtly wrong** the
|
|
// way we have it implemented. It's wrong because the actual constraints of
|
|
// the database may change as different transactions are added to the
|
|
// schema and you would actually have to change your indexes and
|
|
// constraints as you replayed the log. This we are not currently doing
|
|
// (we're building all the non-bootstrapped indexes at the end after
|
|
// replaying), and thus aren't implementing constraint checking correctly
|
|
// as it stands.
|
|
//
|
|
// However, the above is all rendered moot anyway because we don't need to
|
|
// check constraints while replaying if we just assume that they were all
|
|
// checked prior to the transaction committing in the first place.
|
|
//
|
|
// Note also that operation/mutation ordering **does not** matter for
|
|
// operations inside a transaction of the message log assuming we only ever
|
|
// insert **OR** delete a unique row in one transaction. If we ever insert
|
|
// **AND** delete then order **does** matter. The issue caused by checking
|
|
// constraints for each operation while replaying does not imply that order
|
|
// matters. Ordering of operations would **only** matter if you wanted to
|
|
// view the state of the database as of a partially applied transaction. We
|
|
// never actually want to do this, because after a transaction has been
|
|
// committed, it is assumed that all operations happen instantaneously and
|
|
// atomically at the timestamp of the transaction. The only time that we
|
|
// actually want to view the state of a database while a transaction is
|
|
// partially applied is while the transaction is running **before** it
|
|
// commits. Thus, we only care about operation ordering while the
|
|
// transaction is running, but we do not care about it at all in the
|
|
// context of the commit log.
|
|
//
|
|
// Not caring about the order in the log, however, requires that we **do
|
|
// not** check index constraints during replay of transaction operations.
|
|
// We **could** check them in between transactions if we wanted to update
|
|
// the indexes and constraints as they changed during replay, but that is
|
|
// unnecessary.
|
|
|
|
/// What to do when encountering an error during commitlog replay due to an invalid TX.
|
|
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
|
|
pub enum ErrorBehavior {
|
|
/// Return an error and refuse to continue.
|
|
///
|
|
/// This is the behavior in production, as we don't want to reconstruct an incorrect state.
|
|
FailFast,
|
|
/// Log a warning and continue replay.
|
|
///
|
|
/// This behavior is used when inspecting broken commitlogs during debugging.
|
|
Warn,
|
|
}
|
|
|
|
struct ReplayVisitor<'a, 'cs, F> {
|
|
database_identity: &'a Identity,
|
|
committed_state: &'a mut ReplayCommittedState<'cs>,
|
|
progress: &'a mut F,
|
|
// Since deletes are handled before truncation / drop, sometimes the schema
|
|
// info is gone. We save the name on the first delete of that table so metrics
|
|
// can still show a name.
|
|
dropped_table_names: IntMap<TableId, TableName>,
|
|
error_behavior: ErrorBehavior,
|
|
}
|
|
|
|
impl<F> ReplayVisitor<'_, '_, F> {
|
|
/// Process `err` according to `self.error_behavior`,
|
|
/// either warning about it or returning it.
|
|
///
|
|
/// If this method returns an `Err`, the caller should bubble up that error with `?`.
|
|
fn process_error(&self, err: ReplayError) -> std::result::Result<(), ReplayError> {
|
|
match self.error_behavior {
|
|
ErrorBehavior::FailFast => Err(err),
|
|
ErrorBehavior::Warn => {
|
|
log::warn!("{err:?}");
|
|
Ok(())
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<F: FnMut(u64)> spacetimedb_commitlog::payload::txdata::Visitor for ReplayVisitor<'_, '_, F> {
|
|
type Error = ReplayError;
|
|
// NOTE: Technically, this could be `()` if and when we can extract the
|
|
// row data without going through `ProductValue` (PV).
|
|
// To accommodate auxiliary traversals (e.g. for analytics), we may want to
|
|
// provide a separate visitor yielding PVs.
|
|
type Row = ProductValue;
|
|
|
|
fn skip_row<'a, R: BufReader<'a>>(
|
|
&mut self,
|
|
table_id: TableId,
|
|
reader: &mut R,
|
|
) -> std::result::Result<(), Self::Error> {
|
|
let schema = self.committed_state.schema_for_table(table_id)?;
|
|
ProductValue::decode(schema.get_row_type(), reader)?;
|
|
Ok(())
|
|
}
|
|
|
|
fn visit_insert<'a, R: BufReader<'a>>(
|
|
&mut self,
|
|
table_id: TableId,
|
|
reader: &mut R,
|
|
) -> std::result::Result<Self::Row, Self::Error> {
|
|
let schema = self.committed_state.schema_for_table(table_id)?;
|
|
let row = ProductValue::decode(schema.get_row_type(), reader)?;
|
|
|
|
if let Err(e) = self
|
|
.committed_state
|
|
.replay_insert(table_id, &schema, &row)
|
|
.with_context(|| {
|
|
format!(
|
|
"Error inserting row {:?} during transaction {:?} playback",
|
|
row, self.committed_state.next_tx_offset
|
|
)
|
|
})
|
|
{
|
|
self.process_error(e.into())?;
|
|
}
|
|
// NOTE: the `rdb_num_table_rows` metric is used by the query optimizer,
|
|
// and therefore has performance implications and must not be disabled.
|
|
DB_METRICS
|
|
.rdb_num_table_rows
|
|
.with_label_values(self.database_identity, &table_id.into(), &schema.table_name)
|
|
.inc();
|
|
|
|
Ok(row)
|
|
}
|
|
|
|
fn visit_delete<'a, R: BufReader<'a>>(
|
|
&mut self,
|
|
table_id: TableId,
|
|
reader: &mut R,
|
|
) -> std::result::Result<Self::Row, Self::Error> {
|
|
let schema = self.committed_state.schema_for_table(table_id)?;
|
|
// TODO: avoid clone
|
|
let table_name = schema.table_name.clone();
|
|
let row = ProductValue::decode(schema.get_row_type(), reader)?;
|
|
|
|
// If this is a delete from the `st_table` system table, save the name
|
|
if table_id == ST_TABLE_ID {
|
|
let ab = AlgebraicValue::Product(row.clone());
|
|
let st_table_row = StTableRow::deserialize(ValueDeserializer::from_ref(&ab)).unwrap();
|
|
self.dropped_table_names
|
|
.insert(st_table_row.table_id, st_table_row.table_name);
|
|
}
|
|
|
|
if let Err(e) = self
|
|
.committed_state
|
|
.replay_delete_by_rel(table_id, &row)
|
|
.with_context(|| {
|
|
format!(
|
|
"Error deleting row {:?} from table {:?} during transaction {:?} playback",
|
|
row, table_name, self.committed_state.next_tx_offset
|
|
)
|
|
})
|
|
{
|
|
self.process_error(e.into())?;
|
|
}
|
|
// NOTE: the `rdb_num_table_rows` metric is used by the query optimizer,
|
|
// and therefore has performance implications and must not be disabled.
|
|
DB_METRICS
|
|
.rdb_num_table_rows
|
|
.with_label_values(self.database_identity, &table_id.into(), &table_name)
|
|
.dec();
|
|
|
|
Ok(row)
|
|
}
|
|
|
|
fn visit_truncate(&mut self, table_id: TableId) -> std::result::Result<(), Self::Error> {
|
|
let table_name = match self.committed_state.schema_for_table(table_id) {
|
|
// TODO: avoid clone
|
|
Ok(schema) => schema.table_name.clone(),
|
|
|
|
Err(_) => match self.dropped_table_names.remove(&table_id) {
|
|
Some(name) => name,
|
|
_ => {
|
|
return self
|
|
.process_error(anyhow!("Error looking up name for truncated table {table_id:?}").into());
|
|
}
|
|
},
|
|
};
|
|
|
|
if let Err(e) = self.committed_state.replay_truncate(table_id).with_context(|| {
|
|
format!(
|
|
"Error truncating table {:?} during transaction {:?} playback",
|
|
table_id, self.committed_state.next_tx_offset
|
|
)
|
|
}) {
|
|
self.process_error(e.into())?;
|
|
}
|
|
|
|
// NOTE: the `rdb_num_table_rows` metric is used by the query optimizer,
|
|
// and therefore has performance implications and must not be disabled.
|
|
DB_METRICS
|
|
.rdb_num_table_rows
|
|
.with_label_values(self.database_identity, &table_id.into(), &table_name)
|
|
.set(0);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn visit_tx_start(&mut self, offset: u64) -> std::result::Result<(), Self::Error> {
|
|
// The first transaction in a history must have the same offset as the
|
|
// committed state.
|
|
//
|
|
// (Technically, the history should guarantee that all subsequent
|
|
// transaction offsets are contiguous, but we don't currently have a
|
|
// good way to only check the first transaction.)
|
|
//
|
|
// Note that this is not a panic, because the starting offset can be
|
|
// chosen at runtime.
|
|
if offset != self.committed_state.next_tx_offset {
|
|
return Err(ReplayError::InvalidOffset {
|
|
expected: self.committed_state.next_tx_offset,
|
|
encountered: offset,
|
|
});
|
|
}
|
|
(self.progress)(offset);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn visit_tx_end(&mut self) -> std::result::Result<(), Self::Error> {
|
|
self.committed_state.replay_end_tx().map_err(Into::into)
|
|
}
|
|
}
|
|
|
|
/// A `CommittedState` under construction during replay.
|
|
pub struct ReplayCommittedState<'cs> {
|
|
/// The committed state being constructed.
|
|
state: RwLockWriteGuard<'cs, CommittedState>,
|
|
|
|
/// Whether the table was dropped within the current transaction during replay.
|
|
///
|
|
/// While processing a transaction which drops a table, we'll first see the `st_table` delete,
|
|
/// then a series of deletes from the table itself.
|
|
/// We track the table's ID here so we know to ignore the deletes.
|
|
///
|
|
/// Cleared after the end of processing each transaction,
|
|
/// as it should be impossible to ever see another reference to the table after that point.
|
|
replay_table_dropped: IntSet<TableId>,
|
|
|
|
/// Rows within `st_column` which should be ignored during replay
|
|
/// due to having been superseded by a new row representing the same column.
|
|
///
|
|
/// During replay, we visit all of the inserts table-by-table, followed by all of the deletes table-by-table.
|
|
/// This means that, when multiple columns of a table change type within the same transaction,
|
|
/// we see all of the newly-inserted `st_column` rows first, and then later, all of the deleted rows.
|
|
/// We may even see inserts into the altered table before seeing the `st_column` deletes!
|
|
///
|
|
/// In order to maintain a proper view of the schema of tables during replay,
|
|
/// we must remember the old versions of the `st_column` rows when we insert the new ones,
|
|
/// so that we can respect only the new versions.
|
|
///
|
|
/// We insert into this set during [`Self::replay_insert`] of `st_column` rows
|
|
/// and delete from it during [`Self::replay_delete`] of `st_column` rows.
|
|
/// We assert this is empty at the end of each transaction.
|
|
replay_columns_to_ignore: HashSet<RowPointer>,
|
|
|
|
/// Set of tables whose `st_table` entries have been updated during the currently-replaying transaction,
|
|
/// mapped to the current most-recent `st_table` row.
|
|
///
|
|
/// When processing an insert to `st_table`, if the table already exists, we'll record it here.
|
|
/// Then, when we see a corresponding delete, we know that the table has not been dropped,
|
|
/// and so we won't delete the in-memory structure or insert its ID into [`Self::replay_table_dropped`].
|
|
///
|
|
/// When looking up the `st_table` row for a table, if it has an entry here,
|
|
/// that means there are two rows resident in `st_table` at this point in replay.
|
|
/// We return the row recorded here rather than inspecting `st_table`.
|
|
///
|
|
/// We remove from this set when we reach the matching delete,
|
|
/// and assert this set is empty at the end of each transaction.
|
|
///
|
|
/// [`RowPointer`]s from this set are passed to the `unsafe` [`Table::get_row_ref_unchecked`],
|
|
/// so it's important to properly maintain only [`RowPointer`]s to valid, extant, non-deleted rows.
|
|
replay_table_updated: IntMap<TableId, RowPointer>,
|
|
}
|
|
|
|
impl Deref for ReplayCommittedState<'_> {
|
|
type Target = CommittedState;
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
&self.state
|
|
}
|
|
}
|
|
|
|
impl DerefMut for ReplayCommittedState<'_> {
|
|
fn deref_mut(&mut self) -> &mut Self::Target {
|
|
&mut self.state
|
|
}
|
|
}
|
|
|
|
impl<'cs> ReplayCommittedState<'cs> {
|
|
fn new(state: RwLockWriteGuard<'cs, CommittedState>) -> Self {
|
|
Self {
|
|
state,
|
|
replay_table_dropped: <_>::default(),
|
|
replay_columns_to_ignore: <_>::default(),
|
|
replay_table_updated: <_>::default(),
|
|
}
|
|
}
|
|
|
|
fn replay_insert(&mut self, table_id: TableId, schema: &Arc<TableSchema>, row: &ProductValue) -> Result<()> {
|
|
// Event table rows in the commitlog are preserved for future replay features
|
|
// but don't rebuild state — event tables have no committed state.
|
|
if schema.is_event {
|
|
return Ok(());
|
|
}
|
|
|
|
let (table, blob_store, pool) = self.get_table_and_blob_store_or_create(table_id, schema);
|
|
|
|
let (_, row_ref) = match table.insert(pool, blob_store, row) {
|
|
Ok(stuff) => stuff,
|
|
Err(InsertError::Duplicate(e)) => {
|
|
if is_built_in_meta_row(table_id, row)? {
|
|
// If this is a meta-descriptor for a system object,
|
|
// and it already exists exactly, then we can safely ignore the insert.
|
|
// Any error other than `Duplicate` means the commitlog
|
|
// has system table schemas which do not match our expectations,
|
|
// which is almost certainly an unrecoverable error.
|
|
return Ok(());
|
|
} else {
|
|
return Err(TableError::Duplicate(e).into());
|
|
}
|
|
}
|
|
Err(InsertError::Bflatn(e)) => return Err(TableError::Bflatn(e).into()),
|
|
Err(InsertError::IndexError(e)) => return Err(IndexError::UniqueConstraintViolation(e).into()),
|
|
};
|
|
|
|
// `row_ref` is treated as having a mutable borrow on `self`
|
|
// because it derives from `self.get_table_and_blob_store_or_create`,
|
|
// so we have to downgrade it to a pointer and then re-upgrade it again as an immutable row pointer later.
|
|
let row_ptr = row_ref.pointer();
|
|
|
|
if table_id == ST_TABLE_ID {
|
|
// For `st_table` inserts, we need to check if this is a new table or an update to an existing table.
|
|
// For new tables there's nothing more to do, as we'll automatically create it later on
|
|
// when we first `get_table_and_blob_store_or_create` on that table,
|
|
// but for updates to existing tables we need additional bookkeeping.
|
|
|
|
// Upgrade `row_ptr` back again, to break the mutable borrow.
|
|
let (table, blob_store, _) = self.get_table_and_blob_store(ST_TABLE_ID)?;
|
|
|
|
// Safety: We got `row_ptr` from a valid `RowRef` just above, and haven't done any mutations since,
|
|
// so it must still be valid.
|
|
let row_ref = unsafe { table.get_row_ref_unchecked(blob_store, row_ptr) };
|
|
|
|
if self.replay_does_table_already_exist(row_ref) {
|
|
// We've inserted a new `st_table` row for an existing table.
|
|
// We'll expect to see the previous row deleted later in this transaction.
|
|
// For now, mark the table as updated so that we don't confuse it for a deleted table in `replay_delete_by_rel`.
|
|
|
|
let st_table_row = StTableRow::try_from(row_ref)?;
|
|
let referenced_table_id = st_table_row.table_id;
|
|
self.replay_table_updated.insert(referenced_table_id, row_ptr);
|
|
self.reschema_table_for_st_table_update(st_table_row)?;
|
|
}
|
|
}
|
|
|
|
if table_id == ST_COLUMN_ID {
|
|
// We've made a modification to `st_column`.
|
|
// The type of a table has changed, so figure out which.
|
|
// The first column in `StColumnRow` is `table_id`.
|
|
let referenced_table_id = self.ignore_previous_versions_of_column(row, row_ptr)?;
|
|
self.st_column_changed(referenced_table_id)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Does another row other than `new_st_table_entry` exist in `st_table`
|
|
/// which refers to the same [`TableId`] as `new_st_table_entry`?
|
|
///
|
|
/// Used during [`Self::replay_insert`] of `st_table` rows to maintain [`Self::replay_table_updated`].
|
|
fn replay_does_table_already_exist(&self, new_st_table_entry: RowRef<'_>) -> bool {
|
|
fn get_table_id(row_ref: RowRef<'_>) -> TableId {
|
|
row_ref
|
|
.read_col(StTableFields::TableId)
|
|
.expect("`st_table` row should conform to `st_table` schema")
|
|
}
|
|
|
|
let referenced_table_id = get_table_id(new_st_table_entry);
|
|
self.iter_by_col_eq(ST_TABLE_ID, StTableFields::TableId, &referenced_table_id.into())
|
|
.expect("`st_table` should exist")
|
|
.any(|row_ref| row_ref.pointer() != new_st_table_entry.pointer())
|
|
}
|
|
|
|
/// Update the in-memory table structure for the table described by `row`,
|
|
/// in response to replay of a schema-altering migration.
|
|
fn reschema_table_for_st_table_update(&mut self, row: StTableRow) -> Result<()> {
|
|
// We only need to update if we've already constructed the in-memory table structure.
|
|
// If we haven't yet, then `self.get_table_and_blob_store_or_create` will see the correct schema
|
|
// when it eventually runs.
|
|
if let Ok((table, ..)) = self.get_table_and_blob_store_mut(row.table_id) {
|
|
table.with_mut_schema(|schema| -> Result<()> {
|
|
schema.table_access = row.table_access;
|
|
schema.primary_key = row.table_primary_key.map(|col_list| col_list.as_singleton().ok_or_else(|| anyhow::anyhow!("When replaying `st_column` update: `table_primary_key` should be a single column, but found {col_list:?}"))).transpose()?;
|
|
schema.table_name = row.table_name;
|
|
if row.table_type == schema.table_type {
|
|
Ok(())
|
|
} else {
|
|
Err(anyhow::anyhow!(
|
|
"When replaying `st_column` update: `table_type` should not have changed, but previous schema has {:?} and new schema has {:?}",
|
|
schema.table_type,
|
|
row.table_type,
|
|
).into())
|
|
}
|
|
})?;
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
/// Mark all `st_column` rows which refer to the same column as `st_column_row`
|
|
/// other than the one at `row_pointer` as outdated
|
|
/// by storing them in [`Self::replay_columns_to_ignore`].
|
|
///
|
|
/// Returns the ID of the table to which `st_column_row` belongs.
|
|
fn ignore_previous_versions_of_column(
|
|
&mut self,
|
|
st_column_row: &ProductValue,
|
|
row_ptr: RowPointer,
|
|
) -> Result<TableId> {
|
|
let target_table_id = Self::read_table_id(st_column_row);
|
|
let target_col_id = ColId::deserialize(ValueDeserializer::from_ref(&st_column_row.elements[1]))
|
|
.expect("second field in `st_column` should decode to a `ColId`");
|
|
|
|
let outdated_st_column_rows = iter_st_column_for_table(self, &target_table_id.into())?
|
|
.filter_map(|row_ref| {
|
|
StColumnRow::try_from(row_ref)
|
|
.map(|c| (c.col_pos == target_col_id && row_ref.pointer() != row_ptr).then(|| row_ref.pointer()))
|
|
.transpose()
|
|
})
|
|
.collect::<Result<Vec<RowPointer>>>()?;
|
|
|
|
for row in outdated_st_column_rows {
|
|
self.replay_columns_to_ignore.insert(row);
|
|
}
|
|
|
|
Ok(target_table_id)
|
|
}
|
|
|
|
/// Refreshes the columns and layout of a table
|
|
/// when a `row` has been inserted from `st_column`.
|
|
///
|
|
/// The `row_ptr` is a pointer to `row`.
|
|
fn st_column_changed(&mut self, table_id: TableId) -> Result<()> {
|
|
let table_name = self.find_st_table_row(table_id)?.table_name;
|
|
|
|
// We're replaying and we don't have unique constraints yet.
|
|
// Due to replay handling all inserts first and deletes after,
|
|
// when processing `st_column` insert/deletes,
|
|
// we may end up with two definitions for the same `col_pos`.
|
|
// Of those two, we're interested in the one we just inserted
|
|
// and not the other one, as it is being replaced.
|
|
// `Self::ignore_previous_version_of_column` has marked the old version as ignored,
|
|
// so filter only the non-ignored columns.
|
|
let mut columns = iter_st_column_for_table(self, &table_id.into())?
|
|
.filter(|row_ref| !self.replay_columns_to_ignore.contains(&row_ref.pointer()))
|
|
.map(|row_ref| {
|
|
let row = StColumnRow::try_from(row_ref)?;
|
|
let mut column_schema = ColumnSchema::from(row);
|
|
let alias = self
|
|
.find_st_column_accessor_row(table_name.as_ref(), &column_schema.col_name)?
|
|
.map(|row| row.accessor_name);
|
|
column_schema.alias = alias;
|
|
Ok(column_schema)
|
|
})
|
|
.collect::<Result<Vec<_>>>()?;
|
|
|
|
// Columns in `st_column` are not in general sorted by their `col_pos`,
|
|
// though they will happen to be for tables which have never undergone migrations
|
|
// because their initial insertion order matches their `col_pos` order.
|
|
columns.sort_by_key(|col: &ColumnSchema| col.col_pos);
|
|
|
|
// Update the columns and layout of the the in-memory table.
|
|
if let Some(table) = self.tables.get_mut(&table_id) {
|
|
table.change_columns_to(columns).map_err(TableError::from)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn replay_delete_by_rel(&mut self, table_id: TableId, row: &ProductValue) -> Result<()> {
|
|
// (1) Table dropped? Avoid an error and just ignore the row instead.
|
|
if self.replay_table_dropped.contains(&table_id) {
|
|
return Ok(());
|
|
}
|
|
|
|
// Get the table for mutation.
|
|
let (table, blob_store, _, page_pool) = self.get_table_and_blob_store_mut(table_id)?;
|
|
|
|
// Delete the row.
|
|
let row_ptr = table
|
|
.delete_equal_row(page_pool, blob_store, row)
|
|
.map_err(TableError::Bflatn)?
|
|
.ok_or_else(|| anyhow!("Delete for non-existent row when replaying transaction"))?;
|
|
|
|
if table_id == ST_TABLE_ID {
|
|
let referenced_table_id = row
|
|
.elements
|
|
.get(StTableFields::TableId.col_idx())
|
|
.expect("`st_table` row should conform to `st_table` schema")
|
|
.as_u32()
|
|
.expect("`st_table` row should conform to `st_table` schema");
|
|
if self
|
|
.replay_table_updated
|
|
.remove(&TableId::from(*referenced_table_id))
|
|
.is_some()
|
|
{
|
|
// This delete is part of an update to an `st_table` row,
|
|
// i.e. earlier in this transaction we inserted a new version of the row.
|
|
// That means it's not a dropped table.
|
|
} else {
|
|
// A row was removed from `st_table`, so a table was dropped.
|
|
// Remove that table from the in-memory structures.
|
|
let dropped_table_id = Self::read_table_id(row);
|
|
// It's safe to ignore the case where we don't have an in-memory structure for the deleted table.
|
|
// This can happen if a table is initially empty at the snapshot or its creation,
|
|
// and never has any rows inserted into or deleted from it.
|
|
self.tables.remove(&dropped_table_id);
|
|
|
|
// Mark the table as dropped so that when
|
|
// processing row deletions for that table later,
|
|
// they are simply ignored in (1).
|
|
self.replay_table_dropped.insert(dropped_table_id);
|
|
}
|
|
}
|
|
|
|
if table_id == ST_COLUMN_ID {
|
|
// We may have reached the corresponding delete to an insert in `st_column`
|
|
// as the result of a column-type-altering migration.
|
|
// Now that the outdated `st_column` row isn't present any more,
|
|
// we can stop ignoring it.
|
|
//
|
|
// It's also possible that we're deleting this column as the result of a deleted table,
|
|
// and that there wasn't any corresponding insert at all.
|
|
// If that's the case, `row_ptr` won't be in `self.replay_columns_to_ignore`,
|
|
// which is fine.
|
|
self.replay_columns_to_ignore.remove(&row_ptr);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Assuming that a `TableId` is stored as the first field in `row`, read it.
|
|
fn read_table_id(row: &ProductValue) -> TableId {
|
|
TableId::deserialize(ValueDeserializer::from_ref(&row.elements[0]))
|
|
.expect("first field in `st_column` should decode to a `TableId`")
|
|
}
|
|
|
|
fn replay_truncate(&mut self, table_id: TableId) -> Result<()> {
|
|
// (1) Table dropped? Avoid an error and just ignore the row instead.
|
|
if self.replay_table_dropped.contains(&table_id) {
|
|
return Ok(());
|
|
}
|
|
|
|
// Get the table for mutation.
|
|
let (table, blob_store, ..) = self.get_table_and_blob_store_mut(table_id)?;
|
|
|
|
// We do not need to consider a truncation of `st_table` itself,
|
|
// as if that happens, the database is bricked.
|
|
|
|
table.clear(blob_store);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn replay_end_tx(&mut self) -> Result<()> {
|
|
self.next_tx_offset += 1;
|
|
|
|
if !self.replay_columns_to_ignore.is_empty() {
|
|
return Err(anyhow::anyhow!(
|
|
"`CommittedState::replay_columns_to_ignore` should be empty at the end of a commit, but found {} entries",
|
|
self.replay_columns_to_ignore.len(),
|
|
).into());
|
|
}
|
|
|
|
if !self.replay_table_updated.is_empty() {
|
|
return Err(anyhow::anyhow!(
|
|
"`CommittedState::replay_table_updated` should be empty at the end of a commit, but found {} entries",
|
|
self.replay_table_updated.len(),
|
|
)
|
|
.into());
|
|
}
|
|
|
|
// Any dropped tables should be fully gone by the end of a transaction;
|
|
// if we see any reference to them in the future we should error, not ignore.
|
|
self.replay_table_dropped.clear();
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
impl StateView for ReplayCommittedState<'_> {
|
|
/// Find the `st_table` row for `table_id`,
|
|
/// first inspecting [`Self::replay_table_updated`],
|
|
/// then falling back to [`CommittedState::iter_by_col_eq`].
|
|
fn find_st_table_row(&self, table_id: TableId) -> Result<StTableRow> {
|
|
if let Some(row_ptr) = self.replay_table_updated.get(&table_id) {
|
|
let (table, blob_store, _) = self.state.get_table_and_blob_store(table_id)?;
|
|
// SAFETY: `row_ptr` is stored in `self.replay_table_updated`,
|
|
// meaning it was inserted into `st_table` by `replay_insert`
|
|
// and has not yet been deleted by `replay_delete_by_rel`.
|
|
let row_ref = unsafe { table.get_row_ref_unchecked(blob_store, *row_ptr) };
|
|
StTableRow::try_from(row_ref)
|
|
} else {
|
|
self.state.find_st_table_row(table_id)
|
|
}
|
|
}
|
|
|
|
type Iter<'a>
|
|
= <CommittedState as StateView>::Iter<'a>
|
|
where
|
|
Self: 'a;
|
|
|
|
type IterByColRange<'a, R: RangeBounds<AlgebraicValue>>
|
|
= <CommittedState as StateView>::IterByColRange<'a, R>
|
|
where
|
|
Self: 'a;
|
|
|
|
type IterByColEq<'a, 'r>
|
|
= <CommittedState as StateView>::IterByColEq<'a, 'r>
|
|
where
|
|
Self: 'a;
|
|
|
|
fn get_schema(&self, table_id: TableId) -> Option<&Arc<TableSchema>> {
|
|
self.state.get_schema(table_id)
|
|
}
|
|
|
|
fn table_row_count(&self, table_id: TableId) -> Option<u64> {
|
|
self.state.table_row_count(table_id)
|
|
}
|
|
|
|
fn iter(&self, table_id: TableId) -> Result<Self::Iter<'_>> {
|
|
self.state.iter(table_id)
|
|
}
|
|
|
|
fn iter_by_col_range<R: RangeBounds<AlgebraicValue>>(
|
|
&self,
|
|
table_id: TableId,
|
|
cols: ColList,
|
|
range: R,
|
|
) -> Result<Self::IterByColRange<'_, R>> {
|
|
self.state.iter_by_col_range(table_id, cols, range)
|
|
}
|
|
|
|
fn iter_by_col_eq<'a, 'r>(
|
|
&'a self,
|
|
table_id: TableId,
|
|
cols: impl Into<ColList>,
|
|
value: &'r AlgebraicValue,
|
|
) -> Result<Self::IterByColEq<'a, 'r>> {
|
|
self.state.iter_by_col_eq(table_id, cols, value)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use crate::{
|
|
locking_tx_datastore::datastore::tests::{all_rows, begin_mut_tx, commit, setup_event_table, u32_str_u32},
|
|
traits::{MutTx as _, MutTxDatastore as _},
|
|
};
|
|
use spacetimedb_lib::error::ResultTest;
|
|
|
|
use super::*;
|
|
|
|
#[test]
|
|
fn test_event_table_replay_ignores_inserts() -> ResultTest<()> {
|
|
let (datastore, tx, table_id) = setup_event_table()?;
|
|
// Commit the table-creation tx so the schema exists.
|
|
commit(&datastore, tx)?;
|
|
|
|
// Get the schema for this event table.
|
|
let tx = begin_mut_tx(&datastore);
|
|
let schema = datastore.schema_for_table_mut_tx(&tx, table_id)?;
|
|
let _ = datastore.rollback_mut_tx(tx);
|
|
|
|
// Directly call replay_insert on committed state.
|
|
let row = u32_str_u32(1, "Carol", 40);
|
|
{
|
|
let state = datastore.committed_state.write();
|
|
let mut committed_state = ReplayCommittedState::new(state);
|
|
committed_state.replay_insert(table_id, &schema, &row)?;
|
|
}
|
|
|
|
// After replay, the event table should still have no committed rows.
|
|
let tx = begin_mut_tx(&datastore);
|
|
assert_eq!(
|
|
all_rows(&datastore, &tx, table_id).len(),
|
|
0,
|
|
"replay_insert should be a no-op for event tables"
|
|
);
|
|
Ok(())
|
|
}
|
|
}
|