mirror of
https://github.com/clockworklabs/SpacetimeDB.git
synced 2026-05-06 07:26:43 -04:00
Add support for bytes key btree indices (#4733)
# Description of Changes Add support for btree indices where the keys are encoded byte strings for e.g., multi-column indices of no-unbounded-types (arrays and strings) that aren't floats. The main interesting stuff in this PR is in `bytes_key.rs` which defines `RangeCompatBytesKey`, a type that is derived from `BytesKey`, by converting little-endian encoded integers to big-endian. Signed integers are now also supported, but floats are not. `table_index/mod.rs` also includes a bunch of interesting stuff. # API and ABI breaking changes Technically this fixes pre-existing bugs in the handling of `Excluded` ranges for multi-col indices. # Expected complexity level and risk 2? # Testing - A proptest `order_in_bsatn_is_preserved` is now adjusted and enabled to exercise the ordering of `RangeCompatBytesKey`. - A proptest `btree_multi_col_range_scans_work` is added to check the behavior of range scans on multi-col indices. --------- Co-authored-by: joshua-spacetime <josh@clockworklabs.io>
This commit is contained in:
committed by
GitHub
parent
86b3ac1453
commit
e7294bf2e8
@@ -23,7 +23,7 @@ use spacetimedb_lib::{http as st_http, ConnectionId, Identity, Timestamp};
|
||||
use spacetimedb_primitives::{ColId, ColList, IndexId, TableId};
|
||||
use spacetimedb_sats::{
|
||||
bsatn::{self, ToBsatn},
|
||||
buffer::{CountWriter, TeeWriter},
|
||||
buffer::CountWriter,
|
||||
AlgebraicValue, ProductValue,
|
||||
};
|
||||
use spacetimedb_schema::identifier::Identifier;
|
||||
@@ -330,16 +330,16 @@ impl InstanceEnv {
|
||||
fn project_cols_bsatn(buffer: &mut [u8], cols: ColList, row_ref: RowRef<'_>) -> usize {
|
||||
// We get back a col-list with the columns with generated values.
|
||||
// Write those back to `buffer` and then the encoded length to `row_len`.
|
||||
let counter = CountWriter::default();
|
||||
let mut writer = TeeWriter::new(counter, buffer);
|
||||
for col in cols.iter() {
|
||||
// Read the column value to AV and then serialize.
|
||||
let val = row_ref
|
||||
.read_col::<AlgebraicValue>(col)
|
||||
.expect("reading col as AV never panics");
|
||||
bsatn::to_writer(&mut writer, &val).unwrap();
|
||||
}
|
||||
writer.w1.finish()
|
||||
let (_, count) = CountWriter::run(buffer, |writer| {
|
||||
for col in cols.iter() {
|
||||
// Read the column value to AV and then serialize.
|
||||
let val = row_ref
|
||||
.read_col::<AlgebraicValue>(col)
|
||||
.expect("reading col as AV never panics");
|
||||
bsatn::to_writer(writer, &val).unwrap();
|
||||
}
|
||||
});
|
||||
count
|
||||
}
|
||||
|
||||
pub fn insert(&self, table_id: TableId, buffer: &mut [u8]) -> Result<usize, NodesError> {
|
||||
|
||||
@@ -514,7 +514,8 @@ impl CommittedState {
|
||||
|
||||
let index = table.new_index(&algo, is_unique)?;
|
||||
// SAFETY: `index` was derived from `table`.
|
||||
unsafe { table.insert_index(blob_store, index_id, index) };
|
||||
unsafe { table.insert_index(blob_store, index_id, index) }
|
||||
.expect("rebuilding should not cause constraint violations");
|
||||
index_id_map.insert(index_id, table_id);
|
||||
}
|
||||
Ok(())
|
||||
|
||||
@@ -1502,6 +1502,7 @@ impl MutTxId {
|
||||
.map_err(|IndexCannotSeekRange| IndexError::IndexCannotSeekRange(index_id))?;
|
||||
IndexScanPointOrRange::Range(iter)
|
||||
}
|
||||
PointOrRange::Unsupported => return Err(IndexError::IndexCannotSeekRange(index_id).into()),
|
||||
};
|
||||
Ok((table_id, iter))
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use crate::array_value::{ArrayValueIntoIter, ArrayValueIterCloned};
|
||||
use crate::{de, AlgebraicValue, SumValue};
|
||||
use crate::{de, AlgebraicValue, ProductValue, SumValue};
|
||||
use crate::{i256, u256};
|
||||
use derive_more::From;
|
||||
|
||||
@@ -23,6 +23,11 @@ impl ValueDeserializer {
|
||||
// SAFETY: The conversion is OK due to `repr(transparent)`.
|
||||
unsafe { &*(val as *const AlgebraicValue as *const ValueDeserializer) }
|
||||
}
|
||||
|
||||
pub fn from_product_ref(prod: &ProductValue) -> RefProductAccess<'_> {
|
||||
let vals = prod.elements.iter();
|
||||
RefProductAccess { vals }
|
||||
}
|
||||
}
|
||||
|
||||
/// Errors that can occur when deserializing the `AlgebraicValue`.
|
||||
@@ -348,7 +353,7 @@ impl<'de> de::Deserializer<'de> for &'de ValueDeserializer {
|
||||
}
|
||||
|
||||
/// Defines deserialization for [`&'de ValueDeserializer`] where product elements are in the input.
|
||||
struct RefProductAccess<'a> {
|
||||
pub struct RefProductAccess<'a> {
|
||||
/// The element values of the product as an iterator of borrowed values.
|
||||
vals: std::slice::Iter<'a, AlgebraicValue>,
|
||||
}
|
||||
|
||||
@@ -329,6 +329,14 @@ pub struct CountWriter {
|
||||
}
|
||||
|
||||
impl CountWriter {
|
||||
/// Run `work` on `writer`, but also count the number of bytes written.
|
||||
pub fn run<W: BufWriter, R>(writer: W, work: impl FnOnce(&mut TeeWriter<W, CountWriter>) -> R) -> (R, usize) {
|
||||
let counter = Self::default();
|
||||
let mut writer = TeeWriter::new(writer, counter);
|
||||
let ret = work(&mut writer);
|
||||
(ret, writer.w2.finish())
|
||||
}
|
||||
|
||||
/// Consumes the counter and returns the final count.
|
||||
pub fn finish(self) -> usize {
|
||||
self.num_bytes
|
||||
|
||||
@@ -259,6 +259,17 @@ impl_deserialize!(
|
||||
de => Vec::<T>::validate(de)
|
||||
);
|
||||
|
||||
/// The visitor merely validates the slice.
|
||||
struct ValidatingSliceVisitor;
|
||||
|
||||
impl<T: ToOwned + ?Sized> SliceVisitor<'_, T> for ValidatingSliceVisitor {
|
||||
type Output = ();
|
||||
|
||||
fn visit<E: Error>(self, _: &T) -> Result<Self::Output, E> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// The visitor converts the slice to its owned version.
|
||||
struct OwnedSliceVisitor;
|
||||
|
||||
@@ -293,8 +304,16 @@ impl<const N: usize> SliceVisitor<'_, [u8]> for ByteArrayVisitor<N> {
|
||||
}
|
||||
}
|
||||
|
||||
impl_deserialize!([] &'de str, de => de.deserialize_str(BorrowedSliceVisitor));
|
||||
impl_deserialize!([] &'de [u8], de => de.deserialize_bytes(BorrowedSliceVisitor));
|
||||
impl_deserialize!(
|
||||
[] &'de str,
|
||||
de => de.deserialize_str_slice(),
|
||||
de => de.deserialize_str(ValidatingSliceVisitor)
|
||||
);
|
||||
impl_deserialize!(
|
||||
[] &'de [u8],
|
||||
de => de.deserialize_bytes(BorrowedSliceVisitor),
|
||||
de => de.deserialize_bytes(ValidatingSliceVisitor)
|
||||
);
|
||||
|
||||
/// The visitor returns the slice as-is and borrowed.
|
||||
pub(crate) struct BorrowedSliceVisitor;
|
||||
@@ -610,7 +629,7 @@ impl<'de> DeserializeSeed<'de> for WithTypespace<'_, AlgebraicType> {
|
||||
AlgebraicType::U256 => u256::validate(de),
|
||||
AlgebraicType::F32 => f32::validate(de),
|
||||
AlgebraicType::F64 => f64::validate(de),
|
||||
AlgebraicType::String => <Box<str>>::validate(de),
|
||||
AlgebraicType::String => <&str>::validate(de),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,6 +69,14 @@ impl From<ColId> for InvalidFieldError {
|
||||
}
|
||||
|
||||
impl ProductValue {
|
||||
/// Pushes a single value to the product.
|
||||
pub fn push(self, val: impl Into<AlgebraicValue>) -> Self {
|
||||
let mut vals: Vec<_> = self.elements.into();
|
||||
vals.reserve(1);
|
||||
vals.push(val.into());
|
||||
Self::from(vals)
|
||||
}
|
||||
|
||||
/// Borrow the value at field of `self` identified by `col_pos`.
|
||||
///
|
||||
/// The `name` is non-functional and is only used for error-messages.
|
||||
|
||||
@@ -763,7 +763,7 @@ fn make_table_with_index<R: IndexedRow>(unique: bool) -> (Table, IndexId) {
|
||||
let algo = BTreeAlgorithm { columns: cols }.into();
|
||||
let idx = tbl.new_index(&algo, unique).unwrap();
|
||||
// SAFETY: index was derived from the table.
|
||||
unsafe { tbl.insert_index(&NullBlobStore, index_id, idx) };
|
||||
unsafe { tbl.insert_index(&NullBlobStore, index_id, idx) }.unwrap();
|
||||
|
||||
(tbl, index_id)
|
||||
}
|
||||
|
||||
@@ -12,3 +12,4 @@ cc 1f295db61a02ac3378f5ffcceb084637d2391bcc1758af6fb2df8355a713e998 # shrinks to
|
||||
cc 776d142680b35d7dad5b558fea7071b095f7e6a23c8549e9b32b452d5eebf92b # shrinks to (ty, val) = (ProductType { elements: [ProductTypeElement { name: None, algebraic_type: Builtin(String) }] }, ProductValue { elements: [String("\u{16af0}a®ਲ𒒀A 𑌅 ಎ꒐𑍇A A𐫫Aⷀ𑌵ૠ\u{b55} aㄱ \u{f99}a ")] })
|
||||
cc 66d99531b8e513d0fd558f492f708d110e1e117dfc7f3f42188bcc57c23bb89e # shrinks to (ty, val) = (ProductType { elements: [ProductTypeElement { name: None, algebraic_type: Builtin(Map(MapType { key_ty: Builtin(U8), ty: Builtin(Map(MapType { key_ty: Builtin(I32), ty: Builtin(F32) })) })) }] }, ProductValue { elements: [Map({U8(0): Map({I32(-4): F32(Total(0.0)), I32(-3): F32(Total(0.0)), I32(-2): F32(Total(-0.0)), I32(-1): F32(Total(-0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0)), I32(3): F32(Total(0.0))}), U8(1): Map({I32(-5): F32(Total(0.0)), I32(-4): F32(Total(0.0)), I32(-3): F32(Total(0.0)), I32(-2): F32(Total(0.0)), I32(-1): F32(Total(-0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0)), I32(3): F32(Total(0.0)), I32(4): F32(Total(0.0)), I32(5): F32(Total(0.0)), I32(6): F32(Total(0.0)), I32(7): F32(Total(0.0))}), U8(2): Map({I32(-3): F32(Total(-0.0)), I32(-2): F32(Total(0.0)), I32(-1): F32(Total(0.0)), I32(0): F32(Total(-0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0)), I32(3): F32(Total(0.0))}), U8(3): Map({I32(-10): F32(Total(0.0)), I32(-9): F32(Total(0.0)), I32(-8): F32(Total(-0.0)), I32(-7): F32(Total(0.0)), I32(-6): F32(Total(0.0)), I32(-5): F32(Total(0.0)), I32(-4): F32(Total(0.0)), I32(-3): F32(Total(0.0)), I32(-2): F32(Total(0.0)), I32(-1): F32(Total(0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0))}), U8(4): Map({I32(-7): F32(Total(0.0)), I32(-6): F32(Total(0.0)), I32(-5): F32(Total(0.0)), I32(-4): F32(Total(0.0)), I32(-3): F32(Total(0.0)), I32(-2): F32(Total(0.0)), I32(-1): F32(Total(0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0)), I32(3): F32(Total(0.0))}), U8(5): Map({I32(-9): F32(Total(0.0)), I32(-8): F32(Total(0.0)), I32(-7): F32(Total(0.0)), I32(-6): F32(Total(0.0)), I32(-5): F32(Total(0.0)), I32(-4): F32(Total(0.0)), I32(-3): F32(Total(0.0)), I32(-2): F32(Total(0.0)), I32(-1): F32(Total(0.0)), I32(0): F32(Total(0.0)), 
I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0)), I32(3): F32(Total(0.0)), I32(4): F32(Total(0.0)), I32(5): F32(Total(0.0))}), U8(6): Map({I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0))}), U8(7): Map({I32(-4): F32(Total(0.0)), I32(-3): F32(Total(0.0)), I32(-2): F32(Total(0.0)), I32(-1): F32(Total(0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(-0.0)), I32(3): F32(Total(0.0))}), U8(8): Map({I32(-7): F32(Total(0.0)), I32(-6): F32(Total(-0.0)), I32(-5): F32(Total(0.0)), I32(-4): F32(Total(0.0)), I32(-3): F32(Total(0.0)), I32(-2): F32(Total(0.0)), I32(-1): F32(Total(0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0)), I32(3): F32(Total(0.0)), I32(4): F32(Total(0.0)), I32(5): F32(Total(0.0)), I32(6): F32(Total(-0.0)), I32(7): F32(Total(0.0))}), U8(9): Map({I32(-1349171619): F32(Total(418648100.0)), I32(-665792478): F32(Total(-5.3081414e23)), I32(-1): F32(Total(0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0)), I32(3): F32(Total(0.0)), I32(5): F32(Total(-0.0)), I32(906732021): F32(Total(1.952517e16)), I32(1965197035): F32(Total(1020.84216))}), U8(11): Map({I32(-7): F32(Total(0.0)), I32(-6): F32(Total(0.0)), I32(-5): F32(Total(0.0)), I32(-4): F32(Total(0.0)), I32(-3): F32(Total(0.0)), I32(-2): F32(Total(0.0)), I32(-1): F32(Total(0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0)), I32(3): F32(Total(0.0)), I32(4): F32(Total(0.0)), I32(5): F32(Total(0.0)), I32(6): F32(Total(0.0))})})] })
|
||||
cc 7f478c4dd0f24e715a74949c6d06af8ca2b4c8b82fae4f53c953a2b323cff851 # shrinks to (ty, val) = (ProductType { elements: [ProductTypeElement { name: None, algebraic_type: Builtin(Array(ArrayType { elem_ty: Builtin(Map(MapType { key_ty: Builtin(U64), ty: Builtin(Bool) })) })) }] }, ProductValue { elements: [Array([{U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false), U64(6): Bool(false), U64(7): Bool(false), U64(8): Bool(false), U64(9): Bool(false), U64(10): Bool(false), U64(11): Bool(false), U64(12): Bool(false), U64(13): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false), U64(6): Bool(false), U64(7): Bool(false), U64(8): Bool(false), U64(9): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false), U64(6): Bool(false), U64(7): Bool(false), U64(8): Bool(false), U64(9): Bool(false), U64(10): Bool(false), U64(11): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false), U64(6): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false), U64(6): Bool(false), U64(7): Bool(false), U64(8): Bool(false), U64(9): Bool(false), U64(10): Bool(false), U64(11): Bool(false), U64(12): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false), U64(6): Bool(false), U64(7): Bool(false), U64(8): Bool(false), U64(9): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false), U64(6): Bool(false), U64(7): Bool(false), U64(8): Bool(false), 
U64(9): Bool(false), U64(10): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false), U64(6): Bool(false), U64(7): Bool(false), U64(8): Bool(false), U64(9): Bool(false), U64(10): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false)}])] })
|
||||
cc 01bfd4449bee7eaa0b61b60792baed8d52d3589f4a5bb313bf057194a6248a83
|
||||
|
||||
@@ -10,3 +10,5 @@ cc c1e4c959a32f6ab8ef9c4e29d39a24ec47cb03524584606a7f1fa4563f0f8cca # shrinks to
|
||||
cc 4cb325be8b24c9efa5b1f20b9504d044d9dd110eb9e99355de4ca42f9cfc20b4 # shrinks to (ty, cols, pv) = (ProductType {None: Sum(SumType {"variant_0": Product(ProductType {})})}, [ColId(0)], ProductValue { elements: [Sum(SumValue { tag: 0, value: Product(ProductValue { elements: [] }) })] }), is_unique = false
|
||||
cc a166a3c619c7cae3938f4e0cfb4e7a96cddfbb7943efd0b74e8cbb99d7a1e6a8 # shrinks to (ty, cols, pv) = (ProductType {None: U8}, [ColId(0)], ProductValue { elements: [U8(0)] }), kind = Direct
|
||||
cc 05390c104810e7086fa5d3f3cac7f491a377ae6ba64431661fd94662e28d1fca # shrinks to (ty, cols, pv) = (ProductType {None: Sum(SumType {"variant_0": Product(ProductType {})})}, [ColId(0)], ProductValue { elements: [Sum(SumValue { tag: 0, value: Product(ProductValue { elements: [] }) })] }), kind = Direct
|
||||
cc 3b8115315ff39f3268c02ccd659b82444e6d0b12aae7e6c0feba956875dab5ab # shrinks to is_unique = false, (prefix_ty, prefix_val) = (Array(ArrayType { elem_ty: Product(ProductType {"field_0": String}) }), Array([ProductValue { elements: [String("")] }])), (middle_ty, [start, middle, end]) = (Bool, [Bool(false), Bool(false), Bool(false)]), (suffix_ty, suffix_val) = (Bool, Bool(false))
|
||||
cc aa8f01aec687cbe6ad36acc77c8c484ccd323a53dca132c6eb490104563d17af # shrinks to is_unique = false, index_kind = BTree, (prefix_ty, prefix_val) = (Bool, Bool(false)), (middle_ty, [included, excluded]) = (Product(ProductType {}), [Product(ProductValue { elements: [] }), Product(ProductValue { elements: [] })]), (suffix_ty, suffix_val) = (Bool, Bool(false))
|
||||
|
||||
+62
-30
@@ -1413,36 +1413,57 @@ impl Table {
|
||||
/// # Safety
|
||||
///
|
||||
/// Caller must promise that `index` was constructed with the same row type/layout as this table.
|
||||
pub unsafe fn insert_index(&mut self, blob_store: &dyn BlobStore, index_id: IndexId, mut index: TableIndex) {
|
||||
pub unsafe fn insert_index(
|
||||
&mut self,
|
||||
blob_store: &dyn BlobStore,
|
||||
index_id: IndexId,
|
||||
mut index: TableIndex,
|
||||
) -> Result<(), String> {
|
||||
let rows = self.scan_rows(blob_store);
|
||||
// SAFETY: Caller promised that table's row type/layout
|
||||
// matches that which `index` was constructed with.
|
||||
// It follows that this applies to any `rows`, as required.
|
||||
let violation = unsafe { index.build_from_rows(rows) };
|
||||
violation.unwrap_or_else(|ptr| {
|
||||
let index_schema = &self.schema.indexes.iter().find(|index_schema| index_schema.index_id == index_id).expect("Index should exist");
|
||||
let indexed_column = if let IndexAlgorithm::BTree(BTreeAlgorithm { columns }) = &index_schema.index_algorithm {
|
||||
Some(columns)
|
||||
} else { None };
|
||||
let indexed_column = indexed_column.and_then(|columns| columns.as_singleton());
|
||||
let indexed_column_info = indexed_column.and_then(|column| self.schema.get_column(column.idx()));
|
||||
violation.map_err(|ptr| {
|
||||
// SAFETY: `ptr` just came out of `self.scan_rows`, so it is present.
|
||||
let row = unsafe { self.get_row_ref_unchecked(blob_store, ptr) }.to_product_value();
|
||||
panic!(
|
||||
"Adding index `{}` {:?} to table `{}` {:?} on column `{}` {:?} should cause no unique constraint violations.
|
||||
|
||||
Found violation at pointer {ptr:?} to row {:?}.",
|
||||
index_schema.index_name,
|
||||
index_schema.index_id,
|
||||
self.schema.table_name,
|
||||
self.schema.table_id,
|
||||
indexed_column_info.map(|column| &column.col_name[..]).unwrap_or("unknown column"),
|
||||
indexed_column,
|
||||
row,
|
||||
);
|
||||
});
|
||||
if let Some(index_schema) = self.schema.indexes.iter().find(|index_schema| index_schema.index_id == index_id) {
|
||||
let indexed_column = if let IndexAlgorithm::BTree(BTreeAlgorithm { columns }) = &index_schema.index_algorithm {
|
||||
Some(columns)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let indexed_column = indexed_column.and_then(|columns| columns.as_singleton());
|
||||
let indexed_column_info = indexed_column.and_then(|column| self.schema.get_column(column.idx()));
|
||||
|
||||
format!(
|
||||
"Adding index `{}` {:?} to table `{}` {:?} on column `{}` {:?} should cause no unique constraint violations.\
|
||||
Found violation at pointer {ptr:?} to row {:?}.",
|
||||
index_schema.index_name,
|
||||
index_schema.index_id,
|
||||
self.schema.table_name,
|
||||
self.schema.table_id,
|
||||
indexed_column_info.map(|column| &column.col_name[..]).unwrap_or("unknown column"),
|
||||
indexed_column,
|
||||
row,
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"Adding index to table `{}` {:?} on columns `{:?}` with key type {:?} should cause no unique constraint violations.\
|
||||
Found violation at pointer {ptr:?} to row {:?}.",
|
||||
self.schema.table_name,
|
||||
self.schema.table_id,
|
||||
index.indexed_columns,
|
||||
index.key_type,
|
||||
row,
|
||||
)
|
||||
}
|
||||
})?;
|
||||
|
||||
// SAFETY: Forward caller requirement.
|
||||
unsafe { self.add_index(index_id, index) };
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Adds an index to the table without populating.
|
||||
@@ -2453,7 +2474,7 @@ pub(crate) mod test {
|
||||
|
||||
let index = table.new_index(&algo, true).unwrap();
|
||||
// SAFETY: Index was derived from `table`.
|
||||
unsafe { table.insert_index(&NullBlobStore, index_schema.index_id, index) };
|
||||
unsafe { table.insert_index(&NullBlobStore, index_schema.index_id, index) }.unwrap();
|
||||
|
||||
// Reserve a page so that we can check the hash.
|
||||
let pi = table.inner.pages.reserve_empty_page(&pool, table.row_size()).unwrap();
|
||||
@@ -2553,6 +2574,8 @@ pub(crate) mod test {
|
||||
ty: ProductType,
|
||||
vals: Vec<ProductValue>,
|
||||
indexed_columns: ColList,
|
||||
index_kind: IndexKind,
|
||||
is_unique: bool,
|
||||
) -> Result<(), TestCaseError> {
|
||||
let pool = PagePool::new_for_test();
|
||||
let mut blob_store = HashMapBlobStore::default();
|
||||
@@ -2565,13 +2588,13 @@ pub(crate) mod test {
|
||||
// We haven't added any indexes yet, so there should be 0 rows in indexes.
|
||||
prop_assert_eq!(table.num_rows_in_indexes(), 0);
|
||||
|
||||
let index_id = IndexId(0);
|
||||
let index_id = IndexId::SENTINEL;
|
||||
|
||||
let index = TableIndex::new(&ty, indexed_columns.clone(), IndexKind::BTree, false).unwrap();
|
||||
let index = TableIndex::new(&ty, indexed_columns.clone(), index_kind, is_unique).unwrap();
|
||||
// Add an index on column 0.
|
||||
// Safety:
|
||||
// We're using `ty` as the row type for both `table` and the new index.
|
||||
unsafe { table.insert_index(&blob_store, index_id, index) };
|
||||
prop_assume!(unsafe { table.insert_index(&blob_store, index_id, index) }.is_ok());
|
||||
|
||||
// We have one index, which should be fully populated,
|
||||
// so in total we should have the same number of rows in indexes as we have rows.
|
||||
@@ -2595,14 +2618,15 @@ pub(crate) mod test {
|
||||
let key_size_in_pvs = vals
|
||||
.iter()
|
||||
.map(|row| crate::table_index::KeySize::key_size_in_bytes(&row.project(&indexed_columns).unwrap()) as u64)
|
||||
.sum();
|
||||
.sum::<u64>();
|
||||
prop_assert_eq!(index.num_key_bytes(), key_size_in_pvs);
|
||||
|
||||
let index = TableIndex::new(&ty, indexed_columns, IndexKind::BTree, false).unwrap();
|
||||
// Add a duplicate of the same index, so we can check that all above quantities double.
|
||||
// Safety:
|
||||
// As above, we're using `ty` as the row type for both `table` and the new index.
|
||||
unsafe { table.insert_index(&blob_store, IndexId(1), index) };
|
||||
unsafe { table.insert_index(&blob_store, IndexId(1), index) }
|
||||
.expect("already inserted this index, should not error");
|
||||
|
||||
prop_assert_eq!(table.num_rows_in_indexes(), table.num_rows() * 2);
|
||||
prop_assert_eq!(table.bytes_used_by_index_keys(), key_size_in_pvs * 2);
|
||||
@@ -2722,13 +2746,21 @@ pub(crate) mod test {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn index_size_reporting_matches_slow_implementations_single_column((ty, vals) in generate_typed_row_vec(1..SIZE, 128, 2048)) {
|
||||
test_index_size_reporting(ty, vals, ColList::from(ColId(0)))?;
|
||||
fn index_size_reporting_matches_slow_implementations_single_column(
|
||||
(ty, vals) in generate_typed_row_vec(1..SIZE, 128, 2048),
|
||||
index_kind: IndexKind,
|
||||
is_unique: bool
|
||||
) {
|
||||
test_index_size_reporting(ty, vals, [0].into(), index_kind, is_unique)?;
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn index_size_reporting_matches_slow_implementations_two_column((ty, vals) in generate_typed_row_vec(2..SIZE, 128, 2048)) {
|
||||
test_index_size_reporting(ty, vals, ColList::from([ColId(0), ColId(1)]))?;
|
||||
fn index_size_reporting_matches_slow_implementations_two_column(
|
||||
(ty, vals) in generate_typed_row_vec(2..SIZE, 128, 2048),
|
||||
index_kind: IndexKind,
|
||||
is_unique: bool
|
||||
) {
|
||||
test_index_size_reporting(ty, vals, [0, 1].into(), index_kind, is_unique)?
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -112,6 +112,8 @@ impl<K: Ord + KeySize> Index for BTreeIndex<K> {
|
||||
// `self.insert` always returns `Ok(_)`.
|
||||
Ok(())
|
||||
}
|
||||
|
||||
const IS_RANGED: bool = true;
|
||||
}
|
||||
|
||||
impl<K: KeySize + Ord> BTreeIndex<K> {
|
||||
|
||||
@@ -1,11 +1,21 @@
|
||||
use super::key_size::KeySize;
|
||||
use super::{DecodeResult, RowRef};
|
||||
use crate::indexes::RowPointer;
|
||||
use core::cmp::Ordering;
|
||||
use core::hash::{Hash, Hasher};
|
||||
use core::mem;
|
||||
use core::ops::Deref;
|
||||
use spacetimedb_lib::de::ProductVisitor;
|
||||
use spacetimedb_lib::ser::{SerializeSeqProduct, Serializer as _};
|
||||
use spacetimedb_memory_usage::MemoryUsage;
|
||||
use spacetimedb_primitives::ColList;
|
||||
use spacetimedb_sats::algebraic_value::de::{ValueDeserializeError, ValueDeserializer};
|
||||
use spacetimedb_sats::bsatn::{DecodeError, Deserializer, Serializer};
|
||||
use spacetimedb_sats::buffer::{CountWriter, TeeWriter};
|
||||
use spacetimedb_sats::de::{DeserializeSeed, Error as _};
|
||||
use spacetimedb_sats::{u256, AlgebraicType, AlgebraicValue, ProductTypeElement, Serialize as _, WithTypespace};
|
||||
use spacetimedb_sats::{
|
||||
i256, u256, AlgebraicType, AlgebraicValue, ProductTypeElement, ProductValue, Serialize as _, WithTypespace,
|
||||
};
|
||||
|
||||
/// A key for an all-primitive multi-column index
|
||||
/// serialized to a byte array.
|
||||
@@ -16,15 +26,62 @@ use spacetimedb_sats::{u256, AlgebraicType, AlgebraicValue, ProductTypeElement,
|
||||
/// which is the same as little-endian encoding of the keys for primitive types.
|
||||
///
|
||||
/// As we cannot have too many different `N`s,
|
||||
/// we have a few `N`s, where each is a power of 2.
|
||||
/// we have a few `N`s, where `N = 2^x - 1`.
|
||||
/// A key is then padded with zeroes to the nearest `N`.
|
||||
/// For example, a key `(x: u8, y: u16, z: u32)` for a 3-column index
|
||||
/// would have `N = 1 + 2 + 4 = 7` but would be padded to `N = 8`.
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
|
||||
pub(super) struct BytesKey<const N: usize>([u8; N]);
|
||||
/// For example, a key `(y: u16, z: u32)` for a 2-column index
|
||||
/// would have `N = 2 + 4 = 6` but would be padded to `N = 7`.
|
||||
/// The reason for the `-1`, i.e., `N = 7` and not `N = 8`
|
||||
/// is because `length` takes up one byte.
|
||||
///
|
||||
/// The `length` stores the number of actual bytes used by the key.
|
||||
#[derive(Debug, Eq, Clone, Copy)]
|
||||
pub(super) struct BytesKey<const N: usize> {
|
||||
length: u8,
|
||||
bytes: [u8; N],
|
||||
}
|
||||
|
||||
impl<const N: usize> MemoryUsage for BytesKey<N> {}
|
||||
|
||||
impl<const N: usize> KeySize for BytesKey<N> {
|
||||
type MemoStorage = u64;
|
||||
|
||||
fn key_size_in_bytes(&self) -> usize {
|
||||
self.length as usize
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> Deref for BytesKey<N> {
|
||||
type Target = [u8];
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.bytes[0..self.length as usize]
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> PartialEq for BytesKey<N> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.deref() == other.deref()
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> PartialOrd for BytesKey<N> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> Ord for BytesKey<N> {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
self.deref().cmp(other.deref())
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> Hash for BytesKey<N> {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.deref().hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
/// A difference between btree indices and hash indices
|
||||
/// is that the former btree indices store keys and values separately,
|
||||
/// i.e., as `([K], [RowPointer])`
|
||||
@@ -32,10 +89,23 @@ impl<const N: usize> MemoryUsage for BytesKey<N> {}
|
||||
/// i.e., as `([K, RowPointer])`.
|
||||
///
|
||||
/// For hash indices, it's therefore profitable to ensure
|
||||
/// that the key and the value together fit into an `N` that is a power of 2.
|
||||
/// An `N` that is a power of 2 is well aligned around cache line sizes.
|
||||
pub(super) const fn size_sub_row_pointer(n: usize) -> usize {
|
||||
n - mem::size_of::<RowPointer>()
|
||||
/// that the key and the value together fit into an `N + 1` that is a power of 2.
|
||||
/// An `N + 1` that is a power of 2 is well aligned around cache line sizes.
|
||||
pub(super) const fn size_for_hash_bytes_key(n: usize) -> usize {
|
||||
size_for_btree_bytes_key(n) - mem::size_of::<RowPointer>()
|
||||
}
|
||||
|
||||
/// A difference between btree indices and hash indices
|
||||
/// is that the former btree indices store keys and values separately,
|
||||
/// i.e., as `([K], [RowPointer])`
|
||||
/// whereas hash indices store them together,
|
||||
/// i.e., as `([K, RowPointer])`.
|
||||
///
|
||||
/// For btree indices, it's therefore sufficient to ensure
|
||||
/// that the key alone fits into an `N + 1` that is a power of 2.
|
||||
/// An `N + 1` that is a power of 2 is well aligned around cache line sizes.
|
||||
pub(super) const fn size_for_btree_bytes_key(n: usize) -> usize {
|
||||
n - 1
|
||||
}
|
||||
|
||||
/// Returns the number of bytes required at most to store a key at `ty`
|
||||
@@ -43,8 +113,9 @@ pub(super) const fn size_sub_row_pointer(n: usize) -> usize {
|
||||
///
|
||||
/// If keys at `ty` are incompatible with fixed byte keys,
|
||||
/// e.g., because they are of unbounded length,
|
||||
/// or because `is_ranged_idx` and `ty` contains a float,
|
||||
/// then `None` is returned.
|
||||
pub(super) fn required_bytes_key_size(ty: &AlgebraicType) -> Option<usize> {
|
||||
pub(super) fn required_bytes_key_size(ty: &AlgebraicType, is_ranged_idx: bool) -> Option<usize> {
|
||||
use AlgebraicType::*;
|
||||
|
||||
match ty {
|
||||
@@ -55,10 +126,18 @@ pub(super) fn required_bytes_key_size(ty: &AlgebraicType) -> Option<usize> {
|
||||
|
||||
// For sum, we report the greatest possible fixed size.
|
||||
// A key may be of variable size, as long as it fits within an upper bound.
|
||||
//
|
||||
// It's valid to use `RangeCompatBytesKey`-ified sums in range index,
|
||||
// i.e., when `is_ranged_idx`,
|
||||
// as `Ord for AlgebraicValue` delegates to `Ord for SumValue`
|
||||
// which compares the `tag` first and the payload (`value`) second,
|
||||
// The `RangeCompatBytesKey` encoding of sums places the `tag` first and the payload second.
|
||||
// When comparing two `[u8]` slices with encoded sums,
|
||||
// this produces an ordering that also compares the `tag` first and the payload second.
|
||||
Sum(ty) => {
|
||||
let mut max_size = 0;
|
||||
for var in &ty.variants {
|
||||
let variant_size = required_bytes_key_size(&var.algebraic_type)?;
|
||||
let variant_size = required_bytes_key_size(&var.algebraic_type, is_ranged_idx)?;
|
||||
max_size = max_size.max(variant_size);
|
||||
}
|
||||
// The sum tag is represented as a u8 in BSATN,
|
||||
@@ -70,11 +149,15 @@ pub(super) fn required_bytes_key_size(ty: &AlgebraicType) -> Option<usize> {
|
||||
Product(ty) => {
|
||||
let mut total_size = 0;
|
||||
for elem in &ty.elements {
|
||||
total_size += required_bytes_key_size(&elem.algebraic_type)?;
|
||||
total_size += required_bytes_key_size(&elem.algebraic_type, is_ranged_idx)?;
|
||||
}
|
||||
Some(total_size)
|
||||
}
|
||||
|
||||
// Floats are stored in IEEE 754 format,
|
||||
// so their byte representation is not order-preserving.
|
||||
F32 | F64 if is_ranged_idx => None,
|
||||
|
||||
// Primitives:
|
||||
Bool | U8 | I8 => Some(mem::size_of::<u8>()),
|
||||
U16 | I16 => Some(mem::size_of::<u16>()),
|
||||
@@ -85,7 +168,42 @@ pub(super) fn required_bytes_key_size(ty: &AlgebraicType) -> Option<usize> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Validates BSATN `byte` to conform to `seed`.
|
||||
///
|
||||
/// The BSATN can originate from untrusted sources, e.g., from module code.
|
||||
/// This also means that e.g., a `BytesKey` can be trusted to hold valid BSATN
|
||||
/// for the key type, which we can rely on in e.g., `decode_algebraic_value`,
|
||||
/// which isn't used in a context where it would be appropriate to fail.
|
||||
///
|
||||
/// Another reason to validate is that we wish for `BytesKey` to be strictly
|
||||
/// an optimization and not allow things that would be rejected by the non-optimized code.
|
||||
///
|
||||
/// After validating, we also don't need to validate that `bytes`
|
||||
/// will fit into e.g., a `BytesKey<N>`
|
||||
/// since if all parts that are encoded into it are valid according to a key type,
|
||||
/// then `bytes` cannot be longer than `N`.
|
||||
fn validate<'a, 'de, S: 'a + ?Sized>(seed: &'a S, mut bytes: &'de [u8]) -> DecodeResult<()>
|
||||
where
|
||||
WithTypespace<'a, S>: DeserializeSeed<'de>,
|
||||
{
|
||||
WithTypespace::empty(seed).validate(Deserializer::new(&mut bytes))?;
|
||||
|
||||
if !bytes.is_empty() {
|
||||
return Err(DecodeError::custom(format_args!(
|
||||
"after decoding, there are {} extra bytes",
|
||||
bytes.len()
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl<const N: usize> BytesKey<N> {
|
||||
fn new(length: usize, bytes: [u8; N]) -> Self {
|
||||
let length = length as _;
|
||||
Self { length, bytes }
|
||||
}
|
||||
|
||||
/// Decodes `self` as an [`AlgebraicValue`] at `key_type`.
|
||||
///
|
||||
/// An incorrect `key_type`,
|
||||
@@ -94,62 +212,29 @@ impl<const N: usize> BytesKey<N> {
|
||||
/// The method could also silently succeed
|
||||
/// if the passed `key_type` incidentally happens to be compatible the stored bytes in `self`.
|
||||
pub(super) fn decode_algebraic_value(&self, key_type: &AlgebraicType) -> AlgebraicValue {
|
||||
AlgebraicValue::decode(key_type, &mut self.0.as_slice())
|
||||
AlgebraicValue::decode(key_type, &mut self.deref())
|
||||
.expect("A `BytesKey` should by construction always deserialize to the right `key_type`")
|
||||
}
|
||||
|
||||
/// Ensure bytes of length `got` fit in `N` or return an error.
|
||||
fn ensure_key_fits(got: usize) -> DecodeResult<()> {
|
||||
if got > N {
|
||||
return Err(DecodeError::custom(format_args!(
|
||||
"key provided is too long, expected at most {N}, but got {got}"
|
||||
)));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Decodes `prefix` and `endpoint` in BSATN to a [`BytesKey<N>`]
|
||||
/// by copying over both if they fit into the key.
|
||||
pub(super) fn from_bsatn_prefix_and_endpoint(
|
||||
prefix: &[u8],
|
||||
prefix_types: &[ProductTypeElement],
|
||||
endpoint: &[u8],
|
||||
range_type: &AlgebraicType,
|
||||
) -> DecodeResult<Self> {
|
||||
// Validate the BSATN.
|
||||
//
|
||||
// The BSATN can originate from untrusted sources, e.g., from module code.
|
||||
// This also means that a `BytesKey` can be trusted to hold valid BSATN
|
||||
// for the key type, which we can rely on in e.g., `decode_algebraic_value`,
|
||||
// which isn't used in a context where it would be appropriate to fail.
|
||||
//
|
||||
// Another reason to validate is that we wish for `BytesKey` to be strictly
|
||||
// an optimization and not allow things that would be rejected by the non-optimized code.
|
||||
WithTypespace::empty(prefix_types).validate(Deserializer::new(&mut { prefix }))?;
|
||||
WithTypespace::empty(range_type).validate(Deserializer::new(&mut { endpoint }))?;
|
||||
// Check that the `prefix` and the `endpoint` together fit into the key.
|
||||
let prefix_len = prefix.len();
|
||||
let endpoint_len = endpoint.len();
|
||||
Self::ensure_key_fits(prefix_len + endpoint_len)?;
|
||||
// Copy the `prefix` and the `endpoint` over.
|
||||
let mut arr = [0; N];
|
||||
arr[..prefix_len].copy_from_slice(prefix);
|
||||
arr[prefix_len..prefix_len + endpoint_len].copy_from_slice(endpoint);
|
||||
Ok(Self(arr))
|
||||
}
|
||||
|
||||
/// Decodes `bytes` in BSATN to a [`BytesKey<N>`]
|
||||
/// by copying over the bytes if they fit into the key.
|
||||
pub(super) fn from_bsatn(ty: &AlgebraicType, bytes: &[u8]) -> DecodeResult<Self> {
|
||||
// Validate the BSATN. See `Self::from_bsatn_prefix_and_endpoint` for more details.
|
||||
WithTypespace::empty(ty).validate(Deserializer::new(&mut { bytes }))?;
|
||||
// Check that the `bytes` fit into the key.
|
||||
let got = bytes.len();
|
||||
Self::ensure_key_fits(got)?;
|
||||
// Validate the BSATN.
|
||||
validate(ty, bytes)?;
|
||||
// Copy the bytes over.
|
||||
let got = bytes.len();
|
||||
let mut arr = [0; N];
|
||||
arr[..got].copy_from_slice(bytes);
|
||||
Ok(Self(arr))
|
||||
Ok(Self::new(got, arr))
|
||||
}
|
||||
|
||||
fn via_serializer(work: impl FnOnce(Serializer<'_, TeeWriter<&mut [u8], CountWriter>>)) -> Self {
|
||||
let mut bytes = [0; N];
|
||||
let (_, length) = CountWriter::run(bytes.as_mut_slice(), |writer| {
|
||||
let ser = Serializer::new(writer);
|
||||
work(ser)
|
||||
});
|
||||
Self::new(length, bytes)
|
||||
}
|
||||
|
||||
/// Serializes the columns `cols` in `row_ref` to a [`BytesKey<N>`].
|
||||
@@ -160,12 +245,10 @@ impl<const N: usize> BytesKey<N> {
|
||||
///
|
||||
/// SAFETY: Any `col` in `cols` is in-bounds of `row_ref`'s layout.
|
||||
pub(super) unsafe fn from_row_ref(cols: &ColList, row_ref: RowRef<'_>) -> Self {
|
||||
let mut arr = [0; N];
|
||||
let mut sink = arr.as_mut_slice();
|
||||
let ser = Serializer::new(&mut sink);
|
||||
unsafe { row_ref.serialize_columns_unchecked(cols, ser) }
|
||||
.expect("should've serialized a `row_ref` to BSATN successfully");
|
||||
Self(arr)
|
||||
Self::via_serializer(|ser| {
|
||||
unsafe { row_ref.serialize_columns_unchecked(cols, ser) }
|
||||
.expect("should've serialized a `row_ref` to BSATN successfully");
|
||||
})
|
||||
}
|
||||
|
||||
/// Serializes `av` to a [`BytesKey<N>`].
|
||||
@@ -174,25 +257,352 @@ impl<const N: usize> BytesKey<N> {
|
||||
/// will fit into `N` bytes when serialized into BSATN.
|
||||
/// The method panics otherwise.
|
||||
pub(super) fn from_algebraic_value(av: &AlgebraicValue) -> Self {
|
||||
let mut arr = [0; N];
|
||||
let mut sink = arr.as_mut_slice();
|
||||
let ser = Serializer::new(&mut sink);
|
||||
av.serialize_into_bsatn(ser)
|
||||
.expect("should've serialized an `AlgebraicValue` to BSATN successfully");
|
||||
Self(arr)
|
||||
Self::via_serializer(|ser| {
|
||||
av.serialize_into_bsatn(ser)
|
||||
.expect("should've serialized an `AlgebraicValue` to BSATN successfully")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// A key for an all-primitive multi-column index
|
||||
/// serialized to a byte array.
|
||||
///
|
||||
/// These keys are derived from [`BytesKey`]
|
||||
/// but are post-processed to work with ranges,
|
||||
/// unlike the former type,
|
||||
/// which only work with point indices (e.g., hash indices).
|
||||
///
|
||||
/// The post-processing converts how some types are stored in the encoding:
|
||||
/// - unsigned integer types `uN`, where `N > 8` from little-endian to big-endian.
|
||||
/// - signed integers are shifted such that `iN::MIN` is stored as `0`
|
||||
/// and `iN:MAX` is stored as `uN::MAX`.
|
||||
///
|
||||
/// The `length` stores the number of actual bytes used by the key.
|
||||
#[derive(Debug, Eq, Clone, Copy)]
|
||||
pub(super) struct RangeCompatBytesKey<const N: usize> {
|
||||
length: u8,
|
||||
bytes: [u8; N],
|
||||
}
|
||||
|
||||
impl<const N: usize> MemoryUsage for RangeCompatBytesKey<N> {}
|
||||
|
||||
impl<const N: usize> KeySize for RangeCompatBytesKey<N> {
|
||||
type MemoStorage = u64;
|
||||
|
||||
fn key_size_in_bytes(&self) -> usize {
|
||||
self.length as usize
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> Deref for RangeCompatBytesKey<N> {
|
||||
type Target = [u8];
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.bytes[0..self.length as usize]
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> PartialEq for RangeCompatBytesKey<N> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.deref() == other.deref()
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> PartialOrd for RangeCompatBytesKey<N> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> Ord for RangeCompatBytesKey<N> {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
self.deref().cmp(other.deref())
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> Hash for RangeCompatBytesKey<N> {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.deref().hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
/// Splits `slice` into the first `N` bytes converting the former via `map_bytes`
|
||||
/// and returning the rest.
|
||||
fn split_map_write_back<const N: usize>(slice: &mut [u8], map_bytes: impl FnOnce([u8; N]) -> [u8; N]) -> &mut [u8] {
|
||||
let (bytes, rest) = slice.split_first_chunk_mut().unwrap();
|
||||
*bytes = map_bytes(*bytes);
|
||||
rest
|
||||
}
|
||||
|
||||
impl<const N: usize> RangeCompatBytesKey<N> {
|
||||
fn new(length: usize, bytes: [u8; N]) -> Self {
|
||||
let length = length as _;
|
||||
Self { length, bytes }
|
||||
}
|
||||
|
||||
/// Decodes `self` as an [`AlgebraicValue`] at `key_type`.
|
||||
///
|
||||
/// An incorrect `key_type`,
|
||||
/// i.e., one other than what was used when the index was created,
|
||||
/// may lead to a panic, but this is not guaranteed.
|
||||
/// The method could also silently succeed
|
||||
/// if the passed `key_type` incidentally happens to be compatible the stored bytes in `self`.
|
||||
pub(super) fn decode_algebraic_value(&self, key_type: &AlgebraicType) -> AlgebraicValue {
|
||||
Self::to_bytes_key(*self, key_type).decode_algebraic_value(key_type)
|
||||
}
|
||||
|
||||
/// Decodes `prefix` in BSATN to a [`RangeCompatBytesKey<N>`]
|
||||
/// by copying over `prefix` and massaging if they fit into the key.
|
||||
pub(super) fn from_bsatn_prefix(prefix: &[u8], prefix_types: &[ProductTypeElement]) -> DecodeResult<Self> {
|
||||
// Validate the BSATN.
|
||||
validate(prefix_types, prefix)?;
|
||||
|
||||
// Copy the `prefix` over.
|
||||
let mut bytes = [0; N];
|
||||
let got = prefix.len();
|
||||
bytes[..got].copy_from_slice(prefix);
|
||||
|
||||
// Massage the `bytes`.
|
||||
let mut slice = bytes.as_mut_slice();
|
||||
for ty in prefix_types {
|
||||
slice = Self::process_from_bytes_key(slice, &ty.algebraic_type);
|
||||
}
|
||||
|
||||
Ok(Self::new(got, bytes))
|
||||
}
|
||||
|
||||
/// Decodes `prefix` and `endpoint` in BSATN to a [`RangeCompatBytesKey<N>`]
|
||||
/// by copying over both and massaging if they fit into the key.
|
||||
pub(super) fn from_bsatn_prefix_and_endpoint(
|
||||
prefix: &[u8],
|
||||
prefix_types: &[ProductTypeElement],
|
||||
endpoint: &[u8],
|
||||
range_type: &AlgebraicType,
|
||||
) -> DecodeResult<Self> {
|
||||
// Validate the BSATN.
|
||||
validate(prefix_types, prefix)?;
|
||||
validate(range_type, endpoint)?;
|
||||
|
||||
// Sum up the lengths.
|
||||
let prefix_len = prefix.len();
|
||||
let endpoint_len = endpoint.len();
|
||||
let total_len = prefix_len + endpoint_len;
|
||||
|
||||
// Copy the `prefix` and the `endpoint` over.
|
||||
let mut bytes = [0; N];
|
||||
bytes[..prefix_len].copy_from_slice(prefix);
|
||||
bytes[prefix_len..total_len].copy_from_slice(endpoint);
|
||||
|
||||
// Massage the bytes.
|
||||
let mut slice = bytes.as_mut_slice();
|
||||
for ty in prefix_types {
|
||||
slice = Self::process_from_bytes_key(slice, &ty.algebraic_type);
|
||||
}
|
||||
Self::process_from_bytes_key(slice, range_type);
|
||||
|
||||
Ok(Self::new(total_len, bytes))
|
||||
}
|
||||
|
||||
/// Decodes `bytes` in BSATN to a [`RangeCompatBytesKey<N>`]
|
||||
/// by copying over the bytes if they fit into the key.
|
||||
pub(super) fn from_bsatn(ty: &AlgebraicType, bytes: &[u8]) -> DecodeResult<Self> {
|
||||
let key = BytesKey::from_bsatn(ty, bytes)?;
|
||||
Ok(Self::from_bytes_key(key, ty))
|
||||
}
|
||||
|
||||
/// Serializes the columns `cols` in `row_ref` to a [`BytesKey<N>`].
|
||||
///
|
||||
/// It's assumed that `row_ref` projected to `cols`
|
||||
/// will fit into `N` bytes when serialized into BSATN.
|
||||
/// The method panics otherwise.
|
||||
///
|
||||
/// SAFETY: Any `col` in `cols` is in-bounds of `row_ref`'s layout.
|
||||
pub(super) unsafe fn from_row_ref(cols: &ColList, row_ref: RowRef<'_>, ty: &AlgebraicType) -> Self {
|
||||
// SAFETY: same as caller requirements.
|
||||
let key = unsafe { BytesKey::from_row_ref(cols, row_ref) };
|
||||
Self::from_bytes_key(key, ty)
|
||||
}
|
||||
|
||||
/// Decodes `prefix` in `AlgebraicValue` form to a [`RangeCompatBytesKey<N>`]
|
||||
/// by serializing the prefix and massaging.
|
||||
pub(super) fn from_algebraic_value_prefix(
|
||||
prefix: &ProductValue,
|
||||
prefix_types: &[ProductTypeElement],
|
||||
) -> Result<Self, ValueDeserializeError> {
|
||||
// Validate the prefix.
|
||||
WithTypespace::empty(prefix_types).validate_seq_product(ValueDeserializer::from_product_ref(prefix))?;
|
||||
|
||||
// Serialize the `prefix` and the `endpoint` over.
|
||||
let bytes = BytesKey::via_serializer(|ser| {
|
||||
prefix
|
||||
.serialize(ser)
|
||||
.expect("should've serialized to BSATN successfully");
|
||||
});
|
||||
let BytesKey { mut bytes, length } = bytes;
|
||||
|
||||
// Massage the bytes.
|
||||
let mut slice = bytes.as_mut_slice();
|
||||
for ty in prefix_types {
|
||||
slice = Self::process_from_bytes_key(slice, &ty.algebraic_type);
|
||||
}
|
||||
|
||||
Ok(Self::new(length as usize, bytes))
|
||||
}
|
||||
|
||||
/// Decodes `prefix` and `endpoint` in `AlgebraicValue` form to a [`RangeCompatBytesKey<N>`]
|
||||
/// by serializing over both and massaging if they fit into the key.
|
||||
pub(super) fn from_algebraic_value_prefix_and_endpoint(
|
||||
prefix: &ProductValue,
|
||||
prefix_types: &[ProductTypeElement],
|
||||
endpoint: &AlgebraicValue,
|
||||
range_type: &AlgebraicType,
|
||||
) -> Result<Self, ValueDeserializeError> {
|
||||
// Validate the values.
|
||||
WithTypespace::empty(prefix_types).validate_seq_product(ValueDeserializer::from_product_ref(prefix))?;
|
||||
WithTypespace::empty(range_type).validate(ValueDeserializer::from_ref(endpoint))?;
|
||||
|
||||
// Serialize the `prefix` and the `endpoint` over.
|
||||
let bytes = BytesKey::via_serializer(|ser| {
|
||||
(|| {
|
||||
let mut ser = ser.serialize_seq_product(2)?;
|
||||
ser.serialize_element(&prefix)?;
|
||||
ser.serialize_element(&endpoint)?;
|
||||
ser.end()
|
||||
})()
|
||||
.expect("should've serialized to BSATN successfully");
|
||||
});
|
||||
let BytesKey { mut bytes, length } = bytes;
|
||||
|
||||
// Massage the bytes.
|
||||
let mut slice = bytes.as_mut_slice();
|
||||
for ty in prefix_types {
|
||||
slice = Self::process_from_bytes_key(slice, &ty.algebraic_type);
|
||||
}
|
||||
Self::process_from_bytes_key(slice, range_type);
|
||||
|
||||
Ok(Self::new(length as usize, bytes))
|
||||
}
|
||||
|
||||
/// Serializes `av` to a [`BytesKey<N>`].
|
||||
///
|
||||
/// It's assumed that `av`
|
||||
/// will fit into `N` bytes when serialized into BSATN.
|
||||
/// The method panics otherwise.
|
||||
pub(super) fn from_algebraic_value(av: &AlgebraicValue, ty: &AlgebraicType) -> Self {
|
||||
let key = BytesKey::from_algebraic_value(av);
|
||||
Self::from_bytes_key(key, ty)
|
||||
}
|
||||
|
||||
fn from_bytes_key(key: BytesKey<N>, ty: &AlgebraicType) -> Self {
|
||||
let BytesKey { length, mut bytes } = key;
|
||||
Self::process_from_bytes_key(bytes.as_mut_slice(), ty);
|
||||
Self { length, bytes }
|
||||
}
|
||||
|
||||
fn process_from_bytes_key<'a>(mut slice: &'a mut [u8], ty: &AlgebraicType) -> &'a mut [u8] {
|
||||
use AlgebraicType::*;
|
||||
match ty {
|
||||
// For sums, read the tag and process the active variant.
|
||||
Sum(ty) => {
|
||||
let (&mut tag, rest) = slice.split_first_mut().unwrap();
|
||||
let ty = &ty.variants[tag as usize].algebraic_type;
|
||||
Self::process_from_bytes_key(rest, ty)
|
||||
}
|
||||
// For products, just process each field in sequence.
|
||||
Product(ty) => {
|
||||
for ty in &ty.elements {
|
||||
slice = Self::process_from_bytes_key(slice, &ty.algebraic_type);
|
||||
}
|
||||
slice
|
||||
}
|
||||
// No need to do anything as these are only a single byte long.
|
||||
Bool | U8 => &mut slice[1..],
|
||||
// For unsigned integers, read them as LE and write back as BE.
|
||||
U16 => split_map_write_back(slice, |b| u16::from_le_bytes(b).to_be_bytes()),
|
||||
U32 => split_map_write_back(slice, |b| u32::from_le_bytes(b).to_be_bytes()),
|
||||
U64 => split_map_write_back(slice, |b| u64::from_le_bytes(b).to_be_bytes()),
|
||||
U128 => split_map_write_back(slice, |b| u128::from_le_bytes(b).to_be_bytes()),
|
||||
U256 => split_map_write_back(slice, |b| u256::from_le_bytes(b).to_be_bytes()),
|
||||
// For signed integers, read them as LE, make them unsigned, and write back as BE.
|
||||
I8 => split_map_write_back(slice, |b| i8::from_le_bytes(b).wrapping_sub(i8::MIN).to_be_bytes()),
|
||||
I16 => split_map_write_back(slice, |b| i16::from_le_bytes(b).wrapping_sub(i16::MIN).to_be_bytes()),
|
||||
I32 => split_map_write_back(slice, |b| i32::from_le_bytes(b).wrapping_sub(i32::MIN).to_be_bytes()),
|
||||
I64 => split_map_write_back(slice, |b| i64::from_le_bytes(b).wrapping_sub(i64::MIN).to_be_bytes()),
|
||||
I128 => split_map_write_back(slice, |b| i128::from_le_bytes(b).wrapping_sub(i128::MIN).to_be_bytes()),
|
||||
I256 => split_map_write_back(slice, |b| i256::from_le_bytes(b).wrapping_sub(i256::MIN).to_be_bytes()),
|
||||
// Refs don't exist here and
|
||||
// arrays and strings are of unbounded length.
|
||||
// For floats, we haven't considred them yet.
|
||||
Ref(_) | Array(_) | String | F32 | F64 => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_bytes_key(key: Self, ty: &AlgebraicType) -> BytesKey<N> {
|
||||
fn process<'a>(mut slice: &'a mut [u8], ty: &AlgebraicType) -> &'a mut [u8] {
|
||||
use AlgebraicType::*;
|
||||
match ty {
|
||||
// For sums, read the tag and process the active variant.
|
||||
Sum(ty) => {
|
||||
let (&mut tag, rest) = slice.split_first_mut().unwrap();
|
||||
let ty = &ty.variants[tag as usize].algebraic_type;
|
||||
process(rest, ty)
|
||||
}
|
||||
// For products, just process each field in sequence.
|
||||
Product(ty) => {
|
||||
for ty in &ty.elements {
|
||||
slice = process(slice, &ty.algebraic_type);
|
||||
}
|
||||
slice
|
||||
}
|
||||
// No need to do anything as these are only a single byte long.
|
||||
Bool | U8 => &mut slice[1..],
|
||||
// For unsigned integers, read them as BE and write back as LE.
|
||||
U16 => split_map_write_back(slice, |b| u16::from_be_bytes(b).to_le_bytes()),
|
||||
U32 => split_map_write_back(slice, |b| u32::from_be_bytes(b).to_le_bytes()),
|
||||
U64 => split_map_write_back(slice, |b| u64::from_be_bytes(b).to_le_bytes()),
|
||||
U128 => split_map_write_back(slice, |b| u128::from_be_bytes(b).to_le_bytes()),
|
||||
U256 => split_map_write_back(slice, |b| u256::from_be_bytes(b).to_le_bytes()),
|
||||
// For signed integers, read them as LE, make them unsigned, and write back as BE.
|
||||
I8 => split_map_write_back(slice, |b| i8::from_be_bytes(b).wrapping_add(i8::MIN).to_le_bytes()),
|
||||
I16 => split_map_write_back(slice, |b| i16::from_be_bytes(b).wrapping_add(i16::MIN).to_le_bytes()),
|
||||
I32 => split_map_write_back(slice, |b| i32::from_be_bytes(b).wrapping_add(i32::MIN).to_le_bytes()),
|
||||
I64 => split_map_write_back(slice, |b| i64::from_be_bytes(b).wrapping_add(i64::MIN).to_le_bytes()),
|
||||
I128 => split_map_write_back(slice, |b| i128::from_be_bytes(b).wrapping_add(i128::MIN).to_le_bytes()),
|
||||
I256 => split_map_write_back(slice, |b| i256::from_be_bytes(b).wrapping_add(i256::MIN).to_le_bytes()),
|
||||
// Refs don't exist here and
|
||||
// arrays and strings are of unbounded length.
|
||||
// For floats, we haven't considred them yet.
|
||||
Ref(_) | Array(_) | String | F32 | F64 => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
let Self { length, mut bytes } = key;
|
||||
process(bytes.as_mut_slice(), ty);
|
||||
BytesKey { length, bytes }
|
||||
}
|
||||
|
||||
/// Extend the length to `N` by filling with `u8::MAX`.
|
||||
pub(super) fn add_max_suffix(mut self) -> Self {
|
||||
let len = self.len();
|
||||
self.bytes[len..].fill(u8::MAX);
|
||||
self.length = N as u8;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use proptest::array::uniform;
|
||||
use proptest::prelude::*;
|
||||
use spacetimedb_sats::bsatn::to_len;
|
||||
use spacetimedb_sats::proptest::generate_typed_row;
|
||||
use spacetimedb_sats::proptest::{gen_with, generate_product_value, generate_row_type, generate_typed_row, SIZE};
|
||||
|
||||
const N: usize = 4096;
|
||||
const N: usize = u8::MAX as usize;
|
||||
|
||||
proptest! {
|
||||
#![proptest_config(ProptestConfig { max_global_rejects: 65536, ..<_>::default() })]
|
||||
|
||||
#[test]
|
||||
fn test_bytes_key_round_trip((ty, av) in generate_typed_row()) {
|
||||
let len = to_len(&av).unwrap();
|
||||
@@ -202,20 +612,14 @@ mod test {
|
||||
let av = AlgebraicValue::Product(av);
|
||||
let key = BytesKey::<N>::from_algebraic_value(&av);
|
||||
let decoded_av = key.decode_algebraic_value(&ty);
|
||||
assert_eq!(av, decoded_av);
|
||||
prop_assert_eq!(av, decoded_av);
|
||||
}
|
||||
|
||||
/*
|
||||
// This test turned out not to hold for integers larger than u8,
|
||||
// as BSATN stores them little-endian,
|
||||
// but `Ord for AlgebraicValue` compares them as big-endian.
|
||||
// It's included here for posterity and in case we'd like to
|
||||
// massage the BSATN before storing it in the `BytesKey`
|
||||
// to make it order-preserving.
|
||||
|
||||
use proptest::array::uniform;
|
||||
use spacetimedb_sats::proptest::{gen_with, generate_product_value, generate_row_type, SIZE};
|
||||
|
||||
/// This test does not hold for `BytesKey`
|
||||
/// as BSATN stores them little-endian,
|
||||
/// but `Ord for AlgebraicValue` compares them as big-endian.
|
||||
/// It does however hold for `RangeCompatBytesKey` which
|
||||
/// massages the BSATN to make it order-preserving.
|
||||
#[test]
|
||||
fn order_in_bsatn_is_preserved((ty, [r1, r2]) in gen_with(generate_row_type(0..=SIZE), |ty| uniform(generate_product_value(ty)))) {
|
||||
let ty: AlgebraicType = ty.into();
|
||||
@@ -223,17 +627,17 @@ mod test {
|
||||
let r2: AlgebraicValue = r2.into();
|
||||
|
||||
let Some(required) = required_bytes_key_size(&ty, true) else {
|
||||
//dbg!(&ty);
|
||||
return Err(TestCaseError::reject("type is incompatible with fixed byte keys in range indices"));
|
||||
};
|
||||
prop_assume!(required <= N);
|
||||
|
||||
let k1 = BytesKey::<N>::from_algebraic_value(&r1);
|
||||
let kr1 = RangeCompatBytesKey::from_bytes_key(k1, &ty);
|
||||
let k2 = BytesKey::<N>::from_algebraic_value(&r2);
|
||||
let ord_k = k1.cmp(&k2);
|
||||
let kr2 = RangeCompatBytesKey::from_bytes_key(k2, &ty);
|
||||
let ord_kr = kr1.cmp(&kr2);
|
||||
let ord_r = r1.cmp(&r2);
|
||||
prop_assert_eq!(ord_k, ord_r);
|
||||
prop_assert_eq!(ord_kr, ord_r);
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,6 +98,10 @@ impl<K: KeySize + Eq + Hash> Index for HashIndex<K> {
|
||||
self.map.len()
|
||||
}
|
||||
|
||||
fn num_key_bytes(&self) -> u64 {
|
||||
self.num_key_bytes
|
||||
}
|
||||
|
||||
fn num_rows(&self) -> usize {
|
||||
self.num_rows
|
||||
}
|
||||
@@ -114,6 +118,8 @@ impl<K: KeySize + Eq + Hash> Index for HashIndex<K> {
|
||||
// `self.insert` always returns `Ok(_)`.
|
||||
Ok(())
|
||||
}
|
||||
|
||||
const IS_RANGED: bool = false;
|
||||
}
|
||||
|
||||
impl<K: KeySize + Eq + Hash> HashIndex<K> {
|
||||
|
||||
@@ -133,6 +133,9 @@ pub trait Index {
|
||||
/// The trait imposes no particular order.
|
||||
/// Implementations may provide a non-deterministic order.
|
||||
fn iter(&self) -> Self::Iter<'_>;
|
||||
|
||||
/// Whether the index is ranged or not.
|
||||
const IS_RANGED: bool;
|
||||
}
|
||||
|
||||
pub trait RangedIndex: Index {
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
use crate::table_index::BytesKey;
|
||||
|
||||
use super::Index;
|
||||
use core::mem;
|
||||
use spacetimedb_memory_usage::MemoryUsage;
|
||||
@@ -218,7 +216,3 @@ impl KeySize for ArrayValue {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> KeySize for BytesKey<N> {
|
||||
type MemoStorage = ();
|
||||
}
|
||||
|
||||
+773
-211
File diff suppressed because it is too large
Load Diff
@@ -97,6 +97,8 @@ impl<K: Ord + KeySize> Index for UniqueBTreeIndex<K> {
|
||||
};
|
||||
Err(*found)
|
||||
}
|
||||
|
||||
const IS_RANGED: bool = true;
|
||||
}
|
||||
|
||||
impl<K: KeySize + Ord> UniqueBTreeIndex<K> {
|
||||
@@ -140,6 +142,7 @@ impl<K: KeySize + Ord> UniqueBTreeIndex<K> {
|
||||
}
|
||||
|
||||
/// An iterator over the potential value in a unique index for a given key.
|
||||
#[derive(Clone)]
|
||||
pub struct UniquePointIter {
|
||||
/// The iterator seeking for matching keys in the range.
|
||||
pub(super) iter: IntoIter<RowPointer>,
|
||||
|
||||
@@ -122,6 +122,8 @@ impl<K: ToFromUsize + KeySize> Index for UniqueDirectFixedCapIndex<K> {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
const IS_RANGED: bool = true;
|
||||
}
|
||||
|
||||
impl<K: ToFromUsize + KeySize> RangedIndex for UniqueDirectFixedCapIndex<K> {
|
||||
|
||||
@@ -284,6 +284,8 @@ impl<K: ToFromUsize + KeySize> Index for UniqueDirectIndex<K> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
const IS_RANGED: bool = true;
|
||||
}
|
||||
|
||||
impl<K: ToFromUsize + KeySize> RangedIndex for UniqueDirectIndex<K> {
|
||||
|
||||
@@ -101,6 +101,8 @@ impl<K: KeySize + Eq + Hash> Index for UniqueHashIndex<K> {
|
||||
fn iter(&self) -> Self::Iter<'_> {
|
||||
self.map.values().copied()
|
||||
}
|
||||
|
||||
const IS_RANGED: bool = false;
|
||||
}
|
||||
|
||||
impl<K: KeySize + Eq + Hash> UniqueHashIndex<K> {
|
||||
|
||||
Reference in New Issue
Block a user