mirror of
https://github.com/valkey-io/valkey.git
synced 2026-05-09 06:49:36 -04:00
e9014fd02b
## Overview

Sharing memory between the module and engine reduces memory overhead by eliminating redundant copies of stored records in the module. This is particularly beneficial for search workloads that require indexing large volumes of documents.

### Vectors

Vector similarity search requires storing large volumes of high-cardinality vectors. For example, a single vector with 512 dimensions consumes 2048 bytes (at 4 bytes per dimension), and typical workloads often involve millions of vectors. Due to the lack of a memory-sharing mechanism between the module and the engine, valkey-search currently doubles memory consumption when indexing vectors, significantly increasing operational costs. This limitation introduces adoption friction and reduces valkey-search's competitiveness.

## Memory Allocation Strategy

At a fundamental level, there are two primary allocation strategies:

- [Chosen] Module-allocated memory shared with the engine.
- Engine-allocated memory shared with the module.

For valkey-search, it is crucial that vectors reside in cache-aligned memory to maximize SIMD optimizations. Allowing the module to allocate memory provides greater flexibility for different use cases, though it introduces slightly higher implementation complexity.

## Old Implementation

The old [implementation](https://github.com/valkey-io/valkey/pull/1804) was based on ref-counting and introduced a new SDS type. After further discussion, we [agreed](https://github.com/valkey-io/valkey/pull/1804#issuecomment-2905115712) to simplify the design by removing ref-counting and avoiding the introduction of a new SDS type.

## New Implementation - Key Points

1. The engine exposes a new interface, `VM_HashSetViewValue`, which sets the value as a view of a buffer that is owned by the module. The function accepts the hash key, hash field, and a buffer along with its length.
2. `ViewValue` is a new data type that captures the externalized buffer and its length.

## valkey-search Usage

### Insertion

1. Upon receiving a key space notification for a new hash or JSON key with an indexed vector attribute, valkey-search allocates cache-aligned memory and deep-copies the vector value.
2. valkey-search then calls `VM_HashSetViewValue` to avoid keeping two copies of the vector.

### Deletion

When receiving a key space notification for a deleted hash key or hash field that was indexed as a vector, valkey-search deletes the corresponding entry from the index.

### Update

Handled similarly to insertion.

---------

Signed-off-by: yairgott <yairgott@gmail.com>
Signed-off-by: Yair Gottdenker <yairg@google.com>
Signed-off-by: Yair Gottdenker <yairgott@gmail.com>
Co-authored-by: Yair Gottdenker <yairg@google.com>
Co-authored-by: Ran Shidlansik <ranshid@amazon.com>
Co-authored-by: Jim Brunner <brunnerj@amazon.com>
100 lines
3.1 KiB
C
100 lines
3.1 KiB
C
/* Module Test: Verifies the module's capability to share an owned buffer with the core,
 * which is then stored in a hash key field using a non-owning string reference (stringRef). */
|
|
#include "valkeymodule.h"
|
|
#include <string.h>
|
|
|
|
/* One module-owned buffer, kept as a node of a singly linked list. */
typedef struct bufferNode {
    char *buf;               /* Heap copy of the bytes handed to addBuffer(). */
    size_t len;              /* Number of bytes stored in buf. */
    struct bufferNode *next; /* Next node in the list, or NULL for the last one. */
} bufferNode;
|
|
|
|
/* Head of the singly linked list of module-owned buffers; NULL while the list is empty.
 * addBuffer() pushes new nodes here and freeBufferList() walks the list from it. */
bufferNode *head = NULL;
|
|
|
|
bufferNode *addBuffer(const char *buf, size_t len) {
|
|
if (!buf || len == 0) return NULL;
|
|
|
|
bufferNode *node = malloc(sizeof(bufferNode));
|
|
node->buf = malloc(len);
|
|
memcpy(node->buf, buf, len);
|
|
node->len = len;
|
|
node->next = head;
|
|
head = node;
|
|
return node;
|
|
}
|
|
|
|
void freeBufferList(void) {
|
|
bufferNode *current = head;
|
|
while (current) {
|
|
bufferNode *next = current->next;
|
|
free(current->buf);
|
|
free(current);
|
|
current = next;
|
|
}
|
|
}
|
|
|
|
/* HASH.HAS_STRINGREF key field
|
|
*
|
|
* Returns 1 if all of the following conditions are met for the hash field:
|
|
* 1. The key exists.
|
|
* 2. The key's value is a HASH type.
|
|
* 3. The field's value is a string reference (stringRef) type.
|
|
* Otherwise, returns 0.
|
|
*
|
|
* Parameters:
|
|
* 1. The hash entry key.
|
|
* 2. The hahs entry field.
|
|
*/
|
|
int hashHasStringRef(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) {
|
|
if (argc != 3) return ValkeyModule_WrongArity(ctx);
|
|
|
|
ValkeyModule_AutoMemory(ctx);
|
|
ValkeyModuleKey *key = ValkeyModule_OpenKey(ctx, argv[1], VALKEYMODULE_WRITE);
|
|
|
|
int result = ValkeyModule_HashHasStringRef(key, argv[2]);
|
|
return ValkeyModule_ReplyWithLongLong(ctx, result);
|
|
}
|
|
|
|
/* HASH.SET_STRINGREF key field buffer
|
|
*
|
|
* Sets hash entry value of a given key and field to an external owned buffer.
|
|
* Parameters:
|
|
* 1. The hash entry key.
|
|
* 2. The hahs entry field.
|
|
* 3. The buffer to share with the core.
|
|
*/
|
|
int hashSetStringRef(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) {
|
|
if (argc != 4) return ValkeyModule_WrongArity(ctx);
|
|
|
|
ValkeyModule_AutoMemory(ctx);
|
|
ValkeyModuleKey *key = ValkeyModule_OpenKey(ctx, argv[1], VALKEYMODULE_WRITE);
|
|
|
|
size_t buf_len;
|
|
const char *buf = ValkeyModule_StringPtrLen(argv[3], &buf_len);
|
|
bufferNode *node = addBuffer(buf, buf_len);
|
|
|
|
int result = ValkeyModule_HashSetStringRef(key, argv[2], node->buf, node->len);
|
|
if (result == 0) return ValkeyModule_ReplyWithLongLong(ctx, result);
|
|
return ValkeyModule_ReplyWithError(ctx, "Err");
|
|
}
|
|
|
|
/* Module entry point: initialises the "hash.stringref" module and registers
 * its two commands. Returns VALKEYMODULE_OK only when every step succeeds;
 * the first failing step aborts the sequence with VALKEYMODULE_ERR. The load
 * arguments are not used. */
int ValkeyModule_OnLoad(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) {
    VALKEYMODULE_NOT_USED(argv);
    VALKEYMODULE_NOT_USED(argc);

    if (ValkeyModule_Init(ctx, "hash.stringref", 1, VALKEYMODULE_APIVER_1) != VALKEYMODULE_OK) {
        return VALKEYMODULE_ERR;
    }

    /* Write command: stores a module-owned buffer in a hash field. */
    if (ValkeyModule_CreateCommand(ctx, "hash.set_stringref", hashSetStringRef, "write",
                                   1, 1, 1) != VALKEYMODULE_OK) {
        return VALKEYMODULE_ERR;
    }

    /* Read-only command: reports whether a hash field holds a stringRef. */
    if (ValkeyModule_CreateCommand(ctx, "hash.has_stringref", hashHasStringRef, "readonly",
                                   1, 1, 1) != VALKEYMODULE_OK) {
        return VALKEYMODULE_ERR;
    }

    return VALKEYMODULE_OK;
}
|
|
|
|
/* Module exit point: frees every buffer the module allocated and always
 * reports VALKEYMODULE_OK. The context argument is not used.
 *
 * NOTE(review): hash fields set through HASH.SET_STRINGREF were given pointers
 * into these buffers; confirm the engine no longer reads them after unload. */
int ValkeyModule_OnUnload(ValkeyModuleCtx *ctx) {
    int status = VALKEYMODULE_OK;

    freeBufferList();
    VALKEYMODULE_NOT_USED(ctx);

    return status;
}
|