Files
valkey/tests/modules/hash_stringref.c
Yair Gottdenker e9014fd02b Adding support for sharing memory between the module and the engine (#2472)
## Overview

Sharing memory between the module and engine reduces memory overhead by
eliminating redundant copies of stored records in the module. This is
particularly beneficial for search workloads that require indexing large
volumes of documents.

### Vectors

Vector similarity search requires storing large volumes of
high-cardinality vectors. For example, a single vector with 512
dimensions consumes 2048 bytes, and typical workloads often involve
millions of vectors. Due to the lack of a memory-sharing mechanism
between the module and the engine, valkey-search currently doubles
memory consumption when indexing vectors, significantly increasing
operational costs. This limitation introduces adoption friction and
reduces valkey-search's competitiveness.

## Memory Allocation Strategy

At a fundamental level, there are two primary allocation strategies:
- [Chosen] Module-allocated memory shared with the engine.
- Engine-allocated memory shared with the module.

For valkey-search, it is crucial that vectors reside in cache-aligned
memory to maximize SIMD optimizations. Allowing the module to allocate
memory provides greater flexibility for different use cases, though it
introduces slightly higher implementation complexity.

## Old Implementation

The old [implementation](https://github.com/valkey-io/valkey/pull/1804)
was based on ref-counting and introduced a new SDS type. After further
discussion, we
[agreed](https://github.com/valkey-io/valkey/pull/1804#issuecomment-2905115712)
to simplify the design by removing ref-counting and avoiding the
introduction of a new SDS type.

## New Implementation - Key Points

1. The engine exposes a new interface, `VM_HashSetViewValue`, which sets
the value as a view of a buffer that is owned by the module. The function
accepts the hash key, hash field, and a buffer along with its length.
2. `ViewValue` is a new data type that captures the externalized buffer
and its length.


## valkey-search Usage

### Insertion
1. Upon receiving a key space notification for a new hash or JSON key
with an indexed vector attribute, valkey-search allocates cache-aligned
memory and deep-copies the vector value.
2. valkey-search then calls `VM_HashSetViewValue` to avoid keeping two
copies of the vector.
### Deletion
When receiving a key space notification for a deleted hash key or hash
field that was indexed as a vector, valkey-search deletes the
corresponding entry from the index.

### Update
Handled similarly to insertion.

---------

Signed-off-by: yairgott <yairgott@gmail.com>
Signed-off-by: Yair Gottdenker <yairg@google.com>
Signed-off-by: Yair Gottdenker <yairgott@gmail.com>
Co-authored-by: Yair Gottdenker <yairg@google.com>
Co-authored-by: Ran Shidlansik <ranshid@amazon.com>
Co-authored-by: Jim Brunner <brunnerj@amazon.com>
2025-12-19 15:55:57 +02:00

100 lines
3.1 KiB
C

/* Module Test: Verifies the module's capability to share an owned buffer with the core,
* which is then stored in a hash key field using a non-owning string reference (stringRef). */
#include "valkeymodule.h"
#include <string.h>
/* Singly linked list node owning one heap-allocated buffer that is shared
 * with the core. The core only holds a non-owning reference to the buffer,
 * so the module keeps the allocation on this list until freeBufferList()
 * releases it. */
typedef struct bufferNode {
    char *buf;               /* Module-owned copy of the payload bytes. */
    size_t len;              /* Number of bytes stored in buf. */
    struct bufferNode *next; /* Previously added node, or NULL at the tail. */
} bufferNode;

/* Most recently added node; NULL while the list is empty. */
bufferNode *head = NULL;

/* Copies len bytes from buf into a newly allocated node and pushes that node
 * onto the front of the list.
 *
 * Returns the new node, or NULL when buf is NULL, len is 0, or an allocation
 * fails. The list is left untouched whenever NULL is returned. */
bufferNode *addBuffer(const char *buf, size_t len) {
    if (!buf || len == 0) return NULL;

    bufferNode *node = malloc(sizeof *node);
    if (!node) return NULL;

    node->buf = malloc(len);
    if (!node->buf) {
        /* Do not leak the node when only the payload allocation fails. */
        free(node);
        return NULL;
    }

    memcpy(node->buf, buf, len);
    node->len = len;
    node->next = head;
    head = node;
    return node;
}

/* Frees every node together with its buffer and leaves the list empty.
 * head is cleared before walking the nodes so it never points at freed
 * memory and a repeated call is a harmless no-op. */
void freeBufferList(void) {
    bufferNode *current = head;
    head = NULL;
    while (current) {
        bufferNode *next = current->next;
        free(current->buf);
        free(current);
        current = next;
    }
}
/* HASH.HAS_STRINGREF key field
 *
 * Replies with 1 when every one of the following holds, and with 0 otherwise:
 *  1. The key exists.
 *  2. The key holds a HASH value.
 *  3. The value of the given field is a string reference (stringRef).
 *
 * Arguments:
 *  1. The hash entry key.
 *  2. The hash entry field.
 */
int hashHasStringRef(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) {
    if (argc != 3) {
        return ValkeyModule_WrongArity(ctx);
    }

    /* Automatic memory management is enabled, so the key handle opened below
     * is not closed explicitly. */
    ValkeyModule_AutoMemory(ctx);

    ValkeyModuleString *key_name = argv[1];
    ValkeyModuleString *field_name = argv[2];

    ValkeyModuleKey *hash_key = ValkeyModule_OpenKey(ctx, key_name, VALKEYMODULE_WRITE);
    int has_ref = ValkeyModule_HashHasStringRef(hash_key, field_name);

    return ValkeyModule_ReplyWithLongLong(ctx, has_ref);
}
/* HASH.SET_STRINGREF key field buffer
 *
 * Sets the hash entry value of a given key and field to a module-owned buffer.
 * The buffer argument is first copied into module memory with addBuffer(), and
 * that copy is what gets handed to the core.
 *
 * Parameters:
 *  1. The hash entry key.
 *  2. The hash entry field.
 *  3. The buffer to share with the core.
 *
 * Replies with 0 when ValkeyModule_HashSetStringRef returns 0, and with an
 * error when the buffer could not be copied (for example because it is empty)
 * or when ValkeyModule_HashSetStringRef returns a non-zero result.
 */
int hashSetStringRef(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) {
    if (argc != 4) return ValkeyModule_WrongArity(ctx);
    ValkeyModule_AutoMemory(ctx);

    ValkeyModuleKey *key = ValkeyModule_OpenKey(ctx, argv[1], VALKEYMODULE_WRITE);

    size_t buf_len;
    const char *buf = ValkeyModule_StringPtrLen(argv[3], &buf_len);

    /* addBuffer() returns NULL for an empty buffer, so the result must be
     * checked: dereferencing it unconditionally would crash the server when
     * the command is called with an empty-string argument. */
    bufferNode *node = addBuffer(buf, buf_len);
    if (node == NULL) {
        return ValkeyModule_ReplyWithError(ctx, "ERR buffer is empty or could not be copied");
    }

    /* Note: when the call below fails, the copy stays on the module's buffer
     * list and is only released by freeBufferList(). */
    int result = ValkeyModule_HashSetStringRef(key, argv[2], node->buf, node->len);
    if (result == 0) return ValkeyModule_ReplyWithLongLong(ctx, result);
    return ValkeyModule_ReplyWithError(ctx, "Err");
}
/* Module load hook: initializes the module under the name "hash.stringref"
 * and registers the hash.set_stringref and hash.has_stringref commands.
 * Returns VALKEYMODULE_OK only when every step succeeds; the first failing
 * step stops the sequence and yields VALKEYMODULE_ERR. */
int ValkeyModule_OnLoad(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) {
    VALKEYMODULE_NOT_USED(argv);
    VALKEYMODULE_NOT_USED(argc);

    if (ValkeyModule_Init(ctx, "hash.stringref", 1, VALKEYMODULE_APIVER_1) != VALKEYMODULE_OK) {
        return VALKEYMODULE_ERR;
    }

    if (ValkeyModule_CreateCommand(ctx, "hash.set_stringref", hashSetStringRef, "write",
                                   1, 1, 1) != VALKEYMODULE_OK) {
        return VALKEYMODULE_ERR;
    }

    if (ValkeyModule_CreateCommand(ctx, "hash.has_stringref", hashHasStringRef, "readonly",
                                   1, 1, 1) != VALKEYMODULE_OK) {
        return VALKEYMODULE_ERR;
    }

    return VALKEYMODULE_OK;
}
/* Module unload hook: releases every buffer the module allocated through
 * addBuffer(). The context is not needed; always returns VALKEYMODULE_OK. */
int ValkeyModule_OnUnload(ValkeyModuleCtx *ctx) {
    freeBufferList();
    VALKEYMODULE_NOT_USED(ctx);
    return VALKEYMODULE_OK;
}