Files
valkey/cmake/Modules/SourceFiles.cmake
T
VoletiRam 39036c7c06 Add structured datasets loading capability in valkey benchmark (#2823)
## Background

Add structured dataset loading capability. Support CSV and TSV file
formats. Use `__field:fieldname__` placeholders, which are replaced with the
corresponding fields from the dataset file. Support field content of
naturally varying length. Allow mixed placeholder usage combining dataset
fields with random generators. Enable automatic field discovery from
CSV/TSV headers. Use `--maxdocs` to limit how many documents are loaded
from the dataset.

Rather than modifying the existing placeholder system, we detect field
placeholders and switch to a separate code path that builds commands
from scratch using `valkeyFormatCommandArgv()`. This ensures:

- Zero impact on existing functionality
- Full support for variable-size content
- Thread-safe atomic record iteration
- Compatible with pipelining and threading modes

__Usage examples__

```sh
# Strings - Simple key-value with dataset fields
./valkey-benchmark --dataset products.csv -n 10000 SET product:__rand_int__ "__field:name__"

# Sets - Unique collections from dataset
./valkey-benchmark --dataset categories.csv -n 10000 SADD tags:__rand_int__ "__field:category__"

# CSV dataset with document limit
./valkey-benchmark --dataset wiki.csv --maxdocs 100000 -n 50000 HSET doc:__rand_int__ title "__field:title__" body "__field:abstract__"

# Mixed placeholders (dataset + random)
./valkey-benchmark --dataset terms.csv -r 5000000 -n 50000 HSET search:__rand_int__ term "__field:term__" score __rand_1st__
```

__Full-Text Search Benchmarking__

```sh
# Search hit scenarios (existing terms)
./valkey-benchmark --dataset search_terms.csv -n 50000 FT.SEARCH rd0 "__field:term__"

# Search miss scenarios (non-existent terms)
./valkey-benchmark --dataset miss_terms.csv -n 50000 FT.SEARCH rd0 "__field:term__"

# Query variations
./valkey-benchmark --dataset search_terms.csv -n 50000 FT.SEARCH rd0 "@title:__field:term__"
./valkey-benchmark --dataset search_terms.csv -n 50000 FT.SEARCH rd0 "__field:term__*"
```

__Benchmark Results__


Test environment:
__Instance:__ AWS c7i.16xlarge, 64 vCPU

Test Dataset: 5M+ Wikipedia XML documents, 5.8GB memory

| Configuration | Throughput | CPU Usage | Wall Time | Memory Peak |
|---------------|------------|-----------|-----------|-------------|
| Single-threaded, P1 | 93,295 RPS | 99% | 71.4s | 5.8GB |
| Multi-threaded (10), P1 | 93,332 RPS | 137% | 71.5s | 5.8GB |
| Single-threaded, P10 | 274,499 RPS | 96% | 36.1s | 5.8GB |
| Multi-threaded (4), P10 | 344,589 RPS | 161% | 32.4s | 5.8GB |

---------

Signed-off-by: Ram Prasad Voleti <ramvolet@amazon.com>
Co-authored-by: Ram Prasad Voleti <ramvolet@amazon.com>
2026-04-29 09:18:37 -07:00

185 lines
6.7 KiB
CMake

# -------------------------------------------------
# Define the sources to be built
# -------------------------------------------------
# valkey-server: C sources collected in VALKEY_SERVER_SRCS.
# Each entry is rooted at ${CMAKE_SOURCE_DIR}/src (tracing sources live in the
# trace/ subdirectory). The list keeps the order written here.
set(VALKEY_SERVER_SRCS
    "${CMAKE_SOURCE_DIR}/src/threads_mngr.c"
    "${CMAKE_SOURCE_DIR}/src/adlist.c"
    "${CMAKE_SOURCE_DIR}/src/vector.c"
    "${CMAKE_SOURCE_DIR}/src/quicklist.c"
    "${CMAKE_SOURCE_DIR}/src/ae.c"
    "${CMAKE_SOURCE_DIR}/src/anet.c"
    "${CMAKE_SOURCE_DIR}/src/hashtable.c"
    "${CMAKE_SOURCE_DIR}/src/kvstore.c"
    "${CMAKE_SOURCE_DIR}/src/sds.c"
    "${CMAKE_SOURCE_DIR}/src/zmalloc.c"
    "${CMAKE_SOURCE_DIR}/src/lzf_c.c"
    "${CMAKE_SOURCE_DIR}/src/lzf_d.c"
    "${CMAKE_SOURCE_DIR}/src/pqsort.c"
    "${CMAKE_SOURCE_DIR}/src/zipmap.c"
    "${CMAKE_SOURCE_DIR}/src/sha1.c"
    "${CMAKE_SOURCE_DIR}/src/ziplist.c"
    "${CMAKE_SOURCE_DIR}/src/release.c"
    "${CMAKE_SOURCE_DIR}/src/memory_prefetch.c"
    "${CMAKE_SOURCE_DIR}/src/io_threads.c"
    "${CMAKE_SOURCE_DIR}/src/networking.c"
    "${CMAKE_SOURCE_DIR}/src/util.c"
    "${CMAKE_SOURCE_DIR}/src/object.c"
    "${CMAKE_SOURCE_DIR}/src/db.c"
    "${CMAKE_SOURCE_DIR}/src/replication.c"
    "${CMAKE_SOURCE_DIR}/src/rdb.c"
    "${CMAKE_SOURCE_DIR}/src/t_string.c"
    "${CMAKE_SOURCE_DIR}/src/t_list.c"
    "${CMAKE_SOURCE_DIR}/src/t_set.c"
    "${CMAKE_SOURCE_DIR}/src/t_zset.c"
    "${CMAKE_SOURCE_DIR}/src/t_hash.c"
    "${CMAKE_SOURCE_DIR}/src/config.c"
    "${CMAKE_SOURCE_DIR}/src/aof.c"
    "${CMAKE_SOURCE_DIR}/src/pubsub.c"
    "${CMAKE_SOURCE_DIR}/src/multi.c"
    "${CMAKE_SOURCE_DIR}/src/debug.c"
    "${CMAKE_SOURCE_DIR}/src/sort.c"
    "${CMAKE_SOURCE_DIR}/src/intset.c"
    "${CMAKE_SOURCE_DIR}/src/syncio.c"
    "${CMAKE_SOURCE_DIR}/src/cluster.c"
    "${CMAKE_SOURCE_DIR}/src/cluster_migrateslots.c"
    "${CMAKE_SOURCE_DIR}/src/cluster_legacy.c"
    "${CMAKE_SOURCE_DIR}/src/cluster_slot_stats.c"
    "${CMAKE_SOURCE_DIR}/src/crc16.c"
    "${CMAKE_SOURCE_DIR}/src/crc16_slottable.c"
    "${CMAKE_SOURCE_DIR}/src/commandlog.c"
    "${CMAKE_SOURCE_DIR}/src/eval.c"
    "${CMAKE_SOURCE_DIR}/src/bio.c"
    "${CMAKE_SOURCE_DIR}/src/rio.c"
    "${CMAKE_SOURCE_DIR}/src/rand.c"
    "${CMAKE_SOURCE_DIR}/src/memtest.c"
    "${CMAKE_SOURCE_DIR}/src/syscheck.c"
    "${CMAKE_SOURCE_DIR}/src/crcspeed.c"
    "${CMAKE_SOURCE_DIR}/src/crccombine.c"
    "${CMAKE_SOURCE_DIR}/src/crc64.c"
    "${CMAKE_SOURCE_DIR}/src/bitops.c"
    "${CMAKE_SOURCE_DIR}/src/sentinel.c"
    "${CMAKE_SOURCE_DIR}/src/notify.c"
    "${CMAKE_SOURCE_DIR}/src/setproctitle.c"
    "${CMAKE_SOURCE_DIR}/src/blocked.c"
    "${CMAKE_SOURCE_DIR}/src/hyperloglog.c"
    "${CMAKE_SOURCE_DIR}/src/latency.c"
    "${CMAKE_SOURCE_DIR}/src/sparkline.c"
    "${CMAKE_SOURCE_DIR}/src/valkey-check-rdb.c"
    "${CMAKE_SOURCE_DIR}/src/valkey-check-aof.c"
    "${CMAKE_SOURCE_DIR}/src/valkey_strtod.c"
    "${CMAKE_SOURCE_DIR}/src/geo.c"
    "${CMAKE_SOURCE_DIR}/src/lazyfree.c"
    "${CMAKE_SOURCE_DIR}/src/module.c"
    "${CMAKE_SOURCE_DIR}/src/lrulfu.c"
    "${CMAKE_SOURCE_DIR}/src/evict.c"
    "${CMAKE_SOURCE_DIR}/src/expire.c"
    "${CMAKE_SOURCE_DIR}/src/geohash.c"
    "${CMAKE_SOURCE_DIR}/src/geohash_helper.c"
    "${CMAKE_SOURCE_DIR}/src/childinfo.c"
    "${CMAKE_SOURCE_DIR}/src/allocator_defrag.c"
    "${CMAKE_SOURCE_DIR}/src/defrag.c"
    "${CMAKE_SOURCE_DIR}/src/siphash.c"
    "${CMAKE_SOURCE_DIR}/src/rax.c"
    "${CMAKE_SOURCE_DIR}/src/t_stream.c"
    "${CMAKE_SOURCE_DIR}/src/listpack.c"
    "${CMAKE_SOURCE_DIR}/src/localtime.c"
    "${CMAKE_SOURCE_DIR}/src/lolwut.c"
    "${CMAKE_SOURCE_DIR}/src/lolwut5.c"
    "${CMAKE_SOURCE_DIR}/src/lolwut6.c"
    "${CMAKE_SOURCE_DIR}/src/lolwut9.c"
    "${CMAKE_SOURCE_DIR}/src/acl.c"
    "${CMAKE_SOURCE_DIR}/src/tracking.c"
    "${CMAKE_SOURCE_DIR}/src/socket.c"
    "${CMAKE_SOURCE_DIR}/src/tls.c"
    "${CMAKE_SOURCE_DIR}/src/rdma.c"
    "${CMAKE_SOURCE_DIR}/src/sha256.c"
    "${CMAKE_SOURCE_DIR}/src/timeout.c"
    "${CMAKE_SOURCE_DIR}/src/setcpuaffinity.c"
    "${CMAKE_SOURCE_DIR}/src/monotonic.c"
    "${CMAKE_SOURCE_DIR}/src/mt19937-64.c"
    "${CMAKE_SOURCE_DIR}/src/resp_parser.c"
    "${CMAKE_SOURCE_DIR}/src/call_reply.c"
    "${CMAKE_SOURCE_DIR}/src/script.c"
    "${CMAKE_SOURCE_DIR}/src/functions.c"
    "${CMAKE_SOURCE_DIR}/src/scripting_engine.c"
    "${CMAKE_SOURCE_DIR}/src/trace/trace.c"
    "${CMAKE_SOURCE_DIR}/src/trace/trace_rdb.c"
    "${CMAKE_SOURCE_DIR}/src/trace/trace_aof.c"
    "${CMAKE_SOURCE_DIR}/src/trace/trace_commands.c"
    "${CMAKE_SOURCE_DIR}/src/trace/trace_db.c"
    "${CMAKE_SOURCE_DIR}/src/trace/trace_cluster.c"
    "${CMAKE_SOURCE_DIR}/src/trace/trace_server.c"
    "${CMAKE_SOURCE_DIR}/src/commands.c"
    "${CMAKE_SOURCE_DIR}/src/strl.c"
    "${CMAKE_SOURCE_DIR}/src/connection.c"
    "${CMAKE_SOURCE_DIR}/src/unix.c"
    "${CMAKE_SOURCE_DIR}/src/server.c"
    "${CMAKE_SOURCE_DIR}/src/logreqres.c"
    "${CMAKE_SOURCE_DIR}/src/entry.c"
    "${CMAKE_SOURCE_DIR}/src/vset.c"
    "${CMAKE_SOURCE_DIR}/src/fifo.c"
    "${CMAKE_SOURCE_DIR}/src/mutexqueue.c"
    "${CMAKE_SOURCE_DIR}/src/queues.c"
)
# valkey-cli: C sources collected in VALKEY_CLI_SRCS.
# Each entry is rooted at ${CMAKE_SOURCE_DIR}/src; the list keeps the order
# written here.
set(VALKEY_CLI_SRCS
    "${CMAKE_SOURCE_DIR}/src/anet.c"
    "${CMAKE_SOURCE_DIR}/src/adlist.c"
    "${CMAKE_SOURCE_DIR}/src/hashtable.c"
    "${CMAKE_SOURCE_DIR}/src/sds.c"
    "${CMAKE_SOURCE_DIR}/src/sha256.c"
    "${CMAKE_SOURCE_DIR}/src/util.c"
    "${CMAKE_SOURCE_DIR}/src/valkey-cli.c"
    "${CMAKE_SOURCE_DIR}/src/valkey_strtod.c"
    "${CMAKE_SOURCE_DIR}/src/zmalloc.c"
    "${CMAKE_SOURCE_DIR}/src/release.c"
    "${CMAKE_SOURCE_DIR}/src/ae.c"
    "${CMAKE_SOURCE_DIR}/src/serverassert.c"
    "${CMAKE_SOURCE_DIR}/src/crcspeed.c"
    "${CMAKE_SOURCE_DIR}/src/crccombine.c"
    "${CMAKE_SOURCE_DIR}/src/crc64.c"
    "${CMAKE_SOURCE_DIR}/src/siphash.c"
    "${CMAKE_SOURCE_DIR}/src/crc16.c"
    "${CMAKE_SOURCE_DIR}/src/monotonic.c"
    "${CMAKE_SOURCE_DIR}/src/cli_common.c"
    "${CMAKE_SOURCE_DIR}/src/mt19937-64.c"
    "${CMAKE_SOURCE_DIR}/src/strl.c"
    "${CMAKE_SOURCE_DIR}/src/cli_commands.c"
)
# valkey-benchmark: C sources collected in VALKEY_BENCHMARK_SRCS.
# Each entry is rooted at ${CMAKE_SOURCE_DIR}/src; the list keeps the order
# written here.
set(VALKEY_BENCHMARK_SRCS
    "${CMAKE_SOURCE_DIR}/src/ae.c"
    "${CMAKE_SOURCE_DIR}/src/anet.c"
    "${CMAKE_SOURCE_DIR}/src/sds.c"
    "${CMAKE_SOURCE_DIR}/src/sha256.c"
    "${CMAKE_SOURCE_DIR}/src/util.c"
    "${CMAKE_SOURCE_DIR}/src/valkey-benchmark.c"
    "${CMAKE_SOURCE_DIR}/src/valkey-benchmark-dataset.c"
    "${CMAKE_SOURCE_DIR}/src/valkey_strtod.c"
    "${CMAKE_SOURCE_DIR}/src/adlist.c"
    "${CMAKE_SOURCE_DIR}/src/hashtable.c"
    "${CMAKE_SOURCE_DIR}/src/zmalloc.c"
    "${CMAKE_SOURCE_DIR}/src/serverassert.c"
    "${CMAKE_SOURCE_DIR}/src/release.c"
    "${CMAKE_SOURCE_DIR}/src/crcspeed.c"
    "${CMAKE_SOURCE_DIR}/src/crccombine.c"
    "${CMAKE_SOURCE_DIR}/src/crc64.c"
    "${CMAKE_SOURCE_DIR}/src/siphash.c"
    "${CMAKE_SOURCE_DIR}/src/crc16.c"
    "${CMAKE_SOURCE_DIR}/src/crc16_slottable.c"
    "${CMAKE_SOURCE_DIR}/src/monotonic.c"
    "${CMAKE_SOURCE_DIR}/src/cli_common.c"
    "${CMAKE_SOURCE_DIR}/src/mt19937-64.c"
    "${CMAKE_SOURCE_DIR}/src/strl.c"
    "${CMAKE_SOURCE_DIR}/src/fuzzer_client.c"
    "${CMAKE_SOURCE_DIR}/src/fuzzer_command_generator.c"
)
# valkey-rdma module: single-source list held in VALKEY_RDMA_MODULE_SRCS.
# NOTE(review): rdma.c is also listed in VALKEY_SERVER_SRCS; presumably the
# consumer picks one list depending on how RDMA is built - confirm there.
set(VALKEY_RDMA_MODULE_SRCS
    "${CMAKE_SOURCE_DIR}/src/rdma.c"
)
# valkey-tls module: single-source list held in VALKEY_TLS_MODULE_SRCS.
# NOTE(review): tls.c is also listed in VALKEY_SERVER_SRCS; presumably the
# consumer picks one list depending on how TLS is built - confirm there.
set(VALKEY_TLS_MODULE_SRCS
    "${CMAKE_SOURCE_DIR}/src/tls.c"
)