Added tool to generate test data

This commit is contained in:
Kenneth Skovhede
2024-07-08 07:32:35 +02:00
parent 59b00a5a45
commit df76a77ee2
10 changed files with 656 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
data
+27
View File
@@ -0,0 +1,27 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Generate Test Data",
"type": "coreclr",
"request": "launch",
"preLaunchTask": "build",
"program": "${workspaceFolder}/bin/Debug/net8.0/TestDataGenerator.dll",
"args": ["create", "./data", "--file-count=1000", "--max-folder-count=100", "--max-fan-out=3"],
"cwd": "${workspaceFolder}",
"console": "internalConsole",
"stopAtEntry": false
},
{
"name": "Update Test Data",
"type": "coreclr",
"request": "launch",
"preLaunchTask": "build",
"program": "${workspaceFolder}/bin/Debug/net8.0/TestDataGenerator.dll",
"args": ["update", "./data", "--new-files=10", "--updated-files=10", "--deleted-files=10"],
"cwd": "${workspaceFolder}",
"console": "internalConsole",
"stopAtEntry": false
}
]
}
+17
View File
@@ -0,0 +1,17 @@
{
"version": "2.0.0",
"tasks": [
{
"label": "build",
"command": "dotnet",
"type": "process",
"args": [
"build",
"${workspaceFolder}/TestDataGenerator.sln",
"/property:GenerateFullPaths=true",
"/consoleloggerparameters:NoSummary;ForceNoAlign"
],
"problemMatcher": "$msCompile"
}
]
}
+227
View File
@@ -0,0 +1,227 @@
using System.CommandLine;
using System.CommandLine.NamingConventionBinder;
using static TestDataGenerator.Commands.Shared;
namespace TestDataGenerator.Commands;
/// <summary>
/// Create test data in a folder
/// </summary>
public static class Create
{
    /// <summary>
    /// The number of consecutive generation rounds that may pass without producing a single
    /// new unique folder before folder generation gives up, so that an exhausted name space
    /// cannot stall the tool forever
    /// </summary>
    private const int MaxStaleFolderRounds = 1000;

    /// <summary>
    /// The minimum time between two progress messages
    /// </summary>
    private static readonly TimeSpan ProgressInterval = TimeSpan.FromSeconds(10);

    /// <summary>
    /// The input parameters for the command
    /// </summary>
    /// <param name="TargetFolder">The folder to create files in</param>
    /// <param name="FileCount">The number of files to create in the target folder</param>
    /// <param name="MaxTotalSize">The maximum total size of the files to create in the target folder</param>
    /// <param name="MaxFileSize">The maximum size of each file to create in the target folder</param>
    /// <param name="MinFileSize">The minimum size of each file to create in the target folder</param>
    /// <param name="SparseFactor">The percentage of data that should be zeroed out in each file</param>
    /// <param name="MaxFanOut">The maximum number of subfolders to create in each folder</param>
    /// <param name="MaxDepth">The maximum depth of subfolders to create in the target folder</param>
    /// <param name="MaxFolderCount">The maximum number of folders to create in the target folder</param>
    /// <param name="MaxPathSegmentLength">The maximum length of each path segment in the target folder</param>
    record CommandInput(
        DirectoryInfo TargetFolder,
        int FileCount,
        long MaxTotalSize,
        long MaxFileSize,
        long MinFileSize,
        int SparseFactor,
        int MaxFanOut,
        int MaxDepth,
        int MaxFolderCount,
        int MaxPathSegmentLength);

    /// <summary>
    /// Creates the command
    /// </summary>
    /// <returns>The command</returns>
    public static Command CreateCommand()
    {
        var command = new Command("create", "Create test data in a folder");

        var targetFolderArgument = new Argument<DirectoryInfo>("target-folder", "The folder to create files in");
        targetFolderArgument.SetDefaultValue(new DirectoryInfo(Directory.GetCurrentDirectory()));
        command.AddArgument(targetFolderArgument);

        var fileCountOption = new Option<int>("--file-count", "The number of files to create in the target folder");
        fileCountOption.SetDefaultValue(200000);
        command.AddOption(fileCountOption);

        var maxTotalSizeOption = new Option<long>("--max-total-size", "The maximum total size of the files to create in the target folder");
        maxTotalSizeOption.SetDefaultValue(1024 * 1024 * 1024L);
        command.AddOption(maxTotalSizeOption);

        var maxFileSizeOption = new Option<long>("--max-file-size", "The maximum size of each file to create in the target folder");
        maxFileSizeOption.SetDefaultValue(1024 * 1024L);
        command.AddOption(maxFileSizeOption);

        var minFileSizeOption = new Option<long>("--min-file-size", "The minimum size of each file to create in the target folder");
        minFileSizeOption.SetDefaultValue(0L);
        command.AddOption(minFileSizeOption);

        var sparseFactorOption = new Option<int>("--sparse-factor", "The percentage of data that should be zeroed out in each file");
        sparseFactorOption.SetDefaultValue(10);
        command.AddOption(sparseFactorOption);

        var maxFanOutOption = new Option<int>("--max-fan-out", "The maximum number of subfolders to create in each folder");
        maxFanOutOption.SetDefaultValue(10);
        command.AddOption(maxFanOutOption);

        var maxDepthOption = new Option<int>("--max-depth", "The maximum depth of subfolders to create in the target folder");
        maxDepthOption.SetDefaultValue(5);
        command.AddOption(maxDepthOption);

        var maxFolderCountOption = new Option<int>("--max-folder-count", "The maximum number of folders to create in the target folder");
        maxFolderCountOption.SetDefaultValue(10000);
        command.AddOption(maxFolderCountOption);

        var maxPathSegmentLengthOption = new Option<int>("--max-path-segment-length", "The maximum length of each path segment in the target folder");
        maxPathSegmentLengthOption.SetDefaultValue(15);
        command.AddOption(maxPathSegmentLengthOption);

        // The option and argument names are matched to the CommandInput members by naming convention
        command.Handler = CommandHandler.Create<CommandInput>(Execute);
        return command;
    }

    /// <summary>
    /// Executes the command
    /// </summary>
    /// <param name="input">The input parameters</param>
    /// <exception cref="InvalidOperationException">The target folder exists and is not empty</exception>
    /// <exception cref="ArgumentException">One of the input values is out of range or the values contradict each other</exception>
    private static void Execute(CommandInput input)
    {
        ValidateInput(input);

        Console.WriteLine($"Creating test data in {input.TargetFolder.FullName}");
        var rnd = new Random();

        var folders = GenerateFolders(rnd, input);
        var files = DistributeFiles(rnd, folders, GenerateFileNames(rnd, input.FileCount, input.MaxPathSegmentLength));
        var (filesWithSizes, totalSize) = AssignFileSizes(rnd, files, input);

        // The folder list also contains the target folder itself, which is not counted as created
        Console.WriteLine($"Creating {folders.Count - 1} folders and {files.Count} files ({SizeToHumanReadable(totalSize)})");

        if (!input.TargetFolder.Exists)
            input.TargetFolder.Create();

        CreateFolders(folders);
        CreateFiles(rnd, filesWithSizes, totalSize, input.SparseFactor);
    }

    /// <summary>
    /// Verifies that the input values are usable before any work is started
    /// </summary>
    /// <param name="input">The input parameters</param>
    /// <exception cref="InvalidOperationException">The target folder exists and is not empty</exception>
    /// <exception cref="ArgumentException">One of the input values is out of range or the values contradict each other</exception>
    private static void ValidateInput(CommandInput input)
    {
        if (input.TargetFolder.Exists && input.TargetFolder.EnumerateFileSystemInfos().Any())
            throw new InvalidOperationException($"The target folder {input.TargetFolder.FullName} already exists and is not empty");
        if (input.MaxFileSize <= 0)
            throw new ArgumentException("The maximum file size must be greater than zero");
        if (input.MaxTotalSize <= 0)
            throw new ArgumentException("The maximum total size must be greater than zero");
        if (input.MinFileSize < 0)
            throw new ArgumentException("The minimum file size must not be negative");
        if (input.MinFileSize > input.MaxFileSize)
            throw new ArgumentException("The minimum file size must be less than or equal to the maximum file size");
        if (input.FileCount <= 0)
            throw new ArgumentException("The file count must be greater than zero");
        if (input.MaxFolderCount <= 0)
            throw new ArgumentException("The maximum folder count must be greater than zero");
        if (input.MaxFanOut <= 0)
            throw new ArgumentException("The maximum fan-out must be greater than zero");
        if (input.MaxDepth <= 0)
            throw new ArgumentException("The maximum depth must be greater than zero");
        if (input.MaxPathSegmentLength <= 0)
            throw new ArgumentException("The maximum path segment length must be greater than zero");

        // No file can shrink below the minimum size, so the size reduction could never reach
        // the requested total; decimal arithmetic avoids overflowing the product
        if ((decimal)input.MinFileSize * input.FileCount > input.MaxTotalSize)
            throw new ArgumentException("The minimum file size multiplied by the file count must not exceed the maximum total size");
    }

    /// <summary>
    /// Generates the unique folder paths to create, followed by the target folder itself
    /// </summary>
    /// <param name="rnd">The random number generator to use</param>
    /// <param name="input">The input parameters</param>
    /// <returns>At most <c>MaxFolderCount</c> unique folder paths, plus the target folder as the last entry</returns>
    private static List<string> GenerateFolders(Random rnd, CommandInput input)
    {
        var root = input.TargetFolder.FullName;
        var folders = new List<string>();
        var seen = new HashSet<string>();
        var staleRounds = 0;

        while (folders.Count < input.MaxFolderCount)
        {
            var countBefore = folders.Count;

            // Parents are emitted before their children, so stopping at the limit never keeps a child without its parent
            foreach (var folder in GeneratePathStructure(rnd, root, input.MaxDepth, input.MaxFanOut, input.MaxPathSegmentLength))
            {
                if (folders.Count >= input.MaxFolderCount)
                    break;
                if (seen.Add(folder))
                    folders.Add(folder);
            }

            if (folders.Count > countBefore)
            {
                staleRounds = 0;
            }
            else if (++staleRounds >= MaxStaleFolderRounds)
            {
                // The combination of depth, fan-out and segment length cannot yield enough unique names
                Console.WriteLine($"Unable to generate more than {folders.Count} unique folders, continuing without reaching the limit of {input.MaxFolderCount}");
                break;
            }
        }

        // Files may also be placed directly in the target folder
        folders.Add(root);
        return folders;
    }

    /// <summary>
    /// Assigns a random size to each file and reduces sizes until the total fits the maximum total size
    /// </summary>
    /// <param name="rnd">The random number generator to use</param>
    /// <param name="files">The file paths to assign sizes to</param>
    /// <param name="input">The input parameters</param>
    /// <returns>The files with their sizes and the sum of all sizes</returns>
    private static (List<(string Path, long Size)> Files, long TotalSize) AssignFileSizes(Random rnd, List<string> files, CommandInput input)
    {
        var filesWithSizes = files
            .Select(path => (Path: path, Size: rnd.NextInt64(input.MinFileSize, input.MaxFileSize)))
            .ToList();
        var totalSize = filesWithSizes.Sum(x => x.Size);

        // Shrink randomly picked files; this terminates because the input validation
        // guarantees that the minimum sizes alone fit within the maximum total size
        while (totalSize > input.MaxTotalSize)
        {
            var index = rnd.Next(filesWithSizes.Count);
            var current = filesWithSizes[index];
            var newSize = rnd.NextInt64(input.MinFileSize, current.Size);
            totalSize -= current.Size - newSize;
            filesWithSizes[index] = (current.Path, newSize);
        }

        return (filesWithSizes, totalSize);
    }

    /// <summary>
    /// Creates the folders on disk and periodically reports the progress
    /// </summary>
    /// <param name="folders">The folder paths to create</param>
    private static void CreateFolders(List<string> folders)
    {
        var lastUpdate = DateTime.UtcNow;
        var foldersCreated = 0L;
        foreach (var folder in folders)
        {
            Directory.CreateDirectory(folder);
            foldersCreated++;
            if ((DateTime.UtcNow - lastUpdate) > ProgressInterval)
            {
                Console.WriteLine($"Created {foldersCreated} of {folders.Count} folders");
                lastUpdate = DateTime.UtcNow;
            }
        }
    }

    /// <summary>
    /// Creates the files on disk, fills them with random data and periodically reports the progress
    /// </summary>
    /// <param name="rnd">The random number generator to use</param>
    /// <param name="files">The file paths and sizes to create</param>
    /// <param name="totalSize">The sum of all file sizes, used for progress reporting</param>
    /// <param name="sparseFactor">The percentage of data that should be zeroed out in each file</param>
    private static void CreateFiles(Random rnd, List<(string Path, long Size)> files, long totalSize, int sparseFactor)
    {
        var lastUpdate = DateTime.UtcNow;
        var filesCreated = 0L;
        var fileSizeCreated = 0L;
        foreach (var (file, size) in files)
        {
            try
            {
                using var fs = new FileStream(file, FileMode.Create, FileAccess.Write);
                if (size > 0)
                {
                    // Size the file first, as skipped ranges are never written
                    fs.SetLength(size);
                    fs.Position = 0;
                    WriteRandomData(rnd, fs, size, sparseFactor);
                }

                filesCreated++;
                fileSizeCreated += size;
                if ((DateTime.UtcNow - lastUpdate) > ProgressInterval)
                {
                    Console.WriteLine($"Created {filesCreated} of {files.Count} files ({SizeToHumanReadable(fileSizeCreated)} of {SizeToHumanReadable(totalSize)})");
                    lastUpdate = DateTime.UtcNow;
                }
            }
            catch (Exception ex)
            {
                // Best effort: a single failing file is reported and must not abort the whole run
                Console.WriteLine($"Error creating file {file}: {ex.Message}");
            }
        }
    }

    /// <summary>
    /// Generates a list of random folder paths
    /// </summary>
    /// <param name="rnd">The random number generator to use</param>
    /// <param name="prefix">The prefix to use for the folder paths</param>
    /// <param name="maxDepth">The maximum depth of the folder structure</param>
    /// <param name="maxFanOut">The maximum number of subfolders to create in each folder</param>
    /// <param name="maxSegmentLength">The maximum length of each path segment</param>
    /// <returns>The generated folder paths, where each folder is listed before its subfolders</returns>
    private static List<string> GeneratePathStructure(Random rnd, string prefix, int maxDepth, int maxFanOut, int maxSegmentLength)
    {
        var folderCount = rnd.Next(1, maxFanOut + 1);

        // The last permitted level only gets leaf folders, so a depth of N yields exactly N levels
        if (maxDepth <= 1)
            return GeneratePathSegments(rnd, folderCount, maxSegmentLength).Select(x => Path.Combine(prefix, x)).ToList();

        var folders = new List<string>();
        for (var i = 0; i < folderCount; i++)
        {
            var folderPath = Path.Combine(prefix, GetPathSegment(rnd, maxSegmentLength));
            folders.Add(folderPath);
            folders.AddRange(GeneratePathStructure(rnd, folderPath, maxDepth - 1, maxFanOut, maxSegmentLength));
        }

        return folders;
    }
}
+132
View File
@@ -0,0 +1,132 @@
using System.Text;
namespace TestDataGenerator.Commands;
/// <summary>
/// Shared utility methods for the test data generator
/// </summary>
public static class Shared
{
    /// <summary>
    /// Distributes the files in the list to the folders in the list
    /// </summary>
    /// <param name="rnd">The random number generator to use</param>
    /// <param name="folders">The list of folders to distribute the files to</param>
    /// <param name="list">The list of files to distribute</param>
    /// <returns>The list of files distributed to the folders</returns>
    public static List<string> DistributeFiles(Random rnd, List<string> folders, List<string> list)
    {
        var expanded = new List<string>(list.Count);
        foreach (var file in list)
            expanded.Add(Path.Combine(folders[rnd.Next(folders.Count)], file));

        return expanded;
    }

    /// <summary>
    /// Converts a size in bytes to a human-readable string
    /// </summary>
    /// <param name="size">The size in bytes</param>
    /// <returns>The human-readable string</returns>
    /// <remarks>
    /// The next larger unit is used once the value would reach 1000 in the current unit.
    /// The division is done entirely in decimal arithmetic, so no fraction is lost before rounding.
    /// </remarks>
    public static string SizeToHumanReadable(long size)
    {
        const decimal KiB = 1024m;
        const decimal MiB = KiB * 1024m;
        const decimal GiB = MiB * 1024m;
        const decimal TiB = GiB * 1024m;

        if (size < 1024)
            return $"{size} B";
        if (size < 1024 * 1000)
            return $"{size / KiB:F2} KiB";
        if (size < 1024 * 1024 * 1000)
            return $"{size / MiB:F2} MiB";
        if (size < 1024L * 1024 * 1024 * 1000)
            return $"{size / GiB:F2} GiB";

        return $"{size / TiB:F2} TiB";
    }

    /// <summary>
    /// The list of file extensions to use for random file names
    /// </summary>
    private static readonly IReadOnlyList<string> FileExtensions = [".bin", ".dat", ".binary", ".data", ".test", ""];

    /// <summary>
    /// The characters to use for random path segments
    /// </summary>
    private const string PathSegmentChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

    /// <summary>
    /// Generates a random path segment
    /// </summary>
    /// <param name="rnd">The random number generator to use</param>
    /// <param name="maxLength">The maximum length of the path segment</param>
    /// <returns>The random path segment, between one and <paramref name="maxLength"/> characters long</returns>
    public static string GetPathSegment(Random rnd, int maxLength)
    {
        // The upper bound of Random.Next is exclusive
        var length = rnd.Next(1, maxLength + 1);
        var sb = new StringBuilder(length);
        for (var j = 0; j < length; j++)
            sb.Append(PathSegmentChars[rnd.Next(PathSegmentChars.Length)]);

        return sb.ToString();
    }

    /// <summary>
    /// Generates a list of random path segments
    /// </summary>
    /// <param name="rnd">The random number generator to use</param>
    /// <param name="count">The number of path segments to generate</param>
    /// <param name="maxLength">The maximum length of each path segment</param>
    /// <returns>The list of path segments</returns>
    public static List<string> GeneratePathSegments(Random rnd, int count, int maxLength)
    {
        var names = new List<string>(count);
        for (var i = 0; i < count; i++)
            names.Add(GetPathSegment(rnd, maxLength));

        return names;
    }

    /// <summary>
    /// Generates a list of random file names
    /// </summary>
    /// <param name="rnd">The random number generator to use</param>
    /// <param name="count">The number of file names to generate</param>
    /// <param name="maxLength">The maximum length of each file name, not counting the extension</param>
    /// <returns>The list of file names</returns>
    public static List<string> GenerateFileNames(Random rnd, int count, int maxLength)
    {
        var names = new List<string>(count);
        for (var i = 0; i < count; i++)
            names.Add(GetPathSegment(rnd, maxLength) + FileExtensions[rnd.Next(FileExtensions.Count)]);

        return names;
    }

    /// <summary>
    /// Writes random data to a stream
    /// </summary>
    /// <param name="rnd">The random number generator to use</param>
    /// <param name="fs">The stream to write to</param>
    /// <param name="size">The size of the data to write</param>
    /// <param name="sparseFactor">The factor to use for sparse data</param>
    /// <remarks>
    /// The sparse factor is a percentage value that determines how many chunks are skipped in the stream.
    /// For example, a sparse factor of 10 means that roughly 10% of the chunks will not be overwritten.
    /// Skipped chunks only advance the stream position; nothing is written for them.
    /// The stream position is advanced by exactly <paramref name="size"/> bytes in total.
    /// </remarks>
    public static void WriteRandomData(Random rnd, Stream fs, long size, int sparseFactor)
    {
        var buffer = new byte[4096];
        while (size > 0)
        {
            // Never handle more than what remains; the upper bound of Random.Next is exclusive
            var maxChunkSize = (int)Math.Min(size, buffer.Length);
            var chunkSize = rnd.Next(1, maxChunkSize + 1);

            // Take the first chunkSize bytes; AsSpan(start) alone would take everything after start
            var chunk = buffer.AsSpan(0, chunkSize);
            if (rnd.Next(100) < sparseFactor)
            {
                // Skip the bytes
                fs.Position += chunk.Length;
            }
            else
            {
                rnd.NextBytes(chunk);
                fs.Write(chunk);
            }

            size -= chunk.Length;
        }
    }
}
+195
View File
@@ -0,0 +1,195 @@
using System.CommandLine;
using System.CommandLine.NamingConventionBinder;
using static TestDataGenerator.Commands.Shared;
namespace TestDataGenerator.Commands;
/// <summary>
/// Update test data in a folder
/// </summary>
public static class Update
{
    /// <summary>
    /// The number of random names to try before a rename is skipped because every candidate was taken
    /// </summary>
    private const int MaxRenameAttempts = 100;

    /// <summary>
    /// The input parameters for the command
    /// </summary>
    /// <param name="TargetFolder">The folder to update files in</param>
    /// <param name="NewFiles">The number of new files to create in the target folder</param>
    /// <param name="UpdatedFiles">The number of files to update in the target folder</param>
    /// <param name="DeletedFiles">The number of files to delete in the target folder</param>
    /// <param name="RenameFiles">The number of files to rename in the target folder</param>
    /// <param name="MaxFileSize">The maximum size of each file to create in the target folder</param>
    /// <param name="MinFileSize">The minimum size of each file to create in the target folder</param>
    /// <param name="SparseFactor">The percentage of data that should be zeroed out in each file</param>
    /// <param name="UpdateFactor">The percentage of files to update in the target folder</param>
    /// <param name="MaxPathSegmentLength">The maximum length of each path segment in the target folder</param>
    record CommandInput(
        DirectoryInfo TargetFolder,
        int NewFiles,
        int UpdatedFiles,
        int DeletedFiles,
        int RenameFiles,
        long MaxFileSize,
        long MinFileSize,
        int SparseFactor,
        int UpdateFactor,
        int MaxPathSegmentLength);

    /// <summary>
    /// Creates the command
    /// </summary>
    /// <returns>The command</returns>
    public static Command CreateCommand()
    {
        var command = new Command("update", "Update test data in a folder");

        var targetFolderArgument = new Argument<DirectoryInfo>("target-folder", "The folder to update files in");
        targetFolderArgument.SetDefaultValue(new DirectoryInfo(Directory.GetCurrentDirectory()));
        command.AddArgument(targetFolderArgument);

        var newFilesOption = new Option<int>("--new-files", "The number of new files to create in the target folder");
        newFilesOption.SetDefaultValue(1000);
        command.AddOption(newFilesOption);

        var updatedFilesOption = new Option<int>("--updated-files", "The number of files to update in the target folder");
        updatedFilesOption.SetDefaultValue(1000);
        command.AddOption(updatedFilesOption);

        var deletedFilesOption = new Option<int>("--deleted-files", "The number of files to delete in the target folder");
        deletedFilesOption.SetDefaultValue(1000);
        command.AddOption(deletedFilesOption);

        var renameFilesOption = new Option<int>("--rename-files", "The number of files to rename in the target folder");
        renameFilesOption.SetDefaultValue(1000);
        command.AddOption(renameFilesOption);

        var maxFileSizeOption = new Option<long>("--max-file-size", "The maximum size of each file to create in the target folder");
        maxFileSizeOption.SetDefaultValue(1024 * 1024L);
        command.AddOption(maxFileSizeOption);

        var minFileSizeOption = new Option<long>("--min-file-size", "The minimum size of each file to create in the target folder");
        minFileSizeOption.SetDefaultValue(1024L);
        command.AddOption(minFileSizeOption);

        var sparseFactorOption = new Option<int>("--sparse-factor", "The percentage of data that should be zeroed out in each file");
        sparseFactorOption.SetDefaultValue(10);
        command.AddOption(sparseFactorOption);

        var updateFactorOption = new Option<int>("--update-factor", "The percentage of files to update in the target folder");
        updateFactorOption.SetDefaultValue(10);
        command.AddOption(updateFactorOption);

        var maxPathSegmentLengthOption = new Option<int>("--max-path-segment-length", "The maximum length of each path segment in the target folder");
        maxPathSegmentLengthOption.SetDefaultValue(15);
        command.AddOption(maxPathSegmentLengthOption);

        // The option and argument names are matched to the CommandInput members by naming convention
        command.Handler = CommandHandler.Create<CommandInput>(Execute);
        return command;
    }

    /// <summary>
    /// Executes the command
    /// </summary>
    /// <param name="input">The input parameters</param>
    /// <exception cref="DirectoryNotFoundException">The target folder does not exist</exception>
    /// <exception cref="ArgumentException">The file size limits are out of range or contradict each other</exception>
    private static void Execute(CommandInput input)
    {
        if (!input.TargetFolder.Exists)
            throw new DirectoryNotFoundException($"The target folder {input.TargetFolder.FullName} does not exist");
        if (input.MaxFileSize <= 0)
            throw new ArgumentException("The maximum file size must be greater than zero");
        if (input.MinFileSize < 0)
            throw new ArgumentException("The minimum file size must not be negative");
        if (input.MinFileSize > input.MaxFileSize)
            throw new ArgumentException("The minimum file size must be less than or equal to the maximum file size");

        Console.WriteLine($"Updating test data in {input.TargetFolder.FullName}");

        // Take a snapshot of the existing content before anything is changed,
        // including the files placed directly in the target folder
        var folders = new List<string>();
        var files = new List<string>();
        files.AddRange(input.TargetFolder.EnumerateFiles().Select(x => x.FullName));
        foreach (var folder in input.TargetFolder.EnumerateDirectories("*", SearchOption.AllDirectories))
        {
            folders.Add(folder.FullName);
            files.AddRange(folder.EnumerateFiles().Select(x => x.FullName));
        }

        Console.WriteLine($"Found {folders.Count} folders and {files.Count} files");

        // New and renamed files may also be placed directly in the target folder
        folders.Add(input.TargetFolder.FullName);

        var rnd = new Random();

        var newFileNames = DistributeFiles(rnd, folders, GenerateFileNames(rnd, input.NewFiles, input.MaxPathSegmentLength));
        foreach (var file in newFileNames)
        {
            Console.WriteLine($"Creating new file {file}");
            var size = rnd.NextInt64(input.MinFileSize, input.MaxFileSize);
            using var fs = new FileStream(file, FileMode.Create, FileAccess.Write);
            fs.SetLength(size);
            if (size > 0)
            {
                fs.Position = 0;
                WriteRandomData(rnd, fs, size, input.SparseFactor);
            }
        }

        foreach (var file in PickRandomFiles(rnd, files, input.UpdatedFiles))
        {
            Console.WriteLine($"Updating file {file}");
            var size = rnd.NextInt64(input.MinFileSize, input.MaxFileSize);
            using var fs = new FileStream(file, FileMode.Open, FileAccess.Write);
            fs.SetLength(size);
            if (size > 0)
            {
                fs.Position = 0;

                // Everything but the update percentage is skipped, leaving the existing data in place
                WriteRandomData(rnd, fs, size, 100 - input.UpdateFactor);
            }
        }

        var deletedFileNames = PickRandomFiles(rnd, files, input.DeletedFiles);
        foreach (var file in deletedFileNames)
        {
            Console.WriteLine($"Deleting file {file}");
            File.Delete(file);
        }

        // A file that was just deleted can no longer be renamed
        var deletedLookup = new HashSet<string>(deletedFileNames);
        var renameCandidates = files.Where(x => !deletedLookup.Contains(x)).ToList();
        foreach (var file in PickRandomFiles(rnd, renameCandidates, input.RenameFiles))
        {
            var newName = GetUnusedPath(rnd, folders, input.MaxPathSegmentLength);
            if (newName == null)
            {
                Console.WriteLine($"Skipping rename of file {file}, no unused name was found");
                continue;
            }

            Console.WriteLine($"Renaming file {file} to {newName}");
            File.Move(file, newName);
        }
    }

    /// <summary>
    /// Picks a number of distinct random entries from the list of candidates
    /// </summary>
    /// <param name="rnd">The random number generator to use</param>
    /// <param name="candidates">The entries to pick from; the list is not modified</param>
    /// <param name="count">The number of entries to pick</param>
    /// <returns>The picked entries; fewer than <paramref name="count"/> if there are not enough candidates</returns>
    private static List<string> PickRandomFiles(Random rnd, List<string> candidates, int count)
    {
        var pool = new List<string>(candidates);
        var take = Math.Clamp(count, 0, pool.Count);

        // Partial Fisher-Yates shuffle: only the first "take" positions need to be randomized
        for (var i = 0; i < take; i++)
        {
            var j = rnd.Next(i, pool.Count);
            (pool[i], pool[j]) = (pool[j], pool[i]);
        }

        return pool.GetRange(0, take);
    }

    /// <summary>
    /// Generates a random file path in one of the folders that is not in use by a file or a folder
    /// </summary>
    /// <param name="rnd">The random number generator to use</param>
    /// <param name="folders">The folders the path can be placed in</param>
    /// <param name="maxSegmentLength">The maximum length of the file name, not counting the extension</param>
    /// <returns>The unused path, or <c>null</c> if none was found within the attempt limit</returns>
    private static string? GetUnusedPath(Random rnd, List<string> folders, int maxSegmentLength)
    {
        for (var attempt = 0; attempt < MaxRenameAttempts; attempt++)
        {
            var candidate = Path.Combine(folders[rnd.Next(folders.Count)], GenerateFileNames(rnd, 1, maxSegmentLength).First());
            if (!File.Exists(candidate) && !Directory.Exists(candidate))
                return candidate;
        }

        return null;
    }
}
+10
View File
@@ -0,0 +1,10 @@
using System.CommandLine;
// Build the root command and register each sub-command explicitly
var rootCommand = new RootCommand("Test Data Generator");
rootCommand.AddCommand(TestDataGenerator.Commands.Create.CreateCommand());
rootCommand.AddCommand(TestDataGenerator.Commands.Update.CreateCommand());

// Run the command line and use the result as the exit code of the process
return rootCommand.Invoke(args);
+7
View File
@@ -0,0 +1,7 @@
# Test data generator
This tool is intended for use by developers who need to generate a data set for testing Duplicati.
The tool can create a set of random data at a target folder using the `create` command, and make alterations using the `update` command.
Use the `--help` option to examine the supported options of each command, and adjust them to produce the desired test dataset.
@@ -0,0 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="System.CommandLine" Version="2.0.0-beta4.22272.1" />
<PackageReference Include="System.CommandLine.NamingConventionBinder" Version="2.0.0-beta4.22272.1" />
</ItemGroup>
</Project>
@@ -0,0 +1,25 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.5.002.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TestDataGenerator", "TestDataGenerator.csproj", "{547C0549-040B-4F4F-A410-351FC0176F28}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{547C0549-040B-4F4F-A410-351FC0176F28}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{547C0549-040B-4F4F-A410-351FC0176F28}.Debug|Any CPU.Build.0 = Debug|Any CPU
{547C0549-040B-4F4F-A410-351FC0176F28}.Release|Any CPU.ActiveCfg = Release|Any CPU
{547C0549-040B-4F4F-A410-351FC0176F28}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {F3FB6242-FFDF-42A7-A78F-BB1FE67C12D8}
EndGlobalSection
EndGlobal