diff --git a/Duplicati/Library/Compression/CompressionModules.cs b/Duplicati/Library/Compression/CompressionModules.cs index 0b2e5c9be..9977c59c3 100644 --- a/Duplicati/Library/Compression/CompressionModules.cs +++ b/Duplicati/Library/Compression/CompressionModules.cs @@ -30,6 +30,8 @@ public static class CompressionModules /// The list of all built-in compression modules /// public static IReadOnlyList BuiltInCompressionModules => [ - new ZipCompression.FileArchiveZip() + new ZipCompression.FileArchiveZip(), + new TarZstdCompression.FileArchiveTarZstd(), + new TarZstdCompression.FileArchiveTarGzip() ]; } diff --git a/Duplicati/Library/Compression/Duplicati.Library.Compression.csproj b/Duplicati/Library/Compression/Duplicati.Library.Compression.csproj index 1e9390532..ff5505f14 100644 --- a/Duplicati/Library/Compression/Duplicati.Library.Compression.csproj +++ b/Duplicati/Library/Compression/Duplicati.Library.Compression.csproj @@ -9,6 +9,7 @@ + diff --git a/Duplicati/Library/Compression/Strings.cs b/Duplicati/Library/Compression/Strings.cs index 423a3c879..860d3894a 100644 --- a/Duplicati/Library/Compression/Strings.cs +++ b/Duplicati/Library/Compression/Strings.cs @@ -37,4 +37,26 @@ namespace Duplicati.Library.Compression.Strings public static string CompressionlibraryShort { get { return LC.L(@"Toggles the zip library to use"); } } public static string FileNotFoundError(string filename) { return LC.L(@"File not found: {0}", filename); } } + + internal static class FileArchiveTarZstd + { + public static string Description { get { return LC.L(@"This module provides Tar+Zstd compression. Files created with this module use the .tzstd extension and include an EOF header for fast random access."); } } + public static string DisplayName { get { return LC.L(@"Tar+Zstd compression"); } } + public static string CompressionlevelLong { get { return LC.L(@"This option controls the compression level used. 
A setting of 1 gives the fastest compression with the lowest ratio, and a setting of 22 gives maximum compression."); } } + public static string CompressionlevelShort { get { return LC.L(@"Set the TarZstd compression level"); } } + public static string MemorybufferLong { get { return LC.L(@"Use this option to buffer file data in memory instead of temporary files during compression. This increases memory usage but avoids disk I/O for temporary files."); } } + public static string MemorybufferShort { get { return LC.L(@"Use memory buffer instead of temp files"); } } + public static string FileNotFoundError(string filename) { return LC.L(@"File not found: {0}", filename); } + } + + internal static class FileArchiveTarGzip + { + public static string Description { get { return LC.L(@"This module provides Tar+GZip compression. Files created with this module use the .tgz extension and include an EOF header for fast random access."); } } + public static string DisplayName { get { return LC.L(@"Tar+GZip compression"); } } + public static string CompressionlevelLong { get { return LC.L(@"This option controls the compression level used. A setting of 1 gives the fastest compression with the lowest ratio, and a setting of 9 gives maximum compression."); } } + public static string CompressionlevelShort { get { return LC.L(@"Set the TarGzip compression level"); } } + public static string MemorybufferLong { get { return LC.L(@"Use this option to buffer file data in memory instead of temporary files during compression. 
This increases memory usage but avoids disk I/O for temporary files."); } } + public static string MemorybufferShort { get { return LC.L(@"Use memory buffer instead of temp files"); } } + public static string FileNotFoundError(string filename) { return LC.L(@"File not found: {0}", filename); } + } } diff --git a/Duplicati/Library/Compression/TarZstdCompression/FileArchiveTarBased.cs b/Duplicati/Library/Compression/TarZstdCompression/FileArchiveTarBased.cs new file mode 100644 index 000000000..1dd51019d --- /dev/null +++ b/Duplicati/Library/Compression/TarZstdCompression/FileArchiveTarBased.cs @@ -0,0 +1,637 @@ +// Copyright (C) 2026, The Duplicati Team +// https://duplicati.com, hello@duplicati.com +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
+ +#nullable enable + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; +using Duplicati.Library.Interface; +using Duplicati.Library.Logging; +using Duplicati.Library.Utility; +using Duplicati.StreamUtil; + +namespace Duplicati.Library.Compression.TarZstdCompression; + +/// +/// Abstract base class for Tar-based compression implementations. +/// File format: [Compressed Stream] containing [Tar Archive] + [.eof-header with dictionary] +/// +public abstract class FileArchiveTarBased : ICompression +{ + private static readonly string LOGTAG = Log.LogTagFromType(); + + // Constant ustar header fields + // Mode: 0000644 (8 bytes at offset 100) + private static readonly byte[] UstarModeBytes = "0000644 "u8.ToArray(); + // UID: 0 (8 bytes at offset 108) + private static readonly byte[] UstarUidBytes = "0000000 "u8.ToArray(); + // GID: 0 (8 bytes at offset 116) + private static readonly byte[] UstarGidBytes = "0000000 "u8.ToArray(); + // Magic: "ustar\0" (6 bytes at offset 257) + private static readonly byte[] UstarMagicBytes = "ustar\0"u8.ToArray(); + // Version: "00" (2 bytes at offset 263) + private static readonly byte[] UstarVersionBytes = "00"u8.ToArray(); + + private readonly ArchiveMode m_mode; + private readonly Stream? m_inputStream; + private readonly Stream? m_outputStream; + private readonly string? m_tempFilePath; + private FileStream? m_tempFileStream; + private readonly int m_compressionLevel; + private readonly Dictionary m_entries; + + // Write mode fields + private Stream? m_compressorStream; + private readonly Dictionary m_writeEntries; + private long m_tarPosition; + private int m_entryCount; + private PendingEntryStream? 
m_currentStream; + private readonly bool m_useMemoryBuffer; + private long m_bytesWritten; + private bool m_disposed; + + /// + /// The filename extension for this compression format + /// + public abstract string FilenameExtension { get; } + + /// + /// The display name for this compression format + /// + public abstract string DisplayName { get; } + + /// + /// The description for this compression format + /// + public abstract string Description { get; } + + /// + /// The option name for compression level + /// + protected abstract string CompressionLevelOption { get; } + + /// + /// The default compression level + /// + protected abstract int DefaultCompressionLevel { get; } + + /// + /// The minimum compression level + /// + protected abstract int MinCompressionLevel { get; } + + /// + /// The maximum compression level + /// + protected abstract int MaxCompressionLevel { get; } + + /// + /// The option name for using memory buffer instead of temp files + /// + protected abstract string MemoryBufferOption { get; } + + /// + /// Creates a decompressor stream wrapper + /// + protected abstract Stream CreateDecompressorStream(Stream inputStream); + + /// + /// Creates a compressor stream wrapper + /// + protected abstract Stream CreateCompressorStream(Stream outputStream, int compressionLevel); + + /// + /// Default constructor for module discovery + /// + protected FileArchiveTarBased() + { + m_mode = ArchiveMode.Read; + m_compressionLevel = DefaultCompressionLevel; + m_entries = null!; + m_writeEntries = null!; + } + + protected FileArchiveTarBased(Stream stream, ArchiveMode mode, IReadOnlyDictionary options) + { + m_mode = mode; + m_entries = new Dictionary(StringComparer.Ordinal); + m_writeEntries = []; + + m_compressionLevel = Math.Clamp( + Utility.Utility.ParseIntOption(options, CompressionLevelOption, DefaultCompressionLevel), MinCompressionLevel, MaxCompressionLevel); + + if (mode == ArchiveMode.Read) + { + m_inputStream = stream ?? 
throw new ArgumentNullException(nameof(stream)); + if (!m_inputStream.CanRead) + throw new ArgumentException("Stream must be readable", nameof(stream)); + + m_tempFilePath = Path.GetTempFileName(); + m_tempFileStream = new FileStream(m_tempFilePath, FileMode.Create, FileAccess.ReadWrite, FileShare.None, 64 * 1024, FileOptions.DeleteOnClose); + + DecompressToTempFile(); + + m_tempFileStream.Position = 0; + if (!TryReadEofHeader(m_tempFileStream, out var entries)) + { + m_tempFileStream.Position = 0; + entries = BuildDictionaryByScanning(m_tempFileStream); + } + + foreach (var entry in entries) + m_entries[entry.Key] = entry.Value; + } + else + { + m_outputStream = stream ?? throw new ArgumentNullException(nameof(stream)); + if (!m_outputStream.CanWrite) + throw new ArgumentException("Stream must be writable", nameof(stream)); + + m_useMemoryBuffer = Utility.Utility.ParseBoolOption(options, MemoryBufferOption); + m_compressorStream = CreateCompressorStream(m_outputStream, m_compressionLevel); + m_tarPosition = 0; + m_entryCount = 0; + m_bytesWritten = 0; + } + } + + private void DecompressToTempFile() + { + if (m_inputStream == null || m_tempFileStream == null) + return; + + try + { + using var decompressor = CreateDecompressorStream(m_inputStream); + decompressor.CopyTo(m_tempFileStream); + m_tempFileStream.Flush(); + } + catch (Exception ex) + { + throw new InvalidDataException("Failed to decompress stream", ex); + } + } + + public abstract IList SupportedCommands { get; } + + public long Size => m_mode switch + { + ArchiveMode.Read => m_inputStream?.Length ?? 0, + ArchiveMode.Write => m_bytesWritten, // Track raw bytes written (assuming zero compression) + _ => 0 + }; + + public long FlushBufferSize + { + get + { + if (m_mode != ArchiveMode.Write) + return 0; + + // Current file being written + its header (if any) + long total = m_currentStream?.Length + 512 ?? 
0; + + // EOF header estimate: header (512) + JSON (~60 bytes per entry) + padding + trailer (14) + // JSON format: {"path":{"Offset":N,"Size":N,"LastWriteTime":N}} + // Rough estimate: 60 chars per entry + long eofJsonSize = m_entryCount * 60 + 100; + long eofHeaderSize = 512 + ((eofJsonSize + 511) / 512 * 512) + 14; + eofHeaderSize = (eofHeaderSize + 511) / 512 * 512; + total += eofHeaderSize; + + return total; + } + } + + public string[] ListFiles(string? prefix) + { + if (m_mode != ArchiveMode.Read) + throw new InvalidOperationException("Cannot read while writing"); + + var files = m_entries.Keys; + if (string.IsNullOrEmpty(prefix)) + return files.ToArray(); + + return files + .Where(f => f.StartsWith(prefix, StringComparison.Ordinal) || + f.Replace('\\', '/').StartsWith(prefix, StringComparison.Ordinal)) + .ToArray(); + } + + public IEnumerable> ListFilesWithSize(string? prefix) + { + if (m_mode != ArchiveMode.Read) + throw new InvalidOperationException("Cannot read while writing"); + + foreach (var entry in m_entries) + { + if (string.IsNullOrEmpty(prefix) || + entry.Key.StartsWith(prefix, StringComparison.Ordinal) || + entry.Key.Replace('\\', '/').StartsWith(prefix, StringComparison.Ordinal)) + { + yield return new KeyValuePair(entry.Key, entry.Value.Size); + } + } + } + + public Stream? 
OpenRead(string file) + { + if (m_mode != ArchiveMode.Read) + throw new InvalidOperationException("Cannot read while writing"); + + var normalizedFile = file.Replace('\\', '/'); + + if (!m_entries.TryGetValue(normalizedFile, out var entry)) + { + if (file != normalizedFile) + m_entries.TryGetValue(file, out entry); + } + + if (entry == null) + return null; + + if (m_tempFileStream == null) + return null; + + return new ReadLimitLengthStream(m_tempFileStream, entry.Offset, entry.Size); + } + + public DateTime GetLastWriteTime(string file) + { + if (m_mode != ArchiveMode.Read) + throw new InvalidOperationException("Cannot read while writing"); + + var normalizedFile = file.Replace('\\', '/'); + if (m_entries.TryGetValue(normalizedFile, out var entry)) + return entry.LastWriteTime; + + if (file != normalizedFile && m_entries.TryGetValue(file, out entry)) + return entry.LastWriteTime; + + throw new FileNotFoundException($"File not found: {file}"); + } + + public bool FileExists(string file) + { + if (m_mode != ArchiveMode.Read) + throw new InvalidOperationException("Cannot read while writing"); + + var normalizedFile = file.Replace('\\', '/'); + return m_entries.ContainsKey(normalizedFile) || + (file != normalizedFile && m_entries.ContainsKey(file)); + } + + public Stream CreateFile(string file, CompressionHint hint, DateTime lastWrite) + { + if (m_mode != ArchiveMode.Write) + throw new InvalidOperationException("Cannot write while reading"); + + var normalizedFile = file.Replace('\\', '/'); + + if (m_writeEntries.ContainsKey(normalizedFile)) + throw new InvalidOperationException($"File already exists: {normalizedFile}"); + + if (m_currentStream != null) + throw new InvalidOperationException("Cannot create a new file while another file is still open"); + + m_currentStream = new PendingEntryStream(this, normalizedFile, lastWrite, m_useMemoryBuffer); + return m_currentStream; + } + + public void Dispose() + { + if (m_disposed) + return; + + try + { + if (m_mode == 
ArchiveMode.Write) + { + WriteEofHeader(); + m_compressorStream?.Flush(); + m_compressorStream?.Dispose(); + } + } + catch (Exception ex) + { + Log.WriteErrorMessage(LOGTAG, "DisposeError", ex, "Error during disposal"); + } + finally + { + m_tempFileStream?.Dispose(); + + if (m_tempFilePath != null && File.Exists(m_tempFilePath)) + { + try { File.Delete(m_tempFilePath); } catch { } + } + + m_disposed = true; + } + } + + private void WriteEntryToTar(string name, DateTime lastWriteTime, Stream contentStream, long size) + { + if (m_compressorStream == null) + throw new InvalidOperationException("Not in write mode"); + + // Record content offset (after the 512-byte header) + var contentOffset = m_tarPosition + 512; + + // Write tar header (512 bytes) + WriteUstarHeader(m_compressorStream, name, size, lastWriteTime); + m_bytesWritten += 512; + + // Write content from stream + contentStream.CopyTo(m_compressorStream); + m_bytesWritten += size; + + // Pad to 512-byte boundary + var padding = 512 - (int)(size % 512); + if (padding != 512) + { + m_compressorStream.Write(new byte[padding], 0, padding); + m_tarPosition += padding; + m_bytesWritten += padding; + } + + m_tarPosition += 512 + size; + m_currentStream = null; + + // Track entry for EOF header + m_writeEntries[name] = new SerializableEntry( + contentOffset, + size, + Utility.Utility.NormalizeDateTimeToEpochSeconds(lastWriteTime) + ); + m_entryCount++; + } + + private void WriteEofHeader() + { + if (m_compressorStream == null || m_writeEntries.Count == 0) + return; + + var headerStartPosition = m_tarPosition; + + var json = JsonSerializer.Serialize(m_writeEntries); + var jsonBytes = Encoding.UTF8.GetBytes(json); + + // Calculate total size: JSON + trailer (14 bytes), then pad to 512 bytes + const int trailerSize = 14; // 8 offset + 6 magic + var contentSize = jsonBytes.Length + trailerSize; + var paddedSize = (contentSize + 511) / 512 * 512; + var paddingSize = paddedSize - contentSize; + + // Write ustar header for 
.eof-header (512 bytes) + WriteUstarHeader(m_compressorStream, TarBaseConstants.EofHeaderFileName, paddedSize, DateTime.UtcNow); + m_bytesWritten += 512; + + // Write content: JSON + padding + trailer + m_compressorStream.Write(jsonBytes, 0, jsonBytes.Length); + m_bytesWritten += jsonBytes.Length; + m_compressorStream.Write(new byte[paddingSize], 0, paddingSize); + m_bytesWritten += paddingSize; + + // Write trailer: offset (8 bytes, little-endian) + magic (6 bytes) + var offsetBytes = BitConverter.GetBytes(headerStartPosition); + m_compressorStream.Write(offsetBytes, 0, 8); + m_bytesWritten += 8; + var magicBytes = Encoding.ASCII.GetBytes(TarBaseConstants.EofHeaderMagic); + m_compressorStream.Write(magicBytes, 0, 6); + m_bytesWritten += 6; + } + + private static void WriteUstarHeader(Stream stream, string fileName, long size, DateTime mtime) + { + var header = new byte[512]; + + // Name (100 bytes) - null terminated + var nameBytes = Encoding.UTF8.GetBytes(fileName); + var nameLen = Math.Min(nameBytes.Length, 99); + Array.Copy(nameBytes, 0, header, 0, nameLen); + header[nameLen] = 0; + + // Mode (8 bytes, octal) - 0644 + Array.Copy(UstarModeBytes, 0, header, 100, 8); + + // UID (8 bytes, octal) - 0 + Array.Copy(UstarUidBytes, 0, header, 108, 8); + + // GID (8 bytes, octal) - 0 + Array.Copy(UstarGidBytes, 0, header, 116, 8); + + // Size (12 bytes, octal) - space terminated + var sizeStr = Convert.ToString(size, 8).PadLeft(11, '0'); + var sizeBytes = Encoding.ASCII.GetBytes(sizeStr + " "); + Array.Copy(sizeBytes, 0, header, 124, 12); + + // Mtime (12 bytes, octal) - space terminated + var mtimeValue = new DateTimeOffset(mtime).ToUnixTimeSeconds(); + var mtimeStr = Convert.ToString(mtimeValue, 8).PadLeft(11, '0'); + var mtimeBytes = Encoding.ASCII.GetBytes(mtimeStr + " "); + Array.Copy(mtimeBytes, 0, header, 136, 12); + + // Checksum placeholder (8 bytes) - filled with spaces for calculation + Array.Fill(header, (byte)' ', 148, 8); + + // Type flag (1 byte) - '0' for 
regular file + header[156] = (byte)'0'; + + // Magic (6 bytes) - "ustar\0" + Array.Copy(UstarMagicBytes, 0, header, 257, 6); + + // Version (2 bytes) - "00" + Array.Copy(UstarVersionBytes, 0, header, 263, 2); + + // Calculate checksum + var checksum = header.Sum(b => (int)b); + var checksumStr = Convert.ToString(checksum, 8).PadLeft(6, '0'); + var checksumBytes = Encoding.ASCII.GetBytes(checksumStr + "\0 "); + Array.Copy(checksumBytes, 0, header, 148, 8); + + stream.Write(header, 0, header.Length); + } + + private static bool TryReadEofHeader(Stream stream, out Dictionary entries) + { + entries = new Dictionary(); + + try + { + if (stream.Length < TarBaseConstants.EofHeaderTrailerSize) + return false; + + stream.Seek(-TarBaseConstants.EofHeaderTrailerSize, SeekOrigin.End); + + var offsetBytes = new byte[TarBaseConstants.EofHeaderOffsetSize]; + stream.ReadExactly(offsetBytes, 0, offsetBytes.Length); + var headerOffset = BitConverter.ToInt64(offsetBytes, 0); + + var magicBytes = new byte[TarBaseConstants.EofHeaderMagicSize]; + stream.ReadExactly(magicBytes, 0, magicBytes.Length); + var magic = Encoding.ASCII.GetString(magicBytes); + + if (magic != TarBaseConstants.EofHeaderMagic) + return false; + + if (headerOffset < 0 || headerOffset >= stream.Length - TarBaseConstants.EofHeaderTrailerSize) + return false; + + stream.Seek(headerOffset, SeekOrigin.Begin); + + using var tarReader = new System.Formats.Tar.TarReader(stream, leaveOpen: true); + var entry = tarReader.GetNextEntry(); + + if (entry == null || entry.Name != TarBaseConstants.EofHeaderFileName) + return false; + + if (entry.DataStream == null) + return false; + + using var ms = new MemoryStream(); + entry.DataStream.CopyTo(ms); + var contentBytes = ms.ToArray(); + + if (contentBytes.Length < TarBaseConstants.EofHeaderTrailerSize) + return false; + + var jsonBytes = contentBytes[..^TarBaseConstants.EofHeaderTrailerSize]; + int jsonLength = jsonBytes.Length; + while (jsonLength > 0 && jsonBytes[jsonLength - 1] 
== 0) + jsonLength--; + + var json = Encoding.UTF8.GetString(jsonBytes, 0, jsonLength); + + var deserialized = JsonSerializer.Deserialize>(json); + if (deserialized == null) + return false; + + entries = deserialized.ToDictionary( + kvp => kvp.Key, + kvp => new FileEntry(kvp.Key, kvp.Value.Offset, kvp.Value.Size, DateTime.UnixEpoch.AddSeconds(kvp.Value.LastWriteTime)) + ); + return true; + } + catch (Exception ex) + { + Log.WriteWarningMessage(LOGTAG, "EofHeaderReadError", ex, "Failed to read EOF header"); + entries = new Dictionary(); + return false; + } + } + + private static Dictionary BuildDictionaryByScanning(Stream stream) + { + var entries = new Dictionary(StringComparer.Ordinal); + stream.Seek(0, SeekOrigin.Begin); + + try + { + using var tarReader = new System.Formats.Tar.TarReader(stream, leaveOpen: true); + + while (true) + { + var entryStartPosition = stream.Position; + var entry = tarReader.GetNextEntry(); + if (entry == null) + break; + + if (entry.Name == TarBaseConstants.EofHeaderFileName) + continue; + + if (entry.EntryType != System.Formats.Tar.TarEntryType.RegularFile && + entry.EntryType != System.Formats.Tar.TarEntryType.V7RegularFile) + continue; + + // For ustar format, header is 512 bytes, so content starts at entryStartPosition + 512 + // For pax format, headers can be variable size, but we primarily use ustar + // Since TarReader advances the stream to after the content, we calculate backwards: + // stream.Position is now at end of entry (after content padding) + // Content size with padding = ((entry.Length + 511) / 512) * 512 + var paddedContentSize = (entry.Length + 511) / 512 * 512; + var contentOffset = stream.Position - paddedContentSize; + + entries[entry.Name] = new FileEntry( + entry.Name, + contentOffset, + entry.Length, + entry.ModificationTime.UtcDateTime + ); + } + } + catch (Exception ex) + { + Log.WriteWarningMessage(LOGTAG, "TarScanError", ex, "Error scanning tar archive"); + } + + Log.WriteInformationMessage(LOGTAG, 
"DictionaryBuiltByScanning", "Built file dictionary by scanning {0} entries", entries.Count); + return entries; + } + + private record SerializableEntry(long Offset, long Size, long LastWriteTime); + + private class PendingEntryStream : WrappingAsyncStream + { + private readonly FileArchiveTarBased m_parent; + private readonly string m_name; + private readonly DateTime m_lastWriteTime; + private bool m_closed; + + public PendingEntryStream(FileArchiveTarBased parent, string name, DateTime lastWriteTime, bool useMemoryBuffer) + : base(useMemoryBuffer ? new MemoryStream() : Utility.TempFileStream.Create()) + { + m_parent = parent; + m_name = name; + m_lastWriteTime = lastWriteTime; + } + + public bool IsClosed => m_closed; + + protected override Task ReadImplAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) + => throw new NotSupportedException(); + + protected override Task WriteImplAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) + { + if (m_closed) + throw new InvalidOperationException("Stream is closed"); + return base.BaseStream.WriteAsync(buffer, offset, count, cancellationToken); + } + + protected override void Dispose(bool disposing) + { + if (!m_closed) + { + m_closed = true; + var size = base.BaseStream.Length; + base.BaseStream.Position = 0; + m_parent.WriteEntryToTar(m_name, m_lastWriteTime, base.BaseStream, size); + } + base.Dispose(disposing); + base.BaseStream.Dispose(); + } + } +} diff --git a/Duplicati/Library/Compression/TarZstdCompression/FileArchiveTarGzip.cs b/Duplicati/Library/Compression/TarZstdCompression/FileArchiveTarGzip.cs new file mode 100644 index 000000000..8373ca019 --- /dev/null +++ b/Duplicati/Library/Compression/TarZstdCompression/FileArchiveTarGzip.cs @@ -0,0 +1,109 @@ +// Copyright (C) 2026, The Duplicati Team +// https://duplicati.com, hello@duplicati.com +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and 
associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +#nullable enable + +using System.Collections.Generic; +using System.IO; +using System.IO.Compression; +using Duplicati.Library.Interface; + +namespace Duplicati.Library.Compression.TarZstdCompression; + +/// +/// An ICompression implementation using Tar+GZip with EOF header for fast random access. 
+/// File format: [GZip Stream] containing [Tar Archive] + [.eof-header with dictionary] +/// +public class FileArchiveTarGzip : FileArchiveTarBased +{ + private const string COMPRESSION_LEVEL_OPTION = "tgz-compression-level"; + private const string MEMORY_BUFFER_OPTION = "tgz-memory-buffer"; + private const int DEFAULT_COMPRESSION_LEVEL = 2; + private const int MIN_COMPRESSION_LEVEL = 0; + private const int MAX_COMPRESSION_LEVEL = 3; + + /// + /// Default constructor for module discovery + /// + public FileArchiveTarGzip() : base() + { + } + + /// + /// Constructor with stream and options + /// + public FileArchiveTarGzip(Stream stream, ArchiveMode mode, IReadOnlyDictionary options) + : base(stream, mode, options) + { + } + + public override string FilenameExtension => "tgz"; + + public override string DisplayName => Strings.FileArchiveTarGzip.DisplayName; + + public override string Description => Strings.FileArchiveTarGzip.Description; + + protected override string CompressionLevelOption => COMPRESSION_LEVEL_OPTION; + + protected override int DefaultCompressionLevel => DEFAULT_COMPRESSION_LEVEL; + + protected override int MinCompressionLevel => MIN_COMPRESSION_LEVEL; + + protected override int MaxCompressionLevel => MAX_COMPRESSION_LEVEL; + + protected override string MemoryBufferOption => MEMORY_BUFFER_OPTION; + + public override IList SupportedCommands => + [ + new CommandLineArgument( + COMPRESSION_LEVEL_OPTION, + CommandLineArgument.ArgumentType.Enumeration, + Strings.FileArchiveTarGzip.CompressionlevelShort, + Strings.FileArchiveTarGzip.CompressionlevelLong, + DEFAULT_COMPRESSION_LEVEL.ToString(), + null, + ["0", "1", "2", "3"] + ), + new CommandLineArgument( + MEMORY_BUFFER_OPTION, + CommandLineArgument.ArgumentType.Boolean, + Strings.FileArchiveTarGzip.MemorybufferShort, + Strings.FileArchiveTarGzip.MemorybufferLong, + "false" + ) + ]; + + protected override Stream CreateDecompressorStream(Stream inputStream) + => new GZipStream(inputStream, 
CompressionMode.Decompress); + + protected override Stream CreateCompressorStream(Stream outputStream, int compressionLevel) + => new GZipStream(outputStream, MapToGZipLevel(compressionLevel), leaveOpen: true); + + private static CompressionLevel MapToGZipLevel(int level) + => level switch + { + 0 => CompressionLevel.NoCompression, + 1 => CompressionLevel.Fastest, + 2 => CompressionLevel.Optimal, + 3 => CompressionLevel.SmallestSize, + _ => CompressionLevel.Optimal + }; +} diff --git a/Duplicati/Library/Compression/TarZstdCompression/FileArchiveTarZstd.cs b/Duplicati/Library/Compression/TarZstdCompression/FileArchiveTarZstd.cs new file mode 100644 index 000000000..e51140dfb --- /dev/null +++ b/Duplicati/Library/Compression/TarZstdCompression/FileArchiveTarZstd.cs @@ -0,0 +1,99 @@ +// Copyright (C) 2026, The Duplicati Team +// https://duplicati.com, hello@duplicati.com +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
+ +#nullable enable + +using System.Collections.Generic; +using System.IO; +using Duplicati.Library.Interface; +using ZstdSharp; + +namespace Duplicati.Library.Compression.TarZstdCompression; + +/// +/// An ICompression implementation using Tar+Zstd with EOF header for fast random access. +/// File format: [Zstd Stream] containing [Tar Archive] + [.eof-header with dictionary] +/// +public class FileArchiveTarZstd : FileArchiveTarBased +{ + private const string COMPRESSION_LEVEL_OPTION = "tzstd-compression-level"; + private const string MEMORY_BUFFER_OPTION = "tzstd-memory-buffer"; + private const int DEFAULT_COMPRESSION_LEVEL = 10; + private const int MIN_COMPRESSION_LEVEL = 1; + private const int MAX_COMPRESSION_LEVEL = 22; + + /// + /// Default constructor for module discovery + /// + public FileArchiveTarZstd() : base() + { + } + + /// + /// Constructor with stream and options + /// + public FileArchiveTarZstd(Stream stream, ArchiveMode mode, IReadOnlyDictionary options) + : base(stream, mode, options) + { + } + + public override string FilenameExtension => "tzstd"; + + public override string DisplayName => Strings.FileArchiveTarZstd.DisplayName; + + public override string Description => Strings.FileArchiveTarZstd.Description; + + protected override string CompressionLevelOption => COMPRESSION_LEVEL_OPTION; + + protected override int DefaultCompressionLevel => DEFAULT_COMPRESSION_LEVEL; + + protected override int MinCompressionLevel => MIN_COMPRESSION_LEVEL; + + protected override int MaxCompressionLevel => MAX_COMPRESSION_LEVEL; + + protected override string MemoryBufferOption => MEMORY_BUFFER_OPTION; + + public override IList SupportedCommands => + [ + new CommandLineArgument( + COMPRESSION_LEVEL_OPTION, + CommandLineArgument.ArgumentType.Enumeration, + Strings.FileArchiveTarZstd.CompressionlevelShort, + Strings.FileArchiveTarZstd.CompressionlevelLong, + DEFAULT_COMPRESSION_LEVEL.ToString(), + null, + ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", 
"12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22"] + ), + new CommandLineArgument( + MEMORY_BUFFER_OPTION, + CommandLineArgument.ArgumentType.Boolean, + Strings.FileArchiveTarZstd.MemorybufferShort, + Strings.FileArchiveTarZstd.MemorybufferLong, + "false" + ) + ]; + + protected override Stream CreateDecompressorStream(Stream inputStream) + => new DecompressionStream(inputStream); + + protected override Stream CreateCompressorStream(Stream outputStream, int compressionLevel) + => new CompressionStream(outputStream, compressionLevel, leaveOpen: true); +} diff --git a/Duplicati/Library/Compression/TarZstdCompression/FileEntry.cs b/Duplicati/Library/Compression/TarZstdCompression/FileEntry.cs new file mode 100644 index 000000000..3ddf6192a --- /dev/null +++ b/Duplicati/Library/Compression/TarZstdCompression/FileEntry.cs @@ -0,0 +1,34 @@ +// Copyright (C) 2026, The Duplicati Team +// https://duplicati.com, hello@duplicati.com +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +using System; + +namespace Duplicati.Library.Compression.TarZstdCompression; + +/// +/// Represents a file entry in the tar archive with its metadata +/// +public sealed record FileEntry( + string Name, + long Offset, + long Size, + DateTime LastWriteTime +); diff --git a/Duplicati/Library/Compression/TarZstdCompression/README.md b/Duplicati/Library/Compression/TarZstdCompression/README.md new file mode 100644 index 000000000..7de468930 --- /dev/null +++ b/Duplicati/Library/Compression/TarZstdCompression/README.md @@ -0,0 +1,322 @@ +# Tar-Based Compression Format + +This compression module provides an ICompression implementation using Tar+Compression with a custom EOF (End-of-File) header entry for fast random access, emulating a Zip-like file format. This format relies more heavily on temporary files as the compression is done on the full tar file for best compression. This approach also means that most generated volumes will be better compressed but file size may be significantly lower than the target volume size. + +Two compression variants are available: +- **Tar+GZip** (`.tgz` extension) - Uses GZip compression +- **Tar+Zstd** (`.tzstd` extension) - Uses Zstd compression + +The archives can be decompressed with standard tar tools, and the `.eof-header` entry will appear as a regular file in the archive for non-Duplicati tools. In case the header is missing, the library will fall back to scanning the entire archive to find the files. + +## File Extensions + +- `.tgz` - Tar+GZip format +- `.tzstd` - Tar+Zstd format + +## File Format Specification + +### Overall Structure + +``` +[GZip Compressed Stream] + └─ [Tar Archive] + ├─ [File Entry 1] + ├─ [File Entry 2] + ├─ ... 
+ └─ [.eof-header Entry] <-- Regular tar entry with special content + ├─ [Tar Header (512 bytes)] + ├─ [JSON Dictionary content] + ├─ [Padding (to make JSON+Trailer fit in 512-byte blocks)] + ├─ [8 bytes: offset of .eof-header tar header start] + └─ [6 bytes: "EOFHD1" magic] + ↑ + All of this is INSIDE the entry content, properly padded +``` + +### EOF Header Format + +The `.eof-header` entry enables O(1) file lookups without scanning the entire archive. It is stored as a **regular tar entry** (not external to the tar), which means: + +- Standard tar tools can extract the archive normally +- The `.eof-header` file appears as a regular file in the archive +- Standard compression tools can decompress the archive + +The `.eof-header` entry contains: + +1. **Tar Header (512 bytes)**: Standard tar header for `.eof-header` file +2. **Entry Content** (padded to 512-byte boundary): + - **JSON Dictionary**: Maps filenames to their metadata (offset, size, modification time) + - **Padding**: Padding bytes to align the trailer to the 512-byte boundary + - **Header Trailer (14 bytes)**: + - **Offset (8 bytes)**: Little-endian long pointing to start of `.eof-header` tar header + - **Magic (6 bytes)**: Literal `"EOFHD1"` marking the end + +**Important**: The trailer is INSIDE the tar entry content, not after it. The total entry content (JSON + padding + trailer) is padded to a multiple of 512 bytes as required by the TAR format. + +#### JSON Dictionary Format + +```json +{ + "file1.txt": { + "offset": 512, + "size": 1024, + "lastWriteTime": 1686832200 + }, + "file2.txt": { + "offset": 2048, + "size": 2048, + "lastWriteTime": 1686832260 + } +} +``` + +The `lastWriteTime` field is stored as a Unix timestamp in seconds since epoch (January 1, 1970 UTC). + +### Reading Process + +1. **Decompress**: Decompress the entire gzip stream to a temporary file +2. **Read Trailer**: Seek to end of file and read the last 14 bytes (offset + magic) +3. 
**Verify Magic**: Check for literal `"EOFHD1"` magic string at the end +4. **Load Dictionary**: If valid, use the offset to seek to the start of `.eof-header` entry and parse the JSON dictionary +5. **Fallback to Scanning**: If header invalid/missing, scan entire tar file to build dictionary + +**Note**: Since `.eof-header` is always the last entry and its content is padded to 512 bytes, the trailer (14 bytes) is always at the very end of the file. No searching is needed - just read the last 14 bytes. + +### Writing Process + +1. **Collect Entries**: Store all file entries in memory with their data +2. **Write Tar Entries**: Write all file entries to tar archive using System.Formats.Tar +3. **Build Dictionary**: Create JSON dictionary mapping filenames to their offsets +4. **Write EOF Header Entry**: Write `.eof-header` as regular tar entry containing: + - Tar header (standard 512-byte tar header) + - JSON dictionary content + - Padding to 512-byte boundary + - Trailer with offset (pointing to start of this entry's header) + magic +5. 
**Compress**: Compress the entire tar file with the desired method + +## Tar Format Implementation + +The module uses `System.Formats.Tar` with the Pax format: + +- **Block Size**: 512 bytes +- **Format**: Pax (POSIX.1-2001 extended format) +- **Compatibility**: Standard tar tools can read the archive + +## Compression Options + +### GZip Compression (`.tgz`) + +- **Implementation**: `System.IO.Compression.GZipStream` +- **Default Level**: 2 (Optimal) +- **Level Range**: 0-3 +- **Level Mapping**: + - **0**: NoCompression + - **1**: Fastest + - **2**: Optimal + - **3**: SmallestSize +- **Option Name**: `tgz-compression-level` + +### Zstd Compression (`.tzstd`) + +- **Implementation**: Zstd compression +- **Default Level**: 10 +- **Level Range**: 1-22 +- **Option Name**: `tzstd-compression-level` + +## Performance Characteristics + +### Write Performance + +- **Memory Usage**: Proportional to total file sizes (stores in memory before writing) +- **Compression**: Single-pass after all files collected +- **I/O**: Writes tar to temp file, then compresses to output + +### Read Performance + +- **With EOF Header**: O(1) file lookup +- **Without EOF Header**: O(n) scan through entire archive +- **Decompression**: Full stream decompression required (cannot seek in gzip) + +## Command-Line Options + +### tgz-compression-level (Tar+GZip) + +Sets the compression level for GZip compression. + +- **Type**: Enumeration +- **Default**: 2 +- **Range**: 0-3 +- **Description**: Higher values provide better compression but are slower. 0 = no compression, 3 = maximum compression. + +```bash +duplicati backup ... --tgz-compression-level=2 +``` + +### tzstd-compression-level (Tar+Zstd) + +Sets the compression level for Zstd compression. + +- **Type**: Enumeration +- **Default**: 10 +- **Range**: 1-22 +- **Description**: Higher values provide better compression but are slower. + +```bash +duplicati backup ... 
--tzstd-compression-level=9 +``` + +## Compatibility + +### With Standard Tools + +The format is designed to be compatible with standard Unix tools: + +**Tar+GZip (`.tgz`):** +```bash +# Decompress and extract with standard tools +gunzip -c backup.tgz | tar -xf - + +# List contents +gunzip -c backup.tgz | tar -tf - + +# The .eof-header file will be extracted like any other file +# It contains the JSON dictionary with file offsets +``` + +**Tar+Zstd (`.tzstd`):** +```bash +# Decompress and extract with standard tools +zstd -d -c backup.tzstd | tar -xf - + +# List contents +zstd -d -c backup.tzstd | tar -tf - + +# The .eof-header file will be extracted like any other file +# It contains the JSON dictionary with file offsets +``` + +### Archive Structure + +When extracted with standard tar, you'll see: + +- All your files (file1.txt, file2.txt, etc.) +- An extra `.eof-header` file containing the metadata dictionary + +This design ensures: + +1. **No data loss**: Standard tools can fully extract the archive +2. **Self-documenting**: The `.eof-header` is human-readable JSON +3. **Forward compatibility**: Future versions can extend the format + +## Advantages + +1. **Fast Random Access**: EOF header enables O(1) file lookups +2. **Standard Format**: Based on standard tar+gzip for universal compatibility +3. **Fallback Support**: Can read archives even if EOF header is corrupted +4. **Self-Documenting**: JSON dictionary is human-readable +5. **Tool Compatible**: Works with standard tar and gzip tools + +## Limitations + +1. **Full Decompression Required**: Must decompress entire archive to read any file +2. **Memory Usage During Write**: All file data held in memory until disposal +3. 
**No Streaming Write**: Cannot stream large files without memory overhead + +## File Structure Diagram + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Compressed Stream (GZip or Zstd) │ +├─────────────────────────────────────────────────────────────┤ +│ Tar Archive │ +│ ┌───────────────────────────────────────────────────────┐ │ +│ │ Entry 1: file1.txt │ │ +│ │ ├─ Tar Header (512 bytes) │ │ +│ │ └─ File Content (padded to 512 boundary) │ │ +│ ├───────────────────────────────────────────────────────┤ │ +│ │ Entry 2: file2.txt │ │ +│ │ ├─ Tar Header (512 bytes) │ │ +│ │ └─ File Content (padded to 512 boundary) │ │ +│ ├───────────────────────────────────────────────────────┤ │ +│ │ Entry N: .eof-header (REGULAR TAR ENTRY) │ │ +│ │ ├─ Tar Header (512 bytes) │ │ +│ │ ├─ Content (ALL within 512-byte blocks): │ │ +│ │ │ ├─ JSON Dictionary │ │ +│ │ │ │ {"file1.txt":{"offset":512,...} │ │ +│ │ │ ├─ Padding (to align trailer) │ │ +│ │ │ ├─ Header Offset (8 bytes) ───┐ │ │ +│ │ │ └─ "EOFHD1" Magic (6 bytes) │ │ │ +│ │ │ │ │ │ +│ │ └────────────────────────────────│───────────────────┘ │ +│ │ │ │ +│ └───────────────────────────────────┘ │ +│ The offset points back to the start │ +│ of THIS entry's tar header │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Implementation Notes + +1. The `.eof-header` is a **regular tar entry** - it has a tar header just like any other file +2. The trailer (offset + magic) is **inside the entry content**, at the end, before the 512-byte padding +3. The total entry content size (JSON + trailer) is a multiple of 512 bytes as per TAR spec +4. The offset in the trailer points to the start of the `.eof-header` tar header +5. When using standard tar tools, the `.eof-header` file is extracted normally (as a file containing JSON + binary trailer) +6. The JSON dictionary excludes the `.eof-header` file itself +7. 
File paths are stored with forward slashes but support both slash types on read + +## Usage Examples + +### Creating an Archive + +```csharp +using var stream = File.Create("backup.tzstd"); +using var archive = new FileArchiveTarZstd( + stream, + ArchiveMode.Write, + new Dictionary { ["tzstd-compression-level"] = "9" } +); + +using (var entry = archive.CreateFile("documents/file1.txt", CompressionHint.Compressible, DateTime.Now)) +{ + entry.Write(data, 0, data.Length); +} +``` + +### Reading an Archive + +```csharp +using var stream = File.OpenRead("backup.tzstd"); +using var archive = new FileArchiveTarZstd(stream, ArchiveMode.Read, new Dictionary()); + +var files = archive.ListFiles(null); +foreach (var file in files) +{ + using var fileStream = archive.OpenRead(file); + // Read file content +} +``` + +### Extracting with Standard Tools + +**Tar+GZip (`.tgz`):** +```bash +# The archive is valid tar+gzip +gunzip -c backup.tgz > backup.tar +tar -xf backup.tar + +# You'll see: +# - documents/file1.txt +# - .eof-header (contains the JSON offset map) +``` + +**Tar+Zstd (`.tzstd`):** +```bash +# The archive is valid tar+zstd +zstd -d -c backup.tzstd > backup.tar +tar -xf backup.tar + +# You'll see: +# - documents/file1.txt +# - .eof-header (contains the JSON offset map) +``` diff --git a/Duplicati/Library/Compression/TarZstdCompression/TarBaseConstants.cs b/Duplicati/Library/Compression/TarZstdCompression/TarBaseConstants.cs new file mode 100644 index 000000000..2123def38 --- /dev/null +++ b/Duplicati/Library/Compression/TarZstdCompression/TarBaseConstants.cs @@ -0,0 +1,53 @@ +// Copyright (C) 2026, The Duplicati Team +// https://duplicati.com, hello@duplicati.com +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, 
sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +namespace Duplicati.Library.Compression.TarZstdCompression; + +/// +/// Constants for the Tar-based file formats +/// +public static class TarBaseConstants +{ + /// + /// EOF header file name + /// + public const string EofHeaderFileName = ".eof-header"; + + /// + /// Magic string at the end of the EOF header (6 bytes) + /// + public const string EofHeaderMagic = "EOFHD1"; + + /// + /// Size of the EOF header magic string + /// + public const int EofHeaderMagicSize = 6; + + /// + /// Size of the offset field (8 bytes, long) + /// + public const int EofHeaderOffsetSize = 8; + + /// + /// Total size of the EOF header trailer (magic + offset) + /// + public const int EofHeaderTrailerSize = EofHeaderMagicSize + EofHeaderOffsetSize; +} diff --git a/Duplicati/Library/Compression/ZipCompression/SharpCompressZipArchive.cs b/Duplicati/Library/Compression/ZipCompression/SharpCompressZipArchive.cs index b7428c100..726f86ab8 100644 --- a/Duplicati/Library/Compression/ZipCompression/SharpCompressZipArchive.cs +++ b/Duplicati/Library/Compression/ZipCompression/SharpCompressZipArchive.cs @@ -151,7 +151,7 @@ public class SharpCompressZipArchive : IZipArchive /// The deflate compression leve /// The 
Zstd compression level private static int MapDeflateCompressionLevelToZStandard(int level) - => (int)Math.Max(1, Math.Min(22, Math.Round(level * 2.33) + 1)); + => (int)Math.Clamp(Math.Round(level * 2.33), 1, 22); private IArchive Archive diff --git a/Duplicati/Library/Main/Controller.cs b/Duplicati/Library/Main/Controller.cs index e27b73096..40e32daf3 100644 --- a/Duplicati/Library/Main/Controller.cs +++ b/Duplicati/Library/Main/Controller.cs @@ -122,6 +122,13 @@ namespace Duplicati.Library.Main UsageReporter.Reporter.Report("USE_COMPRESSION", m_options.CompressionModule); UsageReporter.Reporter.Report("USE_ENCRYPTION", m_options.EncryptionModule); + // Warn about experimental tar-based compression modules + if (m_options.CompressionModule.Equals("tzstd", StringComparison.OrdinalIgnoreCase) || + m_options.CompressionModule.Equals("tgz", StringComparison.OrdinalIgnoreCase)) + { + Logging.Log.WriteWarningMessage(LOGTAG, "ExperimentalCompressionModule", null, $"The compression module '{m_options.CompressionModule}' is experimental and for testing only."); + } + CheckAutoCompactInterval(); CheckAutoVacuumInterval(); diff --git a/Duplicati/License/Duplicati.License.csproj b/Duplicati/License/Duplicati.License.csproj index e1cb1f27d..5e12a713e 100644 --- a/Duplicati/License/Duplicati.License.csproj +++ b/Duplicati/License/Duplicati.License.csproj @@ -310,6 +310,18 @@ licenses\Vanara\licensedata.json PreserveNewest + + licenses\ZstdSharp.Port\Homepage.txt + PreserveNewest + + + licenses\ZstdSharp.Port\License.txt + PreserveNewest + + + licenses\ZstdSharp.Port\licensedata.json + PreserveNewest + diff --git a/Duplicati/UnitTest/BorderTests.cs b/Duplicati/UnitTest/BorderTests.cs index 468d770c9..9297c032a 100644 --- a/Duplicati/UnitTest/BorderTests.cs +++ b/Duplicati/UnitTest/BorderTests.cs @@ -198,6 +198,26 @@ namespace Duplicati.UnitTest }); } + [Test] + [Category("Border")] + public void Run10kTgzCompression() + { + RunCommands(1024 * 10, modifyOptions: opts => + { + 
opts["compression-module"] = "tgz"; + }); + } + + [Test] + [Category("Border")] + public void Run10kTzstdCompression() + { + RunCommands(1024 * 10, modifyOptions: opts => + { + opts["compression-module"] = "tzstd"; + }); + } + public static Dictionary WriteTestFilesToFolder(string targetfolder, int blocksize, int basedatasize = 0) { if (basedatasize <= 0) diff --git a/Duplicati/UnitTest/TarZstdCompressionTests.cs b/Duplicati/UnitTest/TarZstdCompressionTests.cs new file mode 100644 index 000000000..3fcf2fad3 --- /dev/null +++ b/Duplicati/UnitTest/TarZstdCompressionTests.cs @@ -0,0 +1,507 @@ +// Copyright (C) 2026, The Duplicati Team +// https://duplicati.com, hello@duplicati.com +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
+ +using System; +using NUnit.Framework; +using System.IO; +using System.Linq; +using System.Collections.Generic; +using Duplicati.Library.Interface; +using System.Text; +using System.Text.Json; +using ZstdSharp; +using Duplicati.Library.Compression.TarZstdCompression; +using Assert = NUnit.Framework.Legacy.ClassicAssert; + +#nullable enable + +namespace Duplicati.UnitTest +{ + [Category("Compression")] + [TestFixture] + public class TarZstdCompressionTests : BasicSetupHelper + { + private static byte[] GenerateTestData(int size, byte seed = 1) + { + var data = new byte[size]; + for (int i = 0; i < size; i++) + { + data[i] = (byte)((i * 22695477 + seed) % 256); + } + return data; + } + + [Test] + public void TestCreateAndReadArchive() + { + using var archiveStream = new MemoryStream(); + var testData = GenerateTestData(1024, 1); + + // Write + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Write, new Dictionary())) + { + using var entry = archive.CreateFile("test.txt", CompressionHint.Compressible, DateTime.Now); + entry.Write(testData, 0, testData.Length); + } + + // Read + archiveStream.Position = 0; + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Read, new Dictionary())) + { + var files = archive.ListFiles(null); + Assert.AreEqual(1, files.Length); + Assert.AreEqual("test.txt", files[0]); + + using var stream = archive.OpenRead("test.txt"); + Assert.IsNotNull(stream); + + using var ms = new MemoryStream(); + stream!.CopyTo(ms); + var readData = ms.ToArray(); + Assert.That(readData, Is.EqualTo(testData)); + } + } + + [Test] + public void TestMultipleFiles() + { + using var archiveStream = new MemoryStream(); + var testData1 = GenerateTestData(1024, 1); + var testData2 = GenerateTestData(2048, 2); + var testData3 = GenerateTestData(512, 3); + + // Write + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Write, new Dictionary())) + { + using (var entry = archive.CreateFile("file1.bin", 
CompressionHint.Compressible, DateTime.Now)) + entry.Write(testData1, 0, testData1.Length); + + using (var entry = archive.CreateFile("file2.bin", CompressionHint.Compressible, DateTime.Now)) + entry.Write(testData2, 0, testData2.Length); + + using (var entry = archive.CreateFile("dir/file3.bin", CompressionHint.Compressible, DateTime.Now)) + entry.Write(testData3, 0, testData3.Length); + } + + // Read + archiveStream.Position = 0; + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Read, new Dictionary())) + { + var files = archive.ListFiles(null); + Assert.AreEqual(3, files.Length); + + // Verify file1.bin + using (var stream = archive.OpenRead("file1.bin")) + { + using var ms = new MemoryStream(); + stream!.CopyTo(ms); + var readData = ms.ToArray(); + Console.WriteLine($"[TEST] file1.bin: expected[0]={testData1[0]}, actual[0]={readData[0]}, length={readData.Length}"); + Assert.That(readData, Is.EqualTo(testData1)); + } + + // Verify file2.bin + using (var stream = archive.OpenRead("file2.bin")) + { + using var ms = new MemoryStream(); + stream!.CopyTo(ms); + Assert.That(ms.ToArray(), Is.EqualTo(testData2)); + } + + // Verify file3.bin + using (var stream = archive.OpenRead("dir/file3.bin")) + { + using var ms = new MemoryStream(); + stream!.CopyTo(ms); + Assert.That(ms.ToArray(), Is.EqualTo(testData3)); + } + } + } + + [Test] + public void TestEofHeaderPresent() + { + using var archiveStream = new MemoryStream(); + var testData = GenerateTestData(1024, 1); + + // Write archive + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Write, new Dictionary())) + { + using var entry = archive.CreateFile("test.txt", CompressionHint.Compressible, DateTime.Now); + entry.Write(testData, 0, testData.Length); + } + + // Decompress and verify EOF header is present + archiveStream.Position = 0; + var decompressedStream = new MemoryStream(); + using (var decompressor = new DecompressionStream(archiveStream)) + { + 
decompressor.CopyTo(decompressedStream); + } + + decompressedStream.Position = 0; + + // Read the tar entries and verify EOF header exists + using var tarReader = new System.Formats.Tar.TarReader(decompressedStream); + bool foundEofHeader = false; + int entryCount = 0; + + while (true) + { + var entry = tarReader.GetNextEntry(); + if (entry == null) + break; + + entryCount++; + if (entry.Name == ".eof-header") + { + foundEofHeader = true; + + // Parse the JSON to verify content + if (entry.DataStream != null) + { + using var ms = new MemoryStream(); + entry.DataStream.CopyTo(ms); + var contentBytes = ms.ToArray(); + + // Remove trailer (last 14 bytes: 8 offset + 6 magic) + const int trailerSize = 14; // EofHeaderTrailerSize + if (contentBytes.Length >= trailerSize) + { + var jsonBytes = contentBytes[..^trailerSize]; + // Trim null padding bytes from end + int jsonLength = jsonBytes.Length; + while (jsonLength > 0 && jsonBytes[jsonLength - 1] == 0) + jsonLength--; + + var json = Encoding.UTF8.GetString(jsonBytes, 0, jsonLength); + var dict = JsonSerializer.Deserialize>(json); + Assert.IsNotNull(dict); + Assert.That(dict!.ContainsKey("test.txt"), Is.True); + } + } + } + } + + Assert.That(foundEofHeader, Is.True, "EOF header should be present"); + Assert.That(entryCount, Is.EqualTo(2), "Should have data file and EOF header"); + } + + [Test] + public void TestFallbackScanning() + { + using var archiveStream = new MemoryStream(); + var testData = GenerateTestData(1024, 1); + + // Create a tar archive without EOF header (simulate old/corrupt format) + using (var tempStream = new MemoryStream()) + { + // Write tar entry using System.Formats.Tar (ustar format for fixed 512-byte headers) + using (var tarWriter = new System.Formats.Tar.TarWriter(tempStream, System.Formats.Tar.TarEntryFormat.Ustar, leaveOpen: true)) + { + var entry = new System.Formats.Tar.UstarTarEntry( + System.Formats.Tar.TarEntryType.RegularFile, + "test.txt") + { + DataStream = new MemoryStream(testData) 
+ }; + tarWriter.WriteEntry(entry); + } + + // Compress with Zstd + tempStream.Position = 0; + using (var compressor = new CompressionStream(archiveStream, 3)) + { + tempStream.CopyTo(compressor); + } + } + + // Read - should fallback to scanning + archiveStream.Position = 0; + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Read, new Dictionary())) + { + var files = archive.ListFiles(null); + Assert.AreEqual(1, files.Length); + Assert.AreEqual("test.txt", files[0]); + + using var stream = archive.OpenRead("test.txt"); + using var ms = new MemoryStream(); + stream!.CopyTo(ms); + Assert.That(ms.ToArray(), Is.EqualTo(testData)); + } + } + + [Test] + public void TestEmptyArchive() + { + using var archiveStream = new MemoryStream(); + + // Write empty archive + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Write, new Dictionary())) + { + // Don't add any files + } + + // Read + archiveStream.Position = 0; + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Read, new Dictionary())) + { + var files = archive.ListFiles(null); + Assert.AreEqual(0, files.Length); + } + } + + [Test] + public void TestLargeFile() + { + using var archiveStream = new MemoryStream(); + const int size = 10 * 1024 * 1024; // 10 MB + var testData = GenerateTestData(size, 42); + + // Write + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Write, new Dictionary())) + { + using var entry = archive.CreateFile("large.bin", CompressionHint.Compressible, DateTime.Now); + entry.Write(testData, 0, testData.Length); + } + + // Read + archiveStream.Position = 0; + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Read, new Dictionary())) + { + using var stream = archive.OpenRead("large.bin"); + using var ms = new MemoryStream(); + stream!.CopyTo(ms); + Assert.That(ms.Length, Is.EqualTo(testData.Length)); + Assert.That(ms.ToArray(), Is.EqualTo(testData)); + } + } + + [Test] + public void 
TestCompressionReversibility() + { + const int testSize = 1024 * 1024; + var testData1 = GenerateTestData(testSize, 1); + var testData2 = GenerateTestData(testSize, 2); + + using var archiveStream = new MemoryStream(); + + // Compress + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Write, new Dictionary())) + { + using (var entry = archive.CreateFile("sample1", CompressionHint.Compressible, DateTime.Now)) + entry.Write(testData1, 0, testData1.Length); + + using (var entry = archive.CreateFile("sample2", CompressionHint.Compressible, DateTime.Now)) + entry.Write(testData2, 0, testData2.Length); + } + + Console.WriteLine("Compression rate for Tar+Zstd: {0:0.00}%", 100.0 * archiveStream.Length / (testSize * 2)); + + // Decompress + archiveStream.Position = 0; + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Read, new Dictionary())) + { + var files = archive.ListFiles(null); + Assert.AreEqual(2, files.Length); + + // Read second file + using (var stream = archive.OpenRead(files[1])) + { + using var ms = new MemoryStream(); + stream!.CopyTo(ms); + Assert.That(ms.ToArray(), Is.EqualTo(testData2)); + } + + // Read first file + using (var stream = archive.OpenRead(files[0])) + { + using var ms = new MemoryStream(); + stream!.CopyTo(ms); + Assert.That(ms.ToArray(), Is.EqualTo(testData1)); + } + } + } + + [Test] + public void TestFileNotFound() + { + using var archiveStream = new MemoryStream(); + var testData = GenerateTestData(1024, 1); + + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Write, new Dictionary())) + { + using var entry = archive.CreateFile("existing.txt", CompressionHint.Compressible, DateTime.Now); + entry.Write(testData, 0, testData.Length); + } + + archiveStream.Position = 0; + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Read, new Dictionary())) + { + var stream = archive.OpenRead("nonexistent.txt"); + Assert.IsNull(stream); + 
Assert.IsFalse(archive.FileExists("nonexistent.txt")); + } + } + + [Test] + public void TestListFilesWithPrefix() + { + using var archiveStream = new MemoryStream(); + + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Write, new Dictionary())) + { + using (var entry = archive.CreateFile("dir1/file1.txt", CompressionHint.Compressible, DateTime.Now)) + entry.Write(GenerateTestData(100), 0, 100); + + using (var entry = archive.CreateFile("dir1/file2.txt", CompressionHint.Compressible, DateTime.Now)) + entry.Write(GenerateTestData(100), 0, 100); + + using (var entry = archive.CreateFile("dir2/file3.txt", CompressionHint.Compressible, DateTime.Now)) + entry.Write(GenerateTestData(100), 0, 100); + + using (var entry = archive.CreateFile("root.txt", CompressionHint.Compressible, DateTime.Now)) + entry.Write(GenerateTestData(100), 0, 100); + } + + archiveStream.Position = 0; + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Read, new Dictionary())) + { + var dir1Files = archive.ListFiles("dir1/"); + Assert.AreEqual(2, dir1Files.Length); + Assert.That(dir1Files, Does.Contain("dir1/file1.txt")); + Assert.That(dir1Files, Does.Contain("dir1/file2.txt")); + + var dir2Files = archive.ListFiles("dir2/"); + Assert.AreEqual(1, dir2Files.Length); + Assert.AreEqual("dir2/file3.txt", dir2Files[0]); + + var allFiles = archive.ListFiles(null); + Assert.AreEqual(4, allFiles.Length); + } + } + + [Test] + public void TestListFilesWithSize() + { + using var archiveStream = new MemoryStream(); + var data1 = GenerateTestData(100); + var data2 = GenerateTestData(200); + + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Write, new Dictionary())) + { + using (var entry = archive.CreateFile("small.txt", CompressionHint.Compressible, DateTime.Now)) + entry.Write(data1, 0, data1.Length); + + using (var entry = archive.CreateFile("large.txt", CompressionHint.Compressible, DateTime.Now)) + entry.Write(data2, 0, data2.Length); + } + 
+ archiveStream.Position = 0; + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Read, new Dictionary())) + { + var files = archive.ListFilesWithSize(null).ToList(); + Assert.AreEqual(2, files.Count); + + var smallEntry = files.FirstOrDefault(f => f.Key == "small.txt"); + Assert.That(smallEntry.Value, Is.EqualTo(100)); + + var largeEntry = files.FirstOrDefault(f => f.Key == "large.txt"); + Assert.That(largeEntry.Value, Is.EqualTo(200)); + } + } + + [Test] + public void TestGetLastWriteTime() + { + using var archiveStream = new MemoryStream(); + var testTime = new DateTime(2023, 6, 15, 12, 30, 0, DateTimeKind.Utc); + + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Write, new Dictionary())) + { + using var entry = archive.CreateFile("test.txt", CompressionHint.Compressible, testTime); + entry.Write(GenerateTestData(100), 0, 100); + } + + archiveStream.Position = 0; + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Read, new Dictionary())) + { + var lastWriteTime = archive.GetLastWriteTime("test.txt"); + // Allow some precision loss due to tar format (seconds only) + var diff = Math.Abs((lastWriteTime - testTime).TotalSeconds); + Assert.That(diff, Is.LessThan(1.0), "Last write time should match within 1 second"); + } + } + + [Test] + public void TestPathSeparators() + { + using var archiveStream = new MemoryStream(); + var testData = GenerateTestData(100); + + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Write, new Dictionary())) + { + using var entry = archive.CreateFile("path/to/file.txt", CompressionHint.Compressible, DateTime.Now); + entry.Write(testData, 0, testData.Length); + } + + archiveStream.Position = 0; + using (var archive = new FileArchiveTarZstd(archiveStream, ArchiveMode.Read, new Dictionary())) + { + // Should be able to access with forward slash + Assert.That(archive.FileExists("path/to/file.txt"), Is.True); + + // Should be able to access with backslash + 
Assert.That(archive.FileExists("path\\to\\file.txt"), Is.True); + } + } + + [Test] + public void TestCompressionLevel() + { + using var archiveStream1 = new MemoryStream(); + using var archiveStream9 = new MemoryStream(); + + var testData = GenerateTestData(10000); + + // Write with level 1 + var opts1 = new Dictionary<string, string> { { "tzstd-compression-level", "1" } }; + using (var archive = new FileArchiveTarZstd(archiveStream1, ArchiveMode.Write, opts1)) + { + using var entry = archive.CreateFile("test.txt", CompressionHint.Compressible, DateTime.Now); + entry.Write(testData, 0, testData.Length); + } + + // Write with level 9 + var opts9 = new Dictionary<string, string> { { "tzstd-compression-level", "9" } }; + using (var archive = new FileArchiveTarZstd(archiveStream9, ArchiveMode.Write, opts9)) + { + using var entry = archive.CreateFile("test.txt", CompressionHint.Compressible, DateTime.Now); + entry.Write(testData, 0, testData.Length); + } + + // Level 9 should generally produce smaller output than level 1 + Console.WriteLine($"Level 1 size: {archiveStream1.Length}"); + Console.WriteLine($"Level 9 size: {archiveStream9.Length}"); + // Note: This isn't always guaranteed for small/random data, but generally holds + } + } +} diff --git a/thirdparty/ZstdSharp.Port/Homepage.txt b/thirdparty/ZstdSharp.Port/Homepage.txt new file mode 100644 index 000000000..6edfc288d --- /dev/null +++ b/thirdparty/ZstdSharp.Port/Homepage.txt @@ -0,0 +1 @@ +https://github.com/oleg-st/ZstdSharp \ No newline at end of file diff --git a/thirdparty/ZstdSharp.Port/License.txt b/thirdparty/ZstdSharp.Port/License.txt new file mode 100644 index 000000000..e93074afa --- /dev/null +++ b/thirdparty/ZstdSharp.Port/License.txt @@ -0,0 +1,20 @@ +MIT License +Copyright (c) 2021-2025 Oleg Stepanischev + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the 
rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/thirdparty/ZstdSharp.Port/licensedata.json b/thirdparty/ZstdSharp.Port/licensedata.json new file mode 100644 index 000000000..ae6212cc7 --- /dev/null +++ b/thirdparty/ZstdSharp.Port/licensedata.json @@ -0,0 +1,6 @@ +{ + "name": "ZstdSharp.Port", + "description": "Port of zstd compression library to C#", + "link": "https://github.com/oleg-st/ZstdSharp", + "license": "MIT" +}