cloud provider coverage
Continuous Integration / backend-tests (push) Successful in 39s
Continuous Integration / frontend-check (push) Successful in 16s
Continuous Integration / e2e-tests (push) Successful in 6m48s

This commit is contained in:
2026-05-05 18:38:42 -04:00
parent ae74a0bf02
commit d77a79876f
+363 -68
View File
@@ -1,84 +1,379 @@
import hashlib
import io
import os
import pytest
from unittest.mock import MagicMock
from app.providers.cloud import CloudStorageProvider
def test_cloud_provider_obfuscation_logic():
"""Verifies that filename hashing and sharding works as expected."""
# CASE 1: Obfuscation Disabled
config_plain = {
"bucket_name": "test-bucket",
"obfuscate_filenames": False,
"access_key": "fake",
"secret_key": "fake",
}
provider_plain = CloudStorageProvider(config_plain)
path = "documents/secret_plan.pdf"
# Expectation: Key is exactly the path with prefix
key_plain = provider_plain._get_obfuscated_key("objects", path)
assert key_plain == "objects/documents/secret_plan.pdf"
# CASE 2: Obfuscation Enabled
config_hidden = {
"bucket_name": "test-bucket",
"obfuscate_filenames": True,
"access_key": "fake",
"secret_key": "fake",
}
provider_hidden = CloudStorageProvider(config_hidden)
# Expectation: Key is hashed and sharded
# hash of "documents/secret_plan.pdf"
expected_hash = hashlib.sha256(path.encode("utf-8")).hexdigest()
expected_prefix = f"objects/{expected_hash[:2]}/{expected_hash[2:4]}"
key_hidden = provider_hidden._get_obfuscated_key("objects", path)
assert key_hidden.startswith("objects/")
assert key_hidden == f"{expected_prefix}/{expected_hash}"
assert "secret_plan.pdf" not in key_hidden
# ── Constructor & Config ──
def test_cloud_secret_lookup(mocker, db_session):
"""Verifies that the provider looks up secrets from the keystore by name."""
from app.db import models
def test_cloud_provider_endpoint_normalization(mocker):
"""Tests that endpoint URLs without protocol get https:// prepended."""
mock_boto = mocker.patch("app.providers.cloud.boto3")
# Mock boto3.client to avoid slow initialization in unit tests
provider = CloudStorageProvider(
{
"bucket_name": "test-bucket",
"endpoint_url": "s3.example.com",
"region": "eu-west-1",
"access_key": "ak",
"secret_key": "sk",
}
)
call_kwargs = mock_boto.client.call_args[1]
assert call_kwargs["endpoint_url"] == "https://s3.example.com"
assert call_kwargs["region_name"] == "eu-west-1"
assert provider.provider_type == "S3"
def test_cloud_provider_endpoint_no_modification(mocker):
"""Tests that endpoint URLs with existing protocol are left alone."""
mock_boto = mocker.patch("app.providers.cloud.boto3")
CloudStorageProvider(
{
"bucket_name": "test-bucket",
"endpoint_url": "http://minio.local:9000",
}
)
call_kwargs = mock_boto.client.call_args[1]
assert call_kwargs["endpoint_url"] == "http://minio.local:9000"
def test_cloud_provider_defaults(mocker):
"""Tests default values when minimal config is provided."""
mock_boto = mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "b"})
assert provider.region == "us-east-1"
assert provider.endpoint_url is None
assert provider.obfuscate is False
mock_boto.client.assert_called_once()
# ── Online & Identification ──
def test_check_online_success(mocker):
"""Tests check_online returns True when head_bucket succeeds."""
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "b"})
provider.s3.head_bucket = MagicMock(return_value=None)
# Seed the secrets keystore
assert provider.check_online() is True
def test_check_online_failure(mocker):
"""Tests check_online returns False when head_bucket raises."""
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "b"})
provider.s3.head_bucket = MagicMock(side_effect=Exception("timeout"))
assert provider.check_online() is False
def test_get_live_info(mocker):
"""Tests get_live_info returns provider metadata."""
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "my-bucket"})
provider.s3.head_bucket = MagicMock(return_value=None)
info = provider.get_live_info()
assert info["online"] is True
assert info["provider"] == "S3"
assert info["bucket"] == "my-bucket"
def test_check_existing_data_found(mocker):
"""Tests check_existing_data when objects exist under archives/."""
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "b"})
provider.s3.list_objects_v2 = MagicMock(
return_value={"Contents": [{"Key": "archives/1.tar"}]}
)
assert provider.check_existing_data() is True
def test_check_existing_data_empty(mocker):
"""Tests check_existing_data when no objects exist."""
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "b"})
provider.s3.list_objects_v2 = MagicMock(return_value={})
assert provider.check_existing_data() is False
def test_identify_media_by_id_file(mocker):
"""Tests identify_media reads .tapehoard_id when available."""
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "b"})
mock_body = MagicMock()
mock_body.read.return_value = b" BUCKET_001 "
provider.s3.get_object = MagicMock(return_value={"Body": mock_body})
result = provider.identify_media()
assert result == "BUCKET_001"
def test_identify_media_fallback_to_bucket_name(mocker):
"""Tests identify_media falls back to bucket name when .tapehoard_id missing."""
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "fallback-bucket"})
provider.s3.get_object = MagicMock(side_effect=Exception("NoSuchKey"))
provider.s3.head_bucket = MagicMock(return_value=None)
result = provider.identify_media()
assert result == "fallback-bucket"
def test_identify_media_complete_failure(mocker):
"""Tests identify_media returns None when everything fails."""
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "b"})
provider.s3.get_object = MagicMock(side_effect=Exception("fail"))
provider.s3.head_bucket = MagicMock(side_effect=Exception("fail"))
assert provider.identify_media() is None
# ── Write Operations ──
def test_write_archive_plain(mocker):
"""Tests writing an unencrypted archive."""
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "b", "obfuscate_filenames": False})
stream = io.BytesIO(b"archive content")
provider.s3.upload_fileobj = MagicMock(return_value=None)
location = provider.write_archive("M1", stream)
assert location.startswith("archives/archives/")
assert location.endswith(".tar")
provider.s3.upload_fileobj.assert_called_once()
def test_write_file_direct_plain(mocker):
"""Tests writing an unencrypted object directly."""
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "b", "obfuscate_filenames": False})
stream = io.BytesIO(b"file content")
provider.s3.upload_fileobj = MagicMock(return_value=None)
location = provider.write_file_direct("M1", "photos/image.jpg", stream)
assert location == "objects/photos/image.jpg"
def test_initialize_media_clears_and_tags(mocker):
"""Tests initialize_media clears existing objects and writes .tapehoard_id."""
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "b"})
provider.s3.head_bucket = MagicMock(return_value=None)
mock_paginator = MagicMock()
mock_paginator.paginate = MagicMock(
return_value=[{"Contents": [{"Key": "old1"}, {"Key": "old2"}]}]
)
provider.s3.get_paginator = MagicMock(return_value=mock_paginator)
provider.s3.delete_objects = MagicMock(return_value=None)
provider.s3.put_object = MagicMock(return_value=None)
result = provider.initialize_media("NEW_DISK")
assert result is True
provider.s3.delete_objects.assert_called_once()
provider.s3.put_object.assert_called_once()
call_kwargs = provider.s3.put_object.call_args[1]
assert call_kwargs["Key"] == ".tapehoard_id"
assert call_kwargs["Body"] == b"NEW_DISK"
def test_initialize_media_failure(mocker):
"""Tests initialize_media returns False on error."""
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "b"})
provider.s3.head_bucket = MagicMock(side_effect=Exception("no access"))
assert provider.initialize_media("X") is False
def test_prepare_for_write_match(mocker):
"""Tests prepare_for_write when media identifier matches."""
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "b"})
provider.s3.head_bucket = MagicMock(return_value=None)
provider.s3.get_object = MagicMock(side_effect=Exception("not found"))
# Fallback to bucket name
assert provider.prepare_for_write("b") is True
assert provider.prepare_for_write("wrong") is False
# ── Read Operations ──
def test_read_archive_plain(mocker):
"""Tests reading an unencrypted archive."""
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "b"})
provider.s3.get_object = MagicMock(
return_value={
"Body": io.BytesIO(b"raw archive data"),
"Metadata": {},
}
)
result = provider.read_archive("M1", "archives/1.tar")
assert result.read() == b"raw archive data"
def test_read_archive_encrypted(mocker, db_session):
"""Tests round-trip encryption/decryption for archives."""
from app.db import models
from Crypto.Cipher import AES
from Crypto.Protocol.KDF import PBKDF2
from Crypto.Hash import SHA256
# Seed passphrase in keystore
db_session.add(
models.SystemSetting(
key="secrets",
value='{"my-encryption-key": "local-override", "empty-secret": ""}',
)
models.SystemSetting(key="secrets", value='{"cloud-enc": "my-passphrase-123"}')
)
db_session.commit()
# CASE 1: Secret name provided and exists in keystore
config_local = {
"bucket_name": "b",
"encryption_secret_name": "my-encryption-key",
}
provider_local = CloudStorageProvider(config_local)
assert provider_local.passphrase == "local-override"
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider(
{
"bucket_name": "b",
"encryption_secret_name": "cloud-enc",
}
)
# CASE 2: No secret name provided, passphrase is None
config_empty = {"bucket_name": "b"}
provider_fallback = CloudStorageProvider(config_empty)
assert provider_fallback.passphrase is None
# Encrypt data ourselves to simulate stored payload
original_data = b"secret archive content"
salt = os.urandom(16)
nonce = os.urandom(12)
key = PBKDF2(
"my-passphrase-123", salt, dkLen=32, count=100000, hmac_hash_module=SHA256
)
cipher = AES.new(key, AES.MODE_GCM, nonce=nonce)
ciphertext, tag = cipher.encrypt_and_digest(original_data)
payload = salt + nonce + tag + ciphertext
# CASE 3: Secret name provided but value is empty string
config_empty_secret = {
"bucket_name": "b",
"encryption_secret_name": "empty-secret",
}
provider_empty = CloudStorageProvider(config_empty_secret)
assert provider_empty.passphrase == ""
provider.s3.get_object = MagicMock(
return_value={
"Body": io.BytesIO(payload),
"Metadata": {"tapehoard-encrypted": "v2-gcm"},
}
)
# CASE 4: No passphrase anywhere (ValueError on key derivation)
provider_none = CloudStorageProvider({"bucket_name": "b"})
with pytest.raises(ValueError, match="No encryption passphrase configured"):
provider_none._derive_key(b"salt")
result = provider.read_archive("M1", "archives/enc.tar")
assert result.read() == original_data
def test_read_archive_encrypted_tampered(mocker, db_session):
"""Tests that tampered encrypted archive raises ValueError."""
from app.db import models
db_session.add(
models.SystemSetting(key="secrets", value='{"cloud-enc": "my-passphrase-123"}')
)
db_session.commit()
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider(
{
"bucket_name": "b",
"encryption_secret_name": "cloud-enc",
}
)
# Corrupt payload: valid structure but wrong ciphertext
fake_payload = os.urandom(16) + os.urandom(12) + os.urandom(16) + b"garbage"
provider.s3.get_object = MagicMock(
return_value={
"Body": io.BytesIO(fake_payload),
"Metadata": {"tapehoard-encrypted": "v2-gcm"},
}
)
with pytest.raises(ValueError, match="tampering detected"):
provider.read_archive("M1", "archives/bad.tar")
# ── Encryption Round-Trip ──
def test_write_and_read_archive_encrypted(mocker, db_session):
"""End-to-end test: write encrypted archive, read it back."""
from app.db import models
db_session.add(
models.SystemSetting(key="secrets", value='{"cloud-enc": "my-passphrase-123"}')
)
db_session.commit()
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider(
{
"bucket_name": "b",
"encryption_secret_name": "cloud-enc",
"obfuscate_filenames": False,
}
)
# Capture the uploaded payload
uploaded = {}
def capture_put_object(**kwargs):
uploaded["key"] = kwargs["Key"]
uploaded["body"] = kwargs["Body"]
uploaded["metadata"] = kwargs.get("Metadata", {})
provider.s3.put_object = MagicMock(side_effect=capture_put_object)
original = b"round-trip test data"
location = provider.write_archive("M1", io.BytesIO(original))
# Verify upload happened with encryption metadata
assert uploaded["metadata"].get("x-amz-meta-tapehoard-encrypted") == "v2-gcm"
assert uploaded["metadata"].get("x-amz-meta-tapehoard-type") == "archive"
# Now read it back
provider.s3.get_object = MagicMock(
return_value={
"Body": io.BytesIO(uploaded["body"]),
"Metadata": {"tapehoard-encrypted": "v2-gcm"},
}
)
result = provider.read_archive("M1", location)
assert result.read() == original
# ── Misc ──
def test_get_name(mocker):
"""Tests get_name returns provider type string."""
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "b", "provider": "Wasabi"})
assert provider.get_name() == "Cloud (Wasabi)"
def test_finalize_media(mocker):
"""Tests finalize_media is a no-op that logs."""
mocker.patch("app.providers.cloud.boto3")
provider = CloudStorageProvider({"bucket_name": "b"})
# Should not raise
provider.finalize_media("M1")