more tape drive polling
Continuous Integration / frontend-check (push) Failing after 5m8s
Continuous Integration / backend-tests (push) Successful in 10m10s

This commit is contained in:
2026-04-27 19:27:17 -04:00
parent 3a242527d2
commit 46fa2555bb
3 changed files with 161 additions and 148 deletions
+88 -136
View File
@@ -6,27 +6,14 @@ from loguru import logger
class LTOProvider(AbstractStorageProvider): class LTOProvider(AbstractStorageProvider):
# Class-level caches to ensure UI stability during busy periods
# device_path -> { info }
_drive_cache: dict = {}
# device_path -> { info }
_mam_cache: dict = {}
# barcode/serial -> label_id
_id_cache: dict = {}
def __init__( def __init__(
self, device_path: str = "/dev/nst0", encryption_key: Optional[str] = None self, device_path: str = "/dev/nst0", encryption_key: Optional[str] = None
): ):
self.device_path = device_path self.device_path = device_path
self.encryption_key = encryption_key self.encryption_key = encryption_key
self.drive_busy = False
def get_drive_info(self) -> dict: def get_drive_info(self) -> dict:
"""Retrieves vendor, model, and firmware version of the tape drive using sg_inq.""" """Retrieves vendor, model, and firmware version of the tape drive using sg_inq."""
# Drive hardware info never changes, so cache it forever per device path
if self.device_path in self._drive_cache:
return self._drive_cache[self.device_path]
try: try:
# Use sg_inq for reliable SCSI inquiry # Use sg_inq for reliable SCSI inquiry
result = subprocess.run( result = subprocess.run(
@@ -44,8 +31,6 @@ class LTOProvider(AbstractStorageProvider):
elif "Product revision level:" in line: elif "Product revision level:" in line:
info["firmware"] = line.split(":", 1)[1].strip() info["firmware"] = line.split(":", 1)[1].strip()
if info:
self._drive_cache[self.device_path] = info
return info return info
except Exception as e: except Exception as e:
logger.debug(f"Failed to get drive info for {self.device_path}: {e}") logger.debug(f"Failed to get drive info for {self.device_path}: {e}")
@@ -64,12 +49,11 @@ class LTOProvider(AbstractStorageProvider):
) )
if result.returncode != 0 or not result.stdout: if result.returncode != 0 or not result.stdout:
# If busy, return the last known MAM for this drive return {}
return self._mam_cache.get(self.device_path, {})
data = result.stdout data = result.stdout
if len(data) < 4: if len(data) < 4:
return self._mam_cache.get(self.device_path, {}) return {}
# SCSI READ ATTRIBUTE parameter data starts with a 4-byte length field (Big Endian) # SCSI READ ATTRIBUTE parameter data starts with a 4-byte length field (Big Endian)
available_len = struct.unpack(">I", data[:4])[0] available_len = struct.unpack(">I", data[:4])[0]
@@ -77,17 +61,25 @@ class LTOProvider(AbstractStorageProvider):
end = min(pos + available_len, len(data)) end = min(pos + available_len, len(data))
mam = {} mam = {}
# Standard MAM Attribute IDs (SPC-3 / SSC-2 / LTO Specs)
attr_map = { attr_map = {
0x0000: "barcode", 0x0000: "remaining_capacity_mib",
0x0002: "load_count", 0x0001: "max_capacity_mib",
0x0002: "tape_alert_flags",
0x0003: "load_count",
0x0220: "lifetime_mib_written",
0x0221: "lifetime_mib_read",
0x0222: "session_mib_written",
0x0223: "session_mib_read",
0x0400: "manufacturer", 0x0400: "manufacturer",
0x0401: "serial", 0x0401: "serial",
0x0800: "density", 0x0405: "density",
0x0805: "label", 0x0406: "manufacture_date",
0x0806: "manufacture_date", 0x0806: "barcode",
} }
while pos + 5 <= end: while pos + 5 <= end:
# Each attribute header: ID (2), Flags (1), Length (2)
attr_id, flags, attr_len = struct.unpack(">HBH", data[pos : pos + 5]) attr_id, flags, attr_len = struct.unpack(">HBH", data[pos : pos + 5])
pos += 5 pos += 5
if pos + attr_len > end: if pos + attr_len > end:
@@ -97,12 +89,23 @@ class LTOProvider(AbstractStorageProvider):
if attr_id in attr_map: if attr_id in attr_map:
key = attr_map[attr_id] key = attr_map[attr_id]
if attr_id == 0x0800: # Binary integers (1, 2, 4, or 8 bytes)
mam[key] = hex(val_bytes[0]) if val_bytes else "0x00" if attr_id in [
elif attr_id == 0x0002: 0x0000,
0x0001,
0x0002,
0x0003,
0x0220,
0x0221,
0x0222,
0x0223,
]:
mam[key] = int.from_bytes(val_bytes, "big") mam[key] = int.from_bytes(val_bytes, "big")
elif attr_id == 0x0405: # Density is a single byte
mam[key] = hex(val_bytes[0]) if val_bytes else "0x00"
else: else:
try: try:
# Most attributes are ASCII strings
val = ( val = (
val_bytes.decode("ascii", errors="ignore") val_bytes.decode("ascii", errors="ignore")
.split("\x00")[0] .split("\x00")[0]
@@ -113,39 +116,60 @@ class LTOProvider(AbstractStorageProvider):
except Exception: except Exception:
continue continue
if "density" in mam: # 1. Decode TapeAlerts (Common flags)
gen_map = { if mam.get("tape_alert_flags"):
"0x40": "LTO-1", alerts = []
"0x42": "LTO-2", flags = mam["tape_alert_flags"]
"0x44": "LTO-3", # Bit indices for common LTO alerts
"0x46": "LTO-4", alert_map = {
"0x48": "LTO-5", 3: "Hard Error",
"0x58": "LTO-6", 4: "Media Error",
"0x5a": "LTO-7", 5: "Read Failure",
"0x5c": "LTO-8", 6: "Write Failure",
"0x60": "LTO-9", 12: "Media Broken",
20: "Clean Now",
21: "Clean Periodic",
30: "Hardware Failure",
31: "Interface Failure",
} }
val = mam["density"].lower() for bit, msg in alert_map.items():
mam["generation_label"] = gen_map.get(val, f"Density {val}") if (flags >> (64 - bit)) & 1:
alerts.append(msg)
mam["alerts"] = alerts
# Update cache with new successful read # 2. Derive LTO generation from Capacity (the most reliable indicator)
if mam: if "max_capacity_mib" in mam:
self._mam_cache[self.device_path] = mam cap = mam["max_capacity_mib"]
return mam if cap < 150000:
mam["generation_label"] = "LTO-1"
elif cap < 300000:
mam["generation_label"] = "LTO-2"
elif cap < 600000:
mam["generation_label"] = "LTO-3"
elif cap < 1200000:
mam["generation_label"] = "LTO-4"
elif cap < 2000000:
mam["generation_label"] = "LTO-5"
elif cap < 4000000:
mam["generation_label"] = "LTO-6"
elif cap < 10000000:
mam["generation_label"] = "LTO-7"
elif cap < 15000000:
mam["generation_label"] = "LTO-8"
else:
mam["generation_label"] = "LTO-9"
return {k: v for k, v in mam.items() if v}
except Exception as e: except Exception as e:
if "Device or resource busy" in str(e): logger.debug(f"Failed to read/parse MAM for {self.device_path}: {e}")
self.drive_busy = True return {}
return self._mam_cache.get(self.device_path, {})
def get_name(self) -> str: def get_name(self) -> str:
return "LTO Tape" return "LTO Tape"
def check_online(self) -> bool: def check_online(self) -> bool:
"""Checks if the tape drive is present and READY (or BUSY which implies online)""" """Checks if the tape drive is present and READY (or BUSY)"""
if not os.path.exists(self.device_path): if not os.path.exists(self.device_path):
# If device node disappeared, clear caches for this path
self._drive_cache.pop(self.device_path, None)
self._mam_cache.pop(self.device_path, None)
return False return False
try: try:
result = subprocess.run( result = subprocess.run(
@@ -154,9 +178,8 @@ class LTOProvider(AbstractStorageProvider):
text=True, text=True,
timeout=5, timeout=5,
) )
# "Device or resource busy" is a SUCCESS for "is it online" # "Device or resource busy" is a success for "is it online"
if result.returncode != 0 and "Device or resource busy" in result.stderr: if result.returncode != 0 and "Device or resource busy" in result.stderr:
self.drive_busy = True
return True return True
is_ready = ( is_ready = (
@@ -177,8 +200,6 @@ class LTOProvider(AbstractStorageProvider):
text=True, text=True,
timeout=5, timeout=5,
) )
# Common indicators of write protection in mt status
# WR_PROT is common on Linux, 'read-only' on others
output = result.stdout.upper() output = result.stdout.upper()
return ( return (
"WR_PROT" in output "WR_PROT" in output
@@ -196,14 +217,12 @@ class LTOProvider(AbstractStorageProvider):
self._run_mt("rewind") self._run_mt("rewind")
# Skip the label file (file 0) # Skip the label file (file 0)
self._run_mt("fsf 1") self._run_mt("fsf 1")
# If we are not at EOT, there is probably data
result = subprocess.run( result = subprocess.run(
["mt", "-f", self.device_path, "status"], ["mt", "-f", self.device_path, "status"],
capture_output=True, capture_output=True,
text=True, text=True,
timeout=5, timeout=5,
) )
# If file number > 0, it means we successfully skipped at least one file
import re import re
match = re.search(r"File number=(\d+)", result.stdout) match = re.search(r"File number=(\d+)", result.stdout)
@@ -211,32 +230,18 @@ class LTOProvider(AbstractStorageProvider):
return True return True
return False return False
except Exception: except Exception:
# If we fail to fsf 1, it usually means we hit EOD/EOT right after file 0
return False return False
def _run_mt(self, command: str): def _run_mt(self, command: str):
try: try:
result = subprocess.run( subprocess.run(["mt", "-f", self.device_path, command], check=True)
["mt", "-f", self.device_path, command], capture_output=True, text=True
)
if result.returncode != 0:
if "Device or resource busy" in result.stderr:
self.drive_busy = True
raise subprocess.CalledProcessError(
result.returncode,
["mt", "-f", self.device_path, command],
output=result.stdout,
stderr=result.stderr,
)
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
if not self.drive_busy: logger.error(f"Tape command 'mt {command}' failed: {e}")
logger.error(f"Tape command 'mt {command}' failed: {e}")
raise raise
def _setup_encryption(self): def _setup_encryption(self):
"""Configures hardware encryption on the drive using stenc""" """Configures hardware encryption on the drive using stenc"""
if not self.encryption_key: if not self.encryption_key:
# Explicitly disable encryption if no key provided
try: try:
subprocess.run( subprocess.run(
["stenc", "-f", self.device_path, "--off"], capture_output=True ["stenc", "-f", self.device_path, "--off"], capture_output=True
@@ -247,8 +252,6 @@ class LTOProvider(AbstractStorageProvider):
try: try:
logger.info(f"Setting LTO hardware encryption key for {self.device_path}") logger.info(f"Setting LTO hardware encryption key for {self.device_path}")
# stenc expects a 32-byte hex key (256-bit)
# We use a pipe to avoid leaving the key in the process list
proc = subprocess.Popen( proc = subprocess.Popen(
["stenc", "-f", self.device_path, "--import", "-k", "-"], ["stenc", "-f", self.device_path, "--import", "-k", "-"],
stdin=subprocess.PIPE, stdin=subprocess.PIPE,
@@ -262,85 +265,41 @@ class LTOProvider(AbstractStorageProvider):
raise RuntimeError(f"LTO Encryption Setup Failed: {stderr}") raise RuntimeError(f"LTO Encryption Setup Failed: {stderr}")
# Verify encryption is on # Verify encryption is on
subprocess.run( subprocess.run(["stenc", "-f", self.device_path, "--on"], check=True)
["stenc", "-f", self.device_path, "--on"],
check=True,
capture_output=True,
)
logger.info("LTO Hardware Encryption ENABLED and LOCKED") logger.info("LTO Hardware Encryption ENABLED and LOCKED")
except Exception as e: except Exception as e:
if "Device or resource busy" in str(e):
self.drive_busy = True
logger.error(f"Hardware encryption error: {e}") logger.error(f"Hardware encryption error: {e}")
raise raise
def identify_media(self, allow_intrusive=True) -> Optional[str]: def identify_media(self) -> Optional[str]:
""" """
Identifies the tape, using MAM barcode/serial and load_count as cache keys. Identifies the tape, using MAM Barcode (0x0806) as primary identity
Set allow_intrusive=False to skip physical tape reads (rewind/tar) if not cached. to avoid disruptive head movement (rewind).
""" """
if not self.check_online(): if not self.check_online():
return None return None
# 1. Try to get MAM info first (FAST and NON-INTRUSIVE) # 1. Try non-intrusive MAM barcode first
mam = self.get_mam_info() mam = self.get_mam_info()
barcode = mam.get("barcode") barcode = mam.get("barcode")
serial = mam.get("serial") if barcode:
load_count = mam.get("load_count") return barcode
# Unique key for this specific tape session
# (device_path, load_count) is globally unique for a "loaded" tape
session_key = None
if load_count is not None:
session_key = f"{self.device_path}:{load_count}"
# 2. Check caches
# First check barcode/serial cache
if barcode and barcode in self._id_cache:
return self._id_cache[barcode]
if serial and serial in self._id_cache:
return self._id_cache[serial]
# Then check session cache (handles tapes with no barcode/serial)
if session_key and session_key in self._id_cache:
return self._id_cache[session_key]
# 3. If not in cache and intrusive allowed, we MUST read the physical label
if not allow_intrusive:
return None
# 2. Fallback to physical tape label read (intrusive!)
try: try:
# We must set up encryption BEFORE trying to read the label if it's an encrypted tape
self._setup_encryption() self._setup_encryption()
self._run_mt("rewind") self._run_mt("rewind")
# Try to read the label file
result = subprocess.run( result = subprocess.run(
["tar", "-xf", self.device_path, "-O", ".tapehoard_label"], ["tar", "-xf", self.device_path, "-O", ".tapehoard_label"],
capture_output=True, capture_output=True,
text=True, text=True,
timeout=15, # Shorter timeout for status polls timeout=20,
) )
if result.returncode == 0: if result.returncode == 0:
label_id = result.stdout.strip() return result.stdout.strip()
# Store in all relevant caches
if barcode:
self._id_cache[barcode] = label_id
if serial:
self._id_cache[serial] = label_id
if session_key:
self._id_cache[session_key] = label_id
return label_id
except Exception as e: except Exception as e:
if "Device or resource busy" in str(e): logger.debug(f"Physical identification failed for {self.device_path}: {e}")
self.drive_busy = True
# Only log if it's a real failure, not just a busy drive
if not self.drive_busy:
logger.debug(
f"Identification skipped or failed for {self.device_path}: {e}"
)
return None return None
@@ -421,15 +380,12 @@ class LTOProvider(AbstractStorageProvider):
text=True, text=True,
check=True, check=True,
) )
# mt status output varies by OS/Driver, but usually contains 'File number=X'
# We look for a line like 'File number=2, block number=0'
import re import re
match = re.search(r"File number=(\d+)", result.stdout) match = re.search(r"File number=(\d+)", result.stdout)
if match: if match:
return match.group(1) return match.group(1)
# Alternative format
match = re.search(r"file number (\d+)", result.stdout) match = re.search(r"file number (\d+)", result.stdout)
if match: if match:
return match.group(1) return match.group(1)
@@ -463,10 +419,6 @@ class LTOProvider(AbstractStorageProvider):
proc.stdin.close() proc.stdin.close()
proc.wait() proc.wait()
# After writing, we should be at the NEXT file mark.
# But tar/dd usually leaves us at the end of the written data.
# We'll return the position we started at as the 'location_id'
return file_num return file_num
def finalize_media(self, media_id: str): def finalize_media(self, media_id: str):
+2 -2
View File
@@ -1,5 +1,5 @@
# Stage 1: Build Frontend # Stage 1: Build Frontend
FROM node:22-slim AS frontend-builder FROM node:24-slim AS frontend-builder
WORKDIR /app/frontend WORKDIR /app/frontend
COPY frontend/package*.json ./ COPY frontend/package*.json ./
RUN npm install RUN npm install
@@ -7,7 +7,7 @@ COPY frontend/ ./
RUN npm run build RUN npm run build
# Stage 2: Backend & Runtime # Stage 2: Backend & Runtime
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim AS runtime FROM ghcr.io/astral-sh/uv:python3.14-trixie-slim AS runtime
WORKDIR /app/backend WORKDIR /app/backend
# Install system dependencies and build stenc from source # Install system dependencies and build stenc from source
+71 -10
View File
@@ -562,7 +562,7 @@
</div> </div>
</div> </div>
<div class="p-8 flex flex-col md:flex-row gap-12"> <div class="p-8 flex flex-col lg:flex-row gap-12">
<!-- Drive Info --> <!-- Drive Info -->
<div class="flex-1 space-y-6"> <div class="flex-1 space-y-6">
<div> <div>
@@ -580,22 +580,61 @@
</div> </div>
</div> </div>
<div class="grid grid-cols-2 gap-8 pt-6 border-t border-border-color/30"> <!-- Live Performance / Health Dashboard -->
<div class="grid grid-cols-2 gap-4 pt-6 border-t border-border-color/30">
<div class="bg-bg-primary/50 p-4 rounded-xl border border-border-color/50">
<span class="text-[8px] font-black uppercase tracking-widest text-text-secondary opacity-40 block mb-2">Session Performance</span>
<div class="space-y-3">
<div class="flex justify-between items-center text-[10px]">
<span class="text-text-secondary font-bold uppercase tracking-tighter flex items-center gap-1.5"><ArrowUp size={10} class="text-blue-400" /> WRITTEN</span>
<span class="text-text-primary font-black mono">{(info.tape?.session_mib_written || 0).toLocaleString()} MiB</span>
</div>
<div class="flex justify-between items-center text-[10px]">
<span class="text-text-secondary font-bold uppercase tracking-tighter flex items-center gap-1.5"><ArrowDown size={10} class="text-success-color" /> READ</span>
<span class="text-text-primary font-black mono">{(info.tape?.session_mib_read || 0).toLocaleString()} MiB</span>
</div>
</div>
</div>
<div class="bg-bg-primary/50 p-4 rounded-xl border border-border-color/50">
<span class="text-[8px] font-black uppercase tracking-widest text-text-secondary opacity-40 block mb-2">Hardware Health</span>
{#if info.tape?.alerts && info.tape.alerts.length > 0}
<div class="space-y-1">
{#each info.tape.alerts as alert}
<div class="flex items-center gap-2 text-[9px] font-black text-orange-400 uppercase tracking-tighter">
<ShieldAlert size={10} /> {alert}
</div>
{/each}
</div>
{:else}
<div class="flex items-center gap-2 text-[10px] font-black text-success-color uppercase tracking-tighter">
<ShieldCheck size={14} /> System Healthy
</div>
<span class="text-[8px] text-text-secondary opacity-40 uppercase font-bold block mt-1">No active TapeAlerts</span>
{/if}
</div>
</div>
<div class="grid grid-cols-2 gap-8 pt-4">
<div> <div>
<span class="text-[8px] font-black uppercase tracking-widest text-text-secondary opacity-40 block mb-1">Assigned ID</span> <span class="text-[8px] font-black uppercase tracking-widest text-text-secondary opacity-40 block mb-1">Assigned ID</span>
<span class="text-lg font-black text-text-primary mono tracking-tighter">{media.identifier}</span> <span class="text-lg font-black text-text-primary mono tracking-tighter">{media.identifier}</span>
</div> </div>
<div> <div>
<span class="text-[8px] font-black uppercase tracking-widest text-text-secondary opacity-40 block mb-1">Reported MAM Barcode</span> <span class="text-[8px] font-black uppercase tracking-widest text-text-secondary opacity-40 block mb-1">Load Count</span>
<span class="text-lg font-black text-text-primary mono tracking-tighter">{info.tape?.barcode || 'NO BARCODE'}</span> <span class="text-lg font-black text-text-primary mono tracking-tighter flex items-center gap-2">
<RotateCw size={14} class="text-blue-500 opacity-50" />
{info.tape?.load_count || '0'}
</span>
</div> </div>
</div> </div>
</div> </div>
<!-- Media/MAM Info --> <!-- Media/MAM Info -->
<div class="flex-1 bg-bg-primary/30 rounded-2xl p-6 border border-border-color/50 relative"> <div class="flex-1 bg-bg-primary/30 rounded-2xl p-6 border border-border-color/50 relative">
<div class="text-[9px] font-black uppercase tracking-[0.2em] text-text-secondary opacity-50 mb-6 flex items-center gap-2"> <div class="text-[9px] font-black uppercase tracking-[0.2em] text-text-secondary opacity-50 mb-6 flex items-center justify-between">
<Database size={12} /> Medium Auxiliary Memory (MAM) <div class="flex items-center gap-2"><Database size={12} /> Medium Metadata (MAM)</div>
<span class="text-blue-400 font-black tracking-widest font-mono">{info.tape?.barcode || 'NO BARCODE'}</span>
</div> </div>
<div class="grid grid-cols-2 gap-y-6 gap-x-12"> <div class="grid grid-cols-2 gap-y-6 gap-x-12">
@@ -617,10 +656,32 @@
<span class="text-[8px] font-black uppercase tracking-widest text-text-secondary opacity-40 block mb-1">Manufacture Date</span> <span class="text-[8px] font-black uppercase tracking-widest text-text-secondary opacity-40 block mb-1">Manufacture Date</span>
<span class="text-xs font-bold text-text-primary mono">{info.tape?.manufacture_date || 'N/A'}</span> <span class="text-xs font-bold text-text-primary mono">{info.tape?.manufacture_date || 'N/A'}</span>
</div> </div>
<div class="col-span-2 pt-2">
<span class="text-[8px] font-black uppercase tracking-widest text-text-secondary opacity-40 block mb-1">Medium Application Label</span> <div class="col-span-2 space-y-4 pt-2">
<div class="bg-bg-secondary p-3 rounded-lg border border-border-color font-mono text-xs text-text-primary italic shadow-inner"> <!-- Capacity Utilization -->
"{info.tape?.label || 'UNLABELED'}" {#if info.tape?.remaining_capacity_mib !== undefined && info.tape?.max_capacity_mib}
{@const used_mib = info.tape.max_capacity_mib - info.tape.remaining_capacity_mib}
{@const perc = Math.min(100, Math.round((used_mib / info.tape.max_capacity_mib) * 100))}
<div>
<div class="flex justify-between items-end mb-2">
<span class="text-[8px] font-black uppercase tracking-widest text-text-secondary opacity-40">Physical Capacity Utilization</span>
<span class="text-[10px] font-black text-blue-400 mono">{perc}%</span>
</div>
<div class="h-2 bg-bg-primary rounded-full overflow-hidden border border-border-color/30 flex">
<div class="h-full bg-blue-500 shadow-[0_0_8px_rgba(59,130,246,0.5)] transition-all duration-1000" style="width: {perc}%"></div>
</div>
<div class="flex justify-between mt-1 text-[8px] font-bold text-text-secondary/50 mono uppercase">
<span>Used: {(used_mib / 1024).toFixed(1)} GiB</span>
<span>Free: {(info.tape.remaining_capacity_mib / 1024).toFixed(1)} GiB</span>
</div>
</div>
{/if}
<div>
<span class="text-[8px] font-black uppercase tracking-widest text-text-secondary opacity-40 block mb-1">Medium Application Label</span>
<div class="bg-bg-secondary p-3 rounded-lg border border-border-color font-mono text-xs text-text-primary italic shadow-inner">
"{info.tape?.label || 'UNLABELED'}"
</div>
</div> </div>
</div> </div>
</div> </div>