Better SCSI ready-state checking
Continuous Integration / backend-tests (push) Successful in 36s
Continuous Integration / frontend-check (push) Successful in 16s
Continuous Integration / e2e-tests (push) Successful in 5m51s

This commit is contained in:
2026-05-05 20:51:41 -04:00
parent d398664e51
commit f40a76aa14
+47 -21
View File
@@ -236,19 +236,28 @@ class LTOProvider(AbstractStorageProvider):
LTOProvider._lkg_state[self.device_path]["last_check"] = time.time() LTOProvider._lkg_state[self.device_path]["last_check"] = time.time()
return mam return mam
# If we get "Device or resource busy", wait a bit and retry # Log failure so we can diagnose why sg_read_attr isn't working
stderr_text = ( stderr_text = (
(result.stderr or b"").decode().lower() (result.stderr or b"").decode()
if isinstance(result.stderr, bytes) if isinstance(result.stderr, bytes)
else (result.stderr or "").lower() else (result.stderr or "")
) )
if result.returncode != 0 and "busy" in stderr_text: if result.returncode != 0:
time.sleep(0.2) logger.warning(
continue f"sg_read_attr returned code {result.returncode} for {self.device_path} (attempt {attempt + 1}/3): {stderr_text[:200]}"
)
if "busy" in stderr_text.lower():
time.sleep(0.2)
continue
except FileNotFoundError:
logger.error(
f"'sg_read_attr' binary not found in PATH. Cannot read MAM for {self.device_path}."
)
break
except Exception as e: except Exception as e:
logger.debug( logger.warning(
f"MAM read attempt {attempt} failed for {self.device_path}: {e}" f"MAM read attempt {attempt + 1}/3 failed for {self.device_path}: {e}"
) )
time.sleep(0.1) time.sleep(0.1)
@@ -301,6 +310,9 @@ class LTOProvider(AbstractStorageProvider):
): ):
return LTOProvider._lkg_state[self.device_path]["online"] return LTOProvider._lkg_state[self.device_path]["online"]
is_online = False
# 1. Try mt status
try: try:
cmd = ["mt", "-f", self.device_path, "status"] cmd = ["mt", "-f", self.device_path, "status"]
self._log_command(cmd) self._log_command(cmd)
@@ -314,22 +326,36 @@ class LTOProvider(AbstractStorageProvider):
"Device or resource busy" in stderr "Device or resource busy" in stderr
or "Device or resource busy" in stdout or "Device or resource busy" in stdout
): ):
LTOProvider._lkg_state[self.device_path]["online"] = True is_online = True
return True else:
is_online = (
"ONLINE" in stdout or "READY" in stdout or result.returncode == 0
)
except FileNotFoundError:
logger.debug(f"'mt' binary not found for {self.device_path}")
except Exception as e:
logger.debug(f"mt status failed for {self.device_path}: {e}")
is_online = ( # 2. Fallback: try sg_turs (SCSI Test Unit Ready)
"ONLINE" in stdout or "READY" in stdout or result.returncode == 0 if not is_online:
) try:
cmd = ["sg_turs", self.device_path]
self._log_command(cmd)
result = subprocess.run(cmd, capture_output=True, timeout=5)
if result.returncode == 0:
is_online = True
except FileNotFoundError:
logger.debug(f"'sg_turs' binary not found for {self.device_path}")
except Exception as e:
logger.debug(f"sg_turs failed for {self.device_path}: {e}")
# If we transitioned from online -> offline, clear the LKG MAM (tape was likely ejected) # 3. If we transitioned from online -> offline, clear the LKG MAM (tape was likely ejected)
if LTOProvider._lkg_state[self.device_path]["online"] and not is_online: if LTOProvider._lkg_state[self.device_path]["online"] and not is_online:
LTOProvider._lkg_state[self.device_path]["mam"] = {} LTOProvider._lkg_state[self.device_path]["mam"] = {}
LTOProvider._lkg_state[self.device_path]["online"] = is_online LTOProvider._lkg_state[self.device_path]["online"] = is_online
LTOProvider._lkg_state[self.device_path]["last_check"] = now LTOProvider._lkg_state[self.device_path]["last_check"] = now
return is_online return is_online
except Exception:
return LTOProvider._lkg_state[self.device_path]["online"]
def is_write_protected(self) -> bool: def is_write_protected(self) -> bool:
"""Checks if the tape is write-protected (read-only)""" """Checks if the tape is write-protected (read-only)"""