Harden bytecode and extension stemming

This commit is contained in:
Zsolt Dollenstein
2026-05-03 13:51:20 +01:00
parent db418d2381
commit effc71cccc
2 changed files with 85 additions and 22 deletions
@@ -101,22 +101,65 @@ fn bytecode_module_stem<'a>(
) -> Option<(&'a str, &'a [String])> {
let stem = file_name.strip_suffix(".pyc")?;
if parents.last().is_some_and(|parent| parent == "__pycache__") {
Some((
stem.split('.').next().filter(|stem| !stem.is_empty())?,
&parents[..parents.len() - 1],
))
} else {
Some((stem, parents))
// A `.pyc` file in `__pycache__` does not make the module importable
// without the corresponding source file. Sourceless imports use the
// legacy `module.pyc` location instead.
return None;
}
Some((stem, parents))
}
/// Extracts the importable module name from an extension-module file name.
///
/// Returns `None` when `file_name` does not end in `.so` or `.pyd`, when
/// nothing is left in front of the recognised suffixes, or when the final
/// dot-separated component is not a recognised import tag.
///
/// Shapes handled, in order:
/// - `name.abi3.so`: the `.abi3` marker is stripped and `name` is returned.
/// - `name.so` / `name.pyd` with no further dot: `name` is returned as-is.
/// - `name.<tag>.so`: `name` is returned only when `<tag>` is accepted by
///   `is_extension_module_tag`; anything else is rejected instead of guessed at.
fn extension_module_stem(file_name: &str) -> Option<&str> {
    let stem = file_name
        .strip_suffix(".so")
        .or_else(|| file_name.strip_suffix(".pyd"))?;
    // A bare `.so` / `.pyd` has no module name at all.
    if stem.is_empty() {
        return None;
    }
    // `.abi3` is checked before the generic tag split so that it never has to
    // pass through the tag recogniser.
    if let Some(module) = stem.strip_suffix(".abi3") {
        return non_empty(module);
    }
    // Split on the LAST dot: everything after it is the candidate tag, and
    // everything before it is the module name.
    let Some((module, tag)) = stem.rsplit_once('.') else {
        // No tag at all, e.g. `plain.so`.
        return Some(stem);
    };
    if is_extension_module_tag(tag) {
        non_empty(module)
    } else {
        // Unknown trailing component: do not treat the file as a module.
        None
    }
}
/// Reports whether `tag` looks like the interpreter/platform component of an
/// extension-module file name (the part between the module name and `.so` /
/// `.pyd`).
///
/// The accepted forms are hardcoded from common
/// `importlib.machinery.EXTENSION_SUFFIXES` values. They resemble wheel ABI
/// tags but are import suffixes: for example, Windows debug builds use
/// `_d.cp314t-win_amd64.pyd`, where the debug marker belongs to the module
/// stem instead of being encoded in the tag as `cp314td-win_amd64`.
fn is_extension_module_tag(tag: &str) -> bool {
    // Tags that are recognised purely by their leading interpreter name.
    const INTERPRETER_PREFIXES: [&str; 3] = ["cpython-", "pypy", "graalpy"];

    if INTERPRETER_PREFIXES
        .iter()
        .any(|&prefix| tag.starts_with(prefix))
    {
        return true;
    }

    // Remaining accepted form: `cp<digits>[t]` followed by nothing, `-…` or `_…`.
    let Some(after_cp) = tag.strip_prefix("cp") else {
        return false;
    };
    let after_version = after_cp.trim_start_matches(|c: char| c.is_ascii_digit());
    if after_version.len() == after_cp.len() {
        // No version digits followed `cp`.
        return false;
    }
    // A single optional `t` marker may follow the version digits.
    let after_marker = after_version.strip_prefix('t').unwrap_or(after_version);
    matches!(after_marker.chars().next(), None | Some('-' | '_'))
}
/// Returns `Some(value)` for a non-empty string and `None` for the empty string.
fn non_empty(value: &str) -> Option<&str> {
    if value.is_empty() {
        None
    } else {
        Some(value)
    }
}
fn has_extension(path: impl AsRef<Path>, extension: &str) -> bool {
@@ -159,24 +202,50 @@ mod tests {
}
#[test]
fn record_module_from_bytecode() {
fn record_module_from_legacy_bytecode() {
let mut modules = BTreeSet::new();
add_record_module(
"package/__pycache__/module.cpython-312.opt-1.pyc",
&mut modules,
);
add_record_module("package/__pycache__/__init__.cpython-312.pyc", &mut modules);
add_record_module("package/module.pyc", &mut modules);
add_record_module("legacy.pyc", &mut modules);
assert_eq!(module_names(modules), "legacy\npackage\npackage.module");
}
#[test]
fn record_module_ignores_pycache_bytecode() {
    // Bytecode that lives under `__pycache__` must not register any module.
    let pycache_entries = [
        "package/__pycache__/module.cpython-312.opt-1.pyc",
        "package/__pycache__/__init__.cpython-312.pyc",
    ];

    let mut recorded = BTreeSet::new();
    for entry in pycache_entries {
        add_record_module(entry, &mut recorded);
    }

    assert_eq!(module_names(recorded), "");
}
#[test]
fn record_module_from_extension_module() {
    // Each entry exercises one recognised extension-module naming shape:
    // a CPython platform tag, a `cpython-…td-…` tag, the `.abi3` marker,
    // a `cp312-win_amd64` `.pyd`, a package `__init__`, and an untagged `.so`.
    let mut modules = BTreeSet::new();
    add_record_module("package/extension.cpython-312-darwin.so", &mut modules);
    add_record_module(
        "package/free_threaded.cpython-314td-darwin.so",
        &mut modules,
    );
    add_record_module("package/limited.abi3.so", &mut modules);
    add_record_module("package/windows.cp312-win_amd64.pyd", &mut modules);
    add_record_module("package/__init__.cpython-312-darwin.so", &mut modules);
    add_record_module("plain.so", &mut modules);
    // Single assertion only: `modules` is passed by value here.
    assert_eq!(
        module_names(modules),
        "package\npackage.extension\npackage.free_threaded\npackage.limited\npackage.windows\nplain"
    );
}
#[test]
fn record_module_ignores_unknown_extension_tags() {
    // The component before `.so` is not a recognised import tag, so the file
    // must not contribute any module name.
    let unrecognised = "package/extension.not-an-extension-tag.so";

    let mut recorded = BTreeSet::new();
    add_record_module(unrecognised, &mut recorded);

    assert_eq!(module_names(recorded), "");
}
}
-6
View File
@@ -183,12 +183,6 @@ dependencies = [
"sets": []
},
"module_owners": {
"bytecode": [
"typing-extensions==0.1.0@path+[TEMP_DIR]/typing_extensions-0.1.0-py3-none-any.whl"
],
"bytecode.compiled": [
"typing-extensions==0.1.0@path+[TEMP_DIR]/typing_extensions-0.1.0-py3-none-any.whl"
],
"café": [
"typing-extensions==0.1.0@path+[TEMP_DIR]/typing_extensions-0.1.0-py3-none-any.whl"
],