refactor(dirname): implement pure string manipulation per POSIX (#8936)

This commit is contained in:
Cả thế giới là Rust
2026-01-17 23:36:28 +07:00
committed by GitHub
parent 571e13999a
commit b3ad96f8a0
3 changed files with 151 additions and 58 deletions
+1
View File
@@ -202,6 +202,7 @@ nofield
# * clippy
uninlined
nonminimal
rposition
# * CPU/hardware features
ASIMD
+85 -53
View File
@@ -4,8 +4,9 @@
// file that was distributed with this source code.
use clap::{Arg, ArgAction, Command};
use std::borrow::Cow;
use std::ffi::OsString;
use std::path::Path;
#[cfg(unix)]
use uucore::display::print_verbatim;
use uucore::error::{UResult, UUsageError};
use uucore::format_usage;
@@ -18,51 +19,84 @@ mod options {
pub const DIR: &str = "dir";
}
/// Handle the special case where a path ends with "/."
/// Perform dirname as pure string manipulation per POSIX/GNU behavior.
///
/// dirname should NOT normalize paths. It does simple string manipulation:
/// 1. Strip trailing slashes (unless path is all slashes)
/// 2. If ends with `/.` (possibly `//.` or `///.`), strip the `/+.` pattern
/// 3. Otherwise, remove everything after the last `/`
/// 4. If no `/` found, return `.`
/// 5. Strip trailing slashes from result (unless result would be empty)
///
/// Examples:
/// - `foo/.` → `foo`
/// - `foo/./bar` → `foo/.`
/// - `foo/bar` → `foo`
/// - `a/b/c` → `a/b`
///
/// This matches GNU/POSIX behavior where `dirname("/home/dos/.")` returns "/home/dos"
/// rather than "/home" (which would be the result of `Path::parent()` due to normalization).
/// Per POSIX.1-2017 dirname specification and GNU coreutils manual:
/// - POSIX: <https://pubs.opengroup.org/onlinepubs/9699919799/utilities/dirname.html>
/// - GNU: <https://www.gnu.org/software/coreutils/manual/html_node/dirname-invocation.html>
///
/// dirname should do simple string manipulation without path normalization.
/// See issue #8910 and similar fix in basename (#8373, commit c5268a897).
///
/// Returns `Some(())` if the special case was handled (output already printed),
/// or `None` if normal `Path::parent()` logic should be used.
fn handle_trailing_dot(path_bytes: &[u8]) -> Option<()> {
if !path_bytes.ends_with(b"/.") {
return None;
fn dirname_string_manipulation(path_bytes: &[u8]) -> Cow<'_, [u8]> {
if path_bytes.is_empty() {
return Cow::Borrowed(b".");
}
// Strip the "/." suffix and print the result
if path_bytes.len() == 2 {
// Special case: "/." -> "/"
print!("/");
Some(())
} else {
// General case: "/home/dos/." -> "/home/dos"
let stripped = &path_bytes[..path_bytes.len() - 2];
#[cfg(unix)]
{
use std::os::unix::ffi::OsStrExt;
let result = std::ffi::OsStr::from_bytes(stripped);
print_verbatim(result).unwrap();
Some(())
}
#[cfg(not(unix))]
{
// On non-Unix, fall back to lossy conversion
if let Ok(s) = std::str::from_utf8(stripped) {
print!("{s}");
Some(())
} else {
// Can't handle non-UTF-8 on non-Unix, fall through to normal logic
None
let mut bytes = path_bytes;
// Step 1: Strip trailing slashes (but not if the entire path is slashes)
let all_slashes = bytes.iter().all(|&b| b == b'/');
if all_slashes {
return Cow::Borrowed(b"/");
}
while bytes.len() > 1 && bytes.ends_with(b"/") {
bytes = &bytes[..bytes.len() - 1];
}
// Step 2: Check if it ends with `/.` and strip the `/+.` pattern
if bytes.ends_with(b".") && bytes.len() >= 2 {
let dot_pos = bytes.len() - 1;
if bytes[dot_pos - 1] == b'/' {
// Find where the slashes before the dot start
let mut slash_start = dot_pos - 1;
while slash_start > 0 && bytes[slash_start - 1] == b'/' {
slash_start -= 1;
}
// Return the stripped result
if slash_start == 0 {
// Result would be empty
return if path_bytes.starts_with(b"/") {
Cow::Borrowed(b"/")
} else {
Cow::Borrowed(b".")
};
}
return Cow::Owned(bytes[..slash_start].to_vec());
}
}
// Step 3: Normal dirname - find last / and remove everything after it
if let Some(last_slash_pos) = bytes.iter().rposition(|&b| b == b'/') {
// Found a slash, remove everything after it
let mut result = &bytes[..last_slash_pos];
// Strip trailing slashes from result (but keep at least one if at the start)
while result.len() > 1 && result.ends_with(b"/") {
result = &result[..result.len() - 1];
}
if result.is_empty() {
return Cow::Borrowed(b"/");
}
return Cow::Owned(result.to_vec());
}
// No slash found, return "."
Cow::Borrowed(b".")
}
#[uucore::main]
@@ -83,27 +117,25 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
for path in &dirnames {
let path_bytes = uucore::os_str_as_bytes(path.as_os_str()).unwrap_or(&[]);
let result = dirname_string_manipulation(path_bytes);
if handle_trailing_dot(path_bytes).is_none() {
// Normal path handling using Path::parent()
let p = Path::new(path);
match p.parent() {
Some(d) => {
if d.components().next().is_none() {
print!(".");
} else {
print_verbatim(d).unwrap();
}
}
None => {
if p.is_absolute() || path.as_os_str() == "/" {
print!("/");
} else {
print!(".");
}
}
#[cfg(unix)]
{
use std::os::unix::ffi::OsStrExt;
let result_os = std::ffi::OsStr::from_bytes(&result);
print_verbatim(result_os).unwrap();
}
#[cfg(not(unix))]
{
// On non-Unix, fall back to lossy conversion
if let Ok(s) = std::str::from_utf8(&result) {
print!("{s}");
} else {
// Fallback for non-UTF-8 paths on non-Unix systems
print!(".");
}
}
print!("{line_ending}");
}
+65 -5
View File
@@ -9,6 +9,11 @@ fn test_invalid_arg() {
new_ucmd!().arg("--definitely-invalid").fails_with_code(1);
}
#[test]
fn test_missing_operand() {
new_ucmd!().fails_with_code(1);
}
#[test]
fn test_path_with_trailing_slashes() {
new_ucmd!()
@@ -71,15 +76,11 @@ fn test_dirname_non_utf8_paths() {
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
// Create a test file with non-UTF-8 bytes in the name
let non_utf8_bytes = b"test_\xFF\xFE/file.txt";
let non_utf8_name = OsStr::from_bytes(non_utf8_bytes);
// Test that dirname handles non-UTF-8 paths without crashing
let result = new_ucmd!().arg(non_utf8_name).succeeds();
// Just verify it didn't crash and produced some output
// The exact output format may vary due to lossy conversion
let output = result.stdout_str_lossy();
assert!(!output.is_empty());
assert!(output.contains("test_"));
@@ -156,7 +157,7 @@ fn test_trailing_dot_edge_cases() {
new_ucmd!()
.arg("/home/dos//.")
.succeeds()
.stdout_is("/home/dos/\n");
.stdout_is("/home/dos\n");
// Path with . in middle (should use normal logic)
new_ucmd!()
@@ -216,3 +217,62 @@ fn test_existing_behavior_preserved() {
.succeeds()
.stdout_is("/home/dos\n");
}
#[test]
fn test_multiple_paths_comprehensive() {
// Comprehensive test for multiple paths in single invocation
new_ucmd!()
.args(&[
"/home/dos/.",
"/var/log",
".",
"/tmp/.",
"",
"/",
"relative/path",
])
.succeeds()
.stdout_is("/home/dos\n/var\n.\n/tmp\n.\n/\nrelative\n");
}
#[test]
fn test_all_dot_slash_variations() {
// Tests for all the cases mentioned in issue #8910 comment
// https://github.com/uutils/coreutils/issues/8910#issuecomment-3408735720
new_ucmd!().arg("foo//.").succeeds().stdout_is("foo\n");
new_ucmd!().arg("foo///.").succeeds().stdout_is("foo\n");
new_ucmd!().arg("foo/./").succeeds().stdout_is("foo\n");
new_ucmd!()
.arg("foo/bar/./")
.succeeds()
.stdout_is("foo/bar\n");
new_ucmd!().arg("foo/./bar").succeeds().stdout_is("foo/.\n");
}
#[test]
fn test_dot_slash_component_preservation() {
// Ensure that /. components in the middle are preserved
// These should NOT be normalized away
new_ucmd!().arg("a/./b").succeeds().stdout_is("a/.\n");
new_ucmd!()
.arg("a/./b/./c")
.succeeds()
.stdout_is("a/./b/.\n");
new_ucmd!()
.arg("foo/./bar/baz")
.succeeds()
.stdout_is("foo/./bar\n");
new_ucmd!()
.arg("/path/./to/file")
.succeeds()
.stdout_is("/path/./to\n");
}