mirror of
https://github.com/uutils/coreutils.git
synced 2026-05-06 15:49:31 -04:00
refactor(dirname): implement pure string manipulation per POSIX (#8936)
This commit is contained in:
committed by
GitHub
parent
571e13999a
commit
b3ad96f8a0
@@ -202,6 +202,7 @@ nofield
|
||||
# * clippy
|
||||
uninlined
|
||||
nonminimal
|
||||
rposition
|
||||
|
||||
# * CPU/hardware features
|
||||
ASIMD
|
||||
|
||||
@@ -4,8 +4,9 @@
|
||||
// file that was distributed with this source code.
|
||||
|
||||
use clap::{Arg, ArgAction, Command};
|
||||
use std::borrow::Cow;
|
||||
use std::ffi::OsString;
|
||||
use std::path::Path;
|
||||
#[cfg(unix)]
|
||||
use uucore::display::print_verbatim;
|
||||
use uucore::error::{UResult, UUsageError};
|
||||
use uucore::format_usage;
|
||||
@@ -18,51 +19,84 @@ mod options {
|
||||
pub const DIR: &str = "dir";
|
||||
}
|
||||
|
||||
/// Handle the special case where a path ends with "/."
|
||||
/// Perform dirname as pure string manipulation per POSIX/GNU behavior.
|
||||
///
|
||||
/// dirname should NOT normalize paths. It does simple string manipulation:
|
||||
/// 1. Strip trailing slashes (unless path is all slashes)
|
||||
/// 2. If ends with `/.` (possibly `//.` or `///.`), strip the `/+.` pattern
|
||||
/// 3. Otherwise, remove everything after the last `/`
|
||||
/// 4. If no `/` found, return `.`
|
||||
/// 5. Strip trailing slashes from result (unless result would be empty)
|
||||
///
|
||||
/// Examples:
|
||||
/// - `foo/.` → `foo`
|
||||
/// - `foo/./bar` → `foo/.`
|
||||
/// - `foo/bar` → `foo`
|
||||
/// - `a/b/c` → `a/b`
|
||||
///
|
||||
/// This matches GNU/POSIX behavior where `dirname("/home/dos/.")` returns "/home/dos"
|
||||
/// rather than "/home" (which would be the result of `Path::parent()` due to normalization).
|
||||
/// Per POSIX.1-2017 dirname specification and GNU coreutils manual:
|
||||
/// - POSIX: <https://pubs.opengroup.org/onlinepubs/9699919799/utilities/dirname.html>
|
||||
/// - GNU: <https://www.gnu.org/software/coreutils/manual/html_node/dirname-invocation.html>
|
||||
///
|
||||
/// dirname should do simple string manipulation without path normalization.
|
||||
/// See issue #8910 and similar fix in basename (#8373, commit c5268a897).
|
||||
///
|
||||
/// Returns `Some(())` if the special case was handled (output already printed),
|
||||
/// or `None` if normal `Path::parent()` logic should be used.
|
||||
fn handle_trailing_dot(path_bytes: &[u8]) -> Option<()> {
|
||||
if !path_bytes.ends_with(b"/.") {
|
||||
return None;
|
||||
fn dirname_string_manipulation(path_bytes: &[u8]) -> Cow<'_, [u8]> {
|
||||
if path_bytes.is_empty() {
|
||||
return Cow::Borrowed(b".");
|
||||
}
|
||||
|
||||
// Strip the "/." suffix and print the result
|
||||
if path_bytes.len() == 2 {
|
||||
// Special case: "/." -> "/"
|
||||
print!("/");
|
||||
Some(())
|
||||
} else {
|
||||
// General case: "/home/dos/." -> "/home/dos"
|
||||
let stripped = &path_bytes[..path_bytes.len() - 2];
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
let result = std::ffi::OsStr::from_bytes(stripped);
|
||||
print_verbatim(result).unwrap();
|
||||
Some(())
|
||||
}
|
||||
#[cfg(not(unix))]
|
||||
{
|
||||
// On non-Unix, fall back to lossy conversion
|
||||
if let Ok(s) = std::str::from_utf8(stripped) {
|
||||
print!("{s}");
|
||||
Some(())
|
||||
} else {
|
||||
// Can't handle non-UTF-8 on non-Unix, fall through to normal logic
|
||||
None
|
||||
let mut bytes = path_bytes;
|
||||
|
||||
// Step 1: Strip trailing slashes (but not if the entire path is slashes)
|
||||
let all_slashes = bytes.iter().all(|&b| b == b'/');
|
||||
if all_slashes {
|
||||
return Cow::Borrowed(b"/");
|
||||
}
|
||||
|
||||
while bytes.len() > 1 && bytes.ends_with(b"/") {
|
||||
bytes = &bytes[..bytes.len() - 1];
|
||||
}
|
||||
|
||||
// Step 2: Check if it ends with `/.` and strip the `/+.` pattern
|
||||
if bytes.ends_with(b".") && bytes.len() >= 2 {
|
||||
let dot_pos = bytes.len() - 1;
|
||||
if bytes[dot_pos - 1] == b'/' {
|
||||
// Find where the slashes before the dot start
|
||||
let mut slash_start = dot_pos - 1;
|
||||
while slash_start > 0 && bytes[slash_start - 1] == b'/' {
|
||||
slash_start -= 1;
|
||||
}
|
||||
// Return the stripped result
|
||||
if slash_start == 0 {
|
||||
// Result would be empty
|
||||
return if path_bytes.starts_with(b"/") {
|
||||
Cow::Borrowed(b"/")
|
||||
} else {
|
||||
Cow::Borrowed(b".")
|
||||
};
|
||||
}
|
||||
return Cow::Owned(bytes[..slash_start].to_vec());
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3: Normal dirname - find last / and remove everything after it
|
||||
if let Some(last_slash_pos) = bytes.iter().rposition(|&b| b == b'/') {
|
||||
// Found a slash, remove everything after it
|
||||
let mut result = &bytes[..last_slash_pos];
|
||||
|
||||
// Strip trailing slashes from result (but keep at least one if at the start)
|
||||
while result.len() > 1 && result.ends_with(b"/") {
|
||||
result = &result[..result.len() - 1];
|
||||
}
|
||||
|
||||
if result.is_empty() {
|
||||
return Cow::Borrowed(b"/");
|
||||
}
|
||||
|
||||
return Cow::Owned(result.to_vec());
|
||||
}
|
||||
|
||||
// No slash found, return "."
|
||||
Cow::Borrowed(b".")
|
||||
}
|
||||
|
||||
#[uucore::main]
|
||||
@@ -83,27 +117,25 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||
|
||||
for path in &dirnames {
|
||||
let path_bytes = uucore::os_str_as_bytes(path.as_os_str()).unwrap_or(&[]);
|
||||
let result = dirname_string_manipulation(path_bytes);
|
||||
|
||||
if handle_trailing_dot(path_bytes).is_none() {
|
||||
// Normal path handling using Path::parent()
|
||||
let p = Path::new(path);
|
||||
match p.parent() {
|
||||
Some(d) => {
|
||||
if d.components().next().is_none() {
|
||||
print!(".");
|
||||
} else {
|
||||
print_verbatim(d).unwrap();
|
||||
}
|
||||
}
|
||||
None => {
|
||||
if p.is_absolute() || path.as_os_str() == "/" {
|
||||
print!("/");
|
||||
} else {
|
||||
print!(".");
|
||||
}
|
||||
}
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
let result_os = std::ffi::OsStr::from_bytes(&result);
|
||||
print_verbatim(result_os).unwrap();
|
||||
}
|
||||
#[cfg(not(unix))]
|
||||
{
|
||||
// On non-Unix, fall back to lossy conversion
|
||||
if let Ok(s) = std::str::from_utf8(&result) {
|
||||
print!("{s}");
|
||||
} else {
|
||||
// Fallback for non-UTF-8 paths on non-Unix systems
|
||||
print!(".");
|
||||
}
|
||||
}
|
||||
|
||||
print!("{line_ending}");
|
||||
}
|
||||
|
||||
|
||||
@@ -9,6 +9,11 @@ fn test_invalid_arg() {
|
||||
new_ucmd!().arg("--definitely-invalid").fails_with_code(1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_missing_operand() {
|
||||
new_ucmd!().fails_with_code(1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_path_with_trailing_slashes() {
|
||||
new_ucmd!()
|
||||
@@ -71,15 +76,11 @@ fn test_dirname_non_utf8_paths() {
|
||||
use std::ffi::OsStr;
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
// Create a test file with non-UTF-8 bytes in the name
|
||||
let non_utf8_bytes = b"test_\xFF\xFE/file.txt";
|
||||
let non_utf8_name = OsStr::from_bytes(non_utf8_bytes);
|
||||
|
||||
// Test that dirname handles non-UTF-8 paths without crashing
|
||||
let result = new_ucmd!().arg(non_utf8_name).succeeds();
|
||||
|
||||
// Just verify it didn't crash and produced some output
|
||||
// The exact output format may vary due to lossy conversion
|
||||
let output = result.stdout_str_lossy();
|
||||
assert!(!output.is_empty());
|
||||
assert!(output.contains("test_"));
|
||||
@@ -156,7 +157,7 @@ fn test_trailing_dot_edge_cases() {
|
||||
new_ucmd!()
|
||||
.arg("/home/dos//.")
|
||||
.succeeds()
|
||||
.stdout_is("/home/dos/\n");
|
||||
.stdout_is("/home/dos\n");
|
||||
|
||||
// Path with . in middle (should use normal logic)
|
||||
new_ucmd!()
|
||||
@@ -216,3 +217,62 @@ fn test_existing_behavior_preserved() {
|
||||
.succeeds()
|
||||
.stdout_is("/home/dos\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_multiple_paths_comprehensive() {
|
||||
// Comprehensive test for multiple paths in single invocation
|
||||
new_ucmd!()
|
||||
.args(&[
|
||||
"/home/dos/.",
|
||||
"/var/log",
|
||||
".",
|
||||
"/tmp/.",
|
||||
"",
|
||||
"/",
|
||||
"relative/path",
|
||||
])
|
||||
.succeeds()
|
||||
.stdout_is("/home/dos\n/var\n.\n/tmp\n.\n/\nrelative\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_all_dot_slash_variations() {
|
||||
// Tests for all the cases mentioned in issue #8910 comment
|
||||
// https://github.com/uutils/coreutils/issues/8910#issuecomment-3408735720
|
||||
|
||||
new_ucmd!().arg("foo//.").succeeds().stdout_is("foo\n");
|
||||
|
||||
new_ucmd!().arg("foo///.").succeeds().stdout_is("foo\n");
|
||||
|
||||
new_ucmd!().arg("foo/./").succeeds().stdout_is("foo\n");
|
||||
|
||||
new_ucmd!()
|
||||
.arg("foo/bar/./")
|
||||
.succeeds()
|
||||
.stdout_is("foo/bar\n");
|
||||
|
||||
new_ucmd!().arg("foo/./bar").succeeds().stdout_is("foo/.\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dot_slash_component_preservation() {
|
||||
// Ensure that /. components in the middle are preserved
|
||||
// These should NOT be normalized away
|
||||
|
||||
new_ucmd!().arg("a/./b").succeeds().stdout_is("a/.\n");
|
||||
|
||||
new_ucmd!()
|
||||
.arg("a/./b/./c")
|
||||
.succeeds()
|
||||
.stdout_is("a/./b/.\n");
|
||||
|
||||
new_ucmd!()
|
||||
.arg("foo/./bar/baz")
|
||||
.succeeds()
|
||||
.stdout_is("foo/./bar\n");
|
||||
|
||||
new_ucmd!()
|
||||
.arg("/path/./to/file")
|
||||
.succeeds()
|
||||
.stdout_is("/path/./to\n");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user