README.md: be explicit with the list of tools

Merge pull request #93 from oSoMoN/upstream-test-suite-skipped-tests-issue92
Upstream test suite: correctly handle tests that are skipped (fixes #92)
2026-06-29 15:15:15 -04:00 · 2024-10-02 13:58:16 +02:00 · 2024-10-02 13:55:53 +02:00 · 2024-10-01 18:59:15 +02:00 · 2024-10-01 13:30:57 -03:00 · 2024-10-01 13:30:57 -03:00
16 changed files with 2499 additions and 431 deletions
@@ -41,6 +41,8 @@ jobs:
    strategy:
      matrix:
        test-target:
+          - { name: fuzz_cmp, should_pass: true }
+          - { name: fuzz_cmp_args, should_pass: true }
          - { name: fuzz_ed, should_pass: true }
          - { name: fuzz_normal, should_pass: true }
          - { name: fuzz_patch, should_pass: true }
@@ -128,6 +128,7 @@ dependencies = [
 "assert_cmd",
 "chrono",
 "diff",
+ "itoa",
 "predicates",
 "pretty_assertions",
 "regex",
@@ -154,9 +155,9 @@ dependencies = [

 [[package]]
 name = "fastrand"
-version = "2.0.1"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5"
+checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6"

 [[package]]
 name = "float-cmp"
@@ -190,6 +191,12 @@ dependencies = [
 "cc",
 ]

+[[package]]
+name = "itoa"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
+
 [[package]]
 name = "js-sys"
 version = "0.3.69"
@@ -201,15 +208,15 @@ dependencies = [

 [[package]]
 name = "libc"
-version = "0.2.153"
+version = "0.2.159"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
+checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5"

 [[package]]
 name = "linux-raw-sys"
-version = "0.4.13"
+version = "0.4.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
+checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"

 [[package]]
 name = "log"
@@ -276,9 +283,9 @@ dependencies = [

 [[package]]
 name = "pretty_assertions"
-version = "1.4.0"
+version = "1.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66"
+checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d"
 dependencies = [
 "diff",
 "yansi",
@@ -304,9 +311,9 @@ dependencies = [

 [[package]]
 name = "regex"
-version = "1.10.6"
+version = "1.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
+checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8"
 dependencies = [
 "aho-corasick",
 "memchr",
@@ -316,9 +323,9 @@ dependencies = [

 [[package]]
 name = "regex-automata"
-version = "0.4.5"
+version = "0.4.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd"
+checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3"
 dependencies = [
 "aho-corasick",
 "memchr",
@@ -327,15 +334,15 @@ dependencies = [

 [[package]]
 name = "regex-syntax"
-version = "0.8.2"
+version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
+checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"

 [[package]]
 name = "rustix"
-version = "0.38.31"
+version = "0.38.37"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949"
+checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811"
 dependencies = [
 "bitflags",
 "errno",
@@ -386,9 +393,9 @@ dependencies = [

 [[package]]
 name = "tempfile"
-version = "3.12.0"
+version = "3.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64"
+checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b"
 dependencies = [
 "cfg-if",
 "fastrand",
@@ -411,9 +418,9 @@ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"

 [[package]]
 name = "unicode-width"
-version = "0.1.13"
+version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d"
+checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"

 [[package]]
 name = "wait-timeout"
@@ -602,6 +609,6 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

 [[package]]
 name = "yansi"
-version = "0.5.1"
+version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
+checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
@@ -17,9 +17,10 @@ path = "src/main.rs"
 [dependencies]
 chrono = "0.4.38"
 diff = "0.1.13"
+itoa = "1.0.11"
 regex = "1.10.4"
 same-file = "1.0.6"
-unicode-width = "0.1.12"
+unicode-width = "0.2.0"

 [dev-dependencies]
 pretty_assertions = "1.4.0"
@@ -5,7 +5,7 @@

 [![CodeCov](https://codecov.io/gh/uutils/diffutils/branch/main/graph/badge.svg)](https://codecov.io/gh/uutils/diffutils)

-The goal of this package is to be a drop-in replacement for the [diffutils commands](https://www.gnu.org/software/diffutils/) in Rust.
+The goal of this package is to be a drop-in replacement for the [diffutils commands](https://www.gnu.org/software/diffutils/) (diff, cmp, diff3, sdiff) in Rust.

 Based on the incomplete diff generator in https://github.com/rust-lang/rust/blob/master/src/tools/compiletest/src/runtest.rs, and made to be compatible with GNU's diff and patch tools.

@@ -16,6 +16,18 @@ diffutils = { path = "../" }
 [workspace]
 members = ["."]

+[[bin]]
+name = "fuzz_cmp"
+path = "fuzz_targets/fuzz_cmp.rs"
+test = false
+doc = false
+
+[[bin]]
+name = "fuzz_cmp_args"
+path = "fuzz_targets/fuzz_cmp_args.rs"
+test = false
+doc = false
+
 [[bin]]
 name = "fuzz_patch"
 path = "fuzz_targets/fuzz_patch.rs"
@@ -0,0 +1,36 @@
+"-l"
+"--verbose"
+"-b"
+"--print-bytes"
+"-lb"
+"-bl"
+"-n"
+"--bytes"
+"--bytes="
+"--bytes=1024"
+"--bytes=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+"-i"
+"--ignore-initial"
+"--ignore-initial="
+"--ignore-initial=1024"
+"--ignore-initial=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999:9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+"-s"
+"-q"
+"--quiet"
+"--silent"
+"-"
+"--"
+"1kB"
+"1G"
+"1GB"
+"1T"
+"1TB"
+"1P"
+"1PB"
+"1Z"
+"1ZB"
+"1Y"
+"1YB"
+"1Y"
+"0"
+"1:2"
@@ -0,0 +1,51 @@
+#![no_main]
+#[macro_use]
+extern crate libfuzzer_sys;
+use diffutilslib::cmp::{self, Cmp};
+
+use std::ffi::OsString;
+use std::fs::File;
+use std::io::Write;
+
+/// Build an owned `OsString` from a string slice.
+fn os(s: &str) -> OsString {
+    s.into()
+}
+
+// Fuzz `cmp -l -b` end to end: write the two generated byte vectors to
+// scratch files, run cmp on them, and check that its verdict agrees with a
+// direct comparison of the inputs.
+fuzz_target!(|x: (Vec<u8>, Vec<u8>)| {
+    let args = vec!["cmp", "-l", "-b", "target/fuzz.cmp.a", "target/fuzz.cmp.b"]
+        .into_iter()
+        .map(os)
+        .peekable();
+
+    let (from, to) = x;
+
+    File::create("target/fuzz.cmp.a")
+        .unwrap()
+        .write_all(&from)
+        .unwrap();
+
+    File::create("target/fuzz.cmp.b")
+        .unwrap()
+        .write_all(&to)
+        .unwrap();
+
+    let params =
+        cmp::parse_params(args).unwrap_or_else(|e| panic!("Failed to parse params: {}", e));
+    let ret = cmp::cmp(&params);
+    // Check for an error first: both mismatch branches below also match an
+    // `Err`, so testing it last would leave this branch unreachable.
+    if ret.is_err() {
+        panic!(
+            "target/fuzz.cmp.a and target/fuzz.cmp.b caused cmp to error ({:?}).",
+            ret
+        );
+    } else if from == to && !matches!(ret, Ok(Cmp::Equal)) {
+        panic!(
+            "target/fuzz.cmp.a and target/fuzz.cmp.b are equal, but cmp returned {:?}.",
+            ret
+        );
+    } else if from != to && !matches!(ret, Ok(Cmp::Different)) {
+        panic!(
+            "target/fuzz.cmp.a and target/fuzz.cmp.b are different, but cmp returned {:?}.",
+            ret
+        );
+    }
+});
@@ -0,0 +1,23 @@
+#![no_main]
+#[macro_use]
+extern crate libfuzzer_sys;
+use diffutilslib::cmp;
+
+use libfuzzer_sys::Corpus;
+use std::ffi::OsString;
+
+/// Build an owned `OsString` from a string slice.
+fn os(s: &str) -> OsString {
+    s.into()
+}
+
+// Fuzz the `cmp` command-line parser with arbitrary argument vectors.
+fuzz_target!(|x: Vec<OsString>| -> Corpus {
+    // For longer inputs, only keep those whose first real argument (x[0] is
+    // the executable name) is one of these short options, so that option
+    // parsing is actually exercised.
+    let known_options = [os("-l"), os("-b"), os("-s"), os("-n"), os("-i")];
+    if x.len() > 6 && !known_options.contains(&x[1]) {
+        return Corpus::Reject;
+    }
+    let _ = cmp::parse_params(x.into_iter().peekable());
+    Corpus::Keep
+});
@@ -0,0 +1,98 @@
+// This file is part of the uutils diffutils package.
+//
+// For the full copyright and license information, please view the LICENSE-*
+// files that was distributed with this source code.
+
+use crate::params::{parse_params, Format};
+use crate::utils::report_failure_to_read_input_file;
+use crate::{context_diff, ed_diff, normal_diff, unified_diff};
+use std::env::ArgsOs;
+use std::ffi::OsString;
+use std::fs;
+use std::io::{self, Read, Write};
+use std::iter::Peekable;
+use std::process::{exit, ExitCode};
+
+// Entry point of the `diff` utility: parses `opts`, reads both inputs, runs
+// the selected diff format and writes the result to standard output.
+//
+// Exit codes are documented at
+// https://www.gnu.org/software/diffutils/manual/html_node/Invoking-diff.html.
+//     An exit status of 0 means no differences were found,
+//     1 means some differences were found,
+//     and 2 means trouble.
+pub fn main(opts: Peekable<ArgsOs>) -> ExitCode {
+    // An invalid command line is reported on stderr and ends the process with status 2.
+    let params = parse_params(opts).unwrap_or_else(|error| {
+        eprintln!("{error}");
+        exit(2);
+    });
+    // Prints the "identical" notice, but only when the parsed parameters ask for it.
+    let maybe_report_identical_files = || {
+        if params.report_identical_files {
+            println!(
+                "Files {} and {} are identical",
+                params.from.to_string_lossy(),
+                params.to.to_string_lossy(),
+            );
+        }
+    };
+    // if from and to are the same file, no need to perform any comparison
+    if params.from == "-" && params.to == "-"
+        || same_file::is_same_file(&params.from, &params.to).unwrap_or(false)
+    {
+        maybe_report_identical_files();
+        return ExitCode::SUCCESS;
+    }
+
+    // read files ("-" means standard input)
+    fn read_file_contents(filepath: &OsString) -> io::Result<Vec<u8>> {
+        if filepath == "-" {
+            let mut content = Vec::new();
+            io::stdin().read_to_end(&mut content).and(Ok(content))
+        } else {
+            fs::read(filepath)
+        }
+    }
+    // Both reads are attempted before bailing out, so a failure on each
+    // input gets its own error message.
+    let mut io_error = false;
+    let from_content = match read_file_contents(&params.from) {
+        Ok(from_content) => from_content,
+        Err(e) => {
+            report_failure_to_read_input_file(&params.executable, &params.from, &e);
+            io_error = true;
+            vec![]
+        }
+    };
+    let to_content = match read_file_contents(&params.to) {
+        Ok(to_content) => to_content,
+        Err(e) => {
+            report_failure_to_read_input_file(&params.executable, &params.to, &e);
+            io_error = true;
+            vec![]
+        }
+    };
+    if io_error {
+        return ExitCode::from(2);
+    }
+
+    // run diff
+    let result: Vec<u8> = match params.format {
+        Format::Normal => normal_diff::diff(&from_content, &to_content, &params),
+        Format::Unified => unified_diff::diff(&from_content, &to_content, &params),
+        Format::Context => context_diff::diff(&from_content, &to_content, &params),
+        Format::Ed => ed_diff::diff(&from_content, &to_content, &params).unwrap_or_else(|error| {
+            eprintln!("{error}");
+            exit(2);
+        }),
+    };
+    // With `brief` set, only state that the files differ instead of printing the diff.
+    if params.brief && !result.is_empty() {
+        println!(
+            "Files {} and {} differ",
+            params.from.to_string_lossy(),
+            params.to.to_string_lossy()
+        );
+    } else {
+        io::stdout().write_all(&result).unwrap();
+    }
+    // An empty diff means the inputs are identical (status 0); otherwise status 1.
+    if result.is_empty() {
+        maybe_report_identical_files();
+        ExitCode::SUCCESS
+    } else {
+        ExitCode::from(1)
+    }
+}
@@ -1,3 +1,4 @@
+pub mod cmp;
 pub mod context_diff;
 pub mod ed_diff;
 pub mod macros;
@@ -3,15 +3,17 @@
 // For the full copyright and license information, please view the LICENSE-*
 // files that was distributed with this source code.

-use crate::params::{parse_params, Format};
-use regex::Regex;
-use std::env;
-use std::ffi::OsString;
-use std::fs;
-use std::io::{self, Read, Write};
-use std::process::{exit, ExitCode};
+use std::{
+    env::ArgsOs,
+    ffi::{OsStr, OsString},
+    iter::Peekable,
+    path::{Path, PathBuf},
+    process::ExitCode,
+};

+mod cmp;
 mod context_diff;
+mod diff;
 mod ed_diff;
 mod macros;
 mod normal_diff;
@@ -19,103 +21,60 @@ mod params;
 mod unified_diff;
 mod utils;

-fn report_failure_to_read_input_file(
-    executable: &OsString,
-    filepath: &OsString,
-    error: &std::io::Error,
-) {
-    // std::io::Error's display trait outputs "{detail} (os error {code})"
-    // but we want only the {detail} (error string) part
-    let error_code_re = Regex::new(r"\ \(os\ error\ \d+\)$").unwrap();
-    eprintln!(
-        "{}: {}: {}",
-        executable.to_string_lossy(),
-        filepath.to_string_lossy(),
-        error_code_re.replace(error.to_string().as_str(), ""),
-    );
+/// Returns the path this binary was invoked as.
+///
+/// Uses the first argument when it is present and non-empty (it is peeked,
+/// not consumed), and falls back to `std::env::current_exe()` otherwise.
+///
+/// # Panics
+/// Panics if the binary path cannot be determined
+fn binary_path(args: &mut Peekable<ArgsOs>) -> PathBuf {
+    args.peek()
+        .filter(|first| !first.is_empty())
+        .map(PathBuf::from)
+        .unwrap_or_else(|| std::env::current_exe().unwrap())
+}
+
+/// Returns the file stem of `binary_path`, as computed by `Path::file_stem`.
+///
+/// # Panics
+/// Panics if `binary_path` has no file name component
+fn name(binary_path: &Path) -> &OsStr {
+    binary_path.file_stem().unwrap()
+}
+
+const VERSION: &str = env!("CARGO_PKG_VERSION");
+
+/// Prints the multi-call usage summary for `name` to standard output.
+fn usage(name: &str) {
+    // Every entry is printed followed by one blank line.
+    let paragraphs = [
+        format!("{name} {VERSION} (multi-call binary)"),
+        format!("Usage: {name} [function [arguments...]]"),
+        String::from("Currently defined functions:"),
+        String::from("    cmp, diff"),
+    ];
+    for paragraph in paragraphs {
+        println!("{paragraph}\n");
+    }
+}
+
+/// Reports a missing (or unusable) utility name, prints the usage summary
+/// and terminates the process.
+///
+/// Exits with status 2 ("trouble"), the same status `main` returns for an
+/// unsupported utility name, so a command line that could not be dispatched
+/// is never reported as success.
+fn second_arg_error(name: &OsStr) -> ! {
+    eprintln!("Expected utility name as second argument, got nothing.");
+    usage(&name.to_string_lossy());
+    std::process::exit(2);
+}

-// Exit codes are documented at
-// https://www.gnu.org/software/diffutils/manual/html_node/Invoking-diff.html.
-//     An exit status of 0 means no differences were found,
-//     1 means some differences were found,
-//     and 2 means trouble.
 fn main() -> ExitCode {
-    let opts = env::args_os();
-    let params = parse_params(opts).unwrap_or_else(|error| {
-        eprintln!("{error}");
-        exit(2);
-    });
-    // if from and to are the same file, no need to perform any comparison
-    let maybe_report_identical_files = || {
-        if params.report_identical_files {
-            println!(
-                "Files {} and {} are identical",
-                params.from.to_string_lossy(),
-                params.to.to_string_lossy(),
-            );
-        }
-    };
-    if params.from == "-" && params.to == "-"
-        || same_file::is_same_file(&params.from, &params.to).unwrap_or(false)
-    {
-        maybe_report_identical_files();
-        return ExitCode::SUCCESS;
-    }
+    let mut args = std::env::args_os().peekable();

-    // read files
-    fn read_file_contents(filepath: &OsString) -> io::Result<Vec<u8>> {
-        if filepath == "-" {
-            let mut content = Vec::new();
-            io::stdin().read_to_end(&mut content).and(Ok(content))
-        } else {
-            fs::read(filepath)
-        }
-    }
-    let mut io_error = false;
-    let from_content = match read_file_contents(&params.from) {
-        Ok(from_content) => from_content,
-        Err(e) => {
-            report_failure_to_read_input_file(&params.executable, &params.from, &e);
-            io_error = true;
-            vec![]
-        }
-    };
-    let to_content = match read_file_contents(&params.to) {
-        Ok(to_content) => to_content,
-        Err(e) => {
-            report_failure_to_read_input_file(&params.executable, &params.to, &e);
-            io_error = true;
-            vec![]
-        }
-    };
-    if io_error {
-        return ExitCode::from(2);
-    }
+    let exe_path = binary_path(&mut args);
+    let exe_name = name(&exe_path);

-    // run diff
-    let result: Vec<u8> = match params.format {
-        Format::Normal => normal_diff::diff(&from_content, &to_content, &params),
-        Format::Unified => unified_diff::diff(&from_content, &to_content, &params),
-        Format::Context => context_diff::diff(&from_content, &to_content, &params),
-        Format::Ed => ed_diff::diff(&from_content, &to_content, &params).unwrap_or_else(|error| {
-            eprintln!("{error}");
-            exit(2);
-        }),
+    let util_name = if exe_name == "diffutils" {
+        // Discard the item we peeked.
+        let _ = args.next();
+
+        args.peek()
+            .cloned()
+            .unwrap_or_else(|| second_arg_error(exe_name))
+    } else {
+        OsString::from(exe_name)
    };
-    if params.brief && !result.is_empty() {
-        println!(
-            "Files {} and {} differ",
-            params.from.to_string_lossy(),
-            params.to.to_string_lossy()
-        );
-    } else {
-        io::stdout().write_all(&result).unwrap();
-    }
-    if result.is_empty() {
-        maybe_report_identical_files();
-        ExitCode::SUCCESS
-    } else {
-        ExitCode::from(1)
+
+    match util_name.to_str() {
+        Some("diff") => diff::main(args),
+        Some("cmp") => cmp::main(args),
+        Some(name) => {
+            eprintln!("{}: utility not supported", name);
+            ExitCode::from(2)
+        }
+        None => second_arg_error(exe_name),
    }
 }
@@ -1,4 +1,5 @@
 use std::ffi::OsString;
+use std::iter::Peekable;
 use std::path::PathBuf;

 use regex::Regex;
@@ -41,8 +42,7 @@ impl Default for Params {
    }
 }

-pub fn parse_params<I: IntoIterator<Item = OsString>>(opts: I) -> Result<Params, String> {
-    let mut opts = opts.into_iter().peekable();
+pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Result<Params, String> {
    // parse CLI

    let Some(executable) = opts.next() else {
@@ -323,7 +323,12 @@ mod tests {
                to: os("bar"),
                ..Default::default()
            }),
-            parse_params([os("diff"), os("foo"), os("bar")].iter().cloned())
+            parse_params(
+                [os("diff"), os("foo"), os("bar")]
+                    .iter()
+                    .cloned()
+                    .peekable()
+            )
        );
        assert_eq!(
            Ok(Params {
@@ -336,6 +341,7 @@ mod tests {
                [os("diff"), os("--normal"), os("foo"), os("bar")]
                    .iter()
                    .cloned()
+                    .peekable()
            )
        );
    }
@@ -350,7 +356,12 @@ mod tests {
                    format: Format::Ed,
                    ..Default::default()
                }),
-                parse_params([os("diff"), os(arg), os("foo"), os("bar")].iter().cloned())
+                parse_params(
+                    [os("diff"), os(arg), os("foo"), os("bar")]
+                        .iter()
+                        .cloned()
+                        .peekable()
+                )
            );
        }
    }
@@ -368,7 +379,7 @@ mod tests {
                    format: Format::Context,
                    ..Default::default()
                }),
-                parse_params(params.iter().map(|x| os(x)))
+                parse_params(params.iter().map(|x| os(x)).peekable())
            );
        }
        for args in [
@@ -390,7 +401,7 @@ mod tests {
                    context_count: 42,
                    ..Default::default()
                }),
-                parse_params(params.iter().map(|x| os(x)))
+                parse_params(params.iter().map(|x| os(x)).peekable())
            );
        }
    }
@@ -410,7 +421,7 @@ mod tests {
            let mut params = vec!["diff"];
            params.extend(args);
            params.extend(["foo", "bar"]);
-            assert!(parse_params(params.iter().map(|x| os(x))).is_err());
+            assert!(parse_params(params.iter().map(|x| os(x)).peekable()).is_err());
        }
    }
    #[test]
@@ -427,7 +438,7 @@ mod tests {
                    format: Format::Unified,
                    ..Default::default()
                }),
-                parse_params(params.iter().map(|x| os(x)))
+                parse_params(params.iter().map(|x| os(x)).peekable())
            );
        }
        for args in [
@@ -449,7 +460,7 @@ mod tests {
                    context_count: 42,
                    ..Default::default()
                }),
-                parse_params(params.iter().map(|x| os(x)))
+                parse_params(params.iter().map(|x| os(x)).peekable())
            );
        }
    }
@@ -469,7 +480,7 @@ mod tests {
            let mut params = vec!["diff"];
            params.extend(args);
            params.extend(["foo", "bar"]);
-            assert!(parse_params(params.iter().map(|x| os(x))).is_err());
+            assert!(parse_params(params.iter().map(|x| os(x)).peekable()).is_err());
        }
    }
    #[test]
@@ -487,6 +498,7 @@ mod tests {
                [os("diff"), os("-u54"), os("foo"), os("bar")]
                    .iter()
                    .cloned()
+                    .peekable()
            )
        );
        assert_eq!(
@@ -502,6 +514,7 @@ mod tests {
                [os("diff"), os("-U54"), os("foo"), os("bar")]
                    .iter()
                    .cloned()
+                    .peekable()
            )
        );
        assert_eq!(
@@ -517,6 +530,7 @@ mod tests {
                [os("diff"), os("-U"), os("54"), os("foo"), os("bar")]
                    .iter()
                    .cloned()
+                    .peekable()
            )
        );
        assert_eq!(
@@ -532,6 +546,7 @@ mod tests {
                [os("diff"), os("-c54"), os("foo"), os("bar")]
                    .iter()
                    .cloned()
+                    .peekable()
            )
        );
    }
@@ -544,7 +559,12 @@ mod tests {
                to: os("bar"),
                ..Default::default()
            }),
-            parse_params([os("diff"), os("foo"), os("bar")].iter().cloned())
+            parse_params(
+                [os("diff"), os("foo"), os("bar")]
+                    .iter()
+                    .cloned()
+                    .peekable()
+            )
        );
        assert_eq!(
            Ok(Params {
@@ -554,7 +574,12 @@ mod tests {
                report_identical_files: true,
                ..Default::default()
            }),
-            parse_params([os("diff"), os("-s"), os("foo"), os("bar")].iter().cloned())
+            parse_params(
+                [os("diff"), os("-s"), os("foo"), os("bar")]
+                    .iter()
+                    .cloned()
+                    .peekable()
+            )
        );
        assert_eq!(
            Ok(Params {
@@ -573,6 +598,7 @@ mod tests {
                ]
                .iter()
                .cloned()
+                .peekable()
            )
        );
    }
@@ -585,7 +611,12 @@ mod tests {
                to: os("bar"),
                ..Default::default()
            }),
-            parse_params([os("diff"), os("foo"), os("bar")].iter().cloned())
+            parse_params(
+                [os("diff"), os("foo"), os("bar")]
+                    .iter()
+                    .cloned()
+                    .peekable()
+            )
        );
        assert_eq!(
            Ok(Params {
@@ -595,7 +626,12 @@ mod tests {
                brief: true,
                ..Default::default()
            }),
-            parse_params([os("diff"), os("-q"), os("foo"), os("bar")].iter().cloned())
+            parse_params(
+                [os("diff"), os("-q"), os("foo"), os("bar")]
+                    .iter()
+                    .cloned()
+                    .peekable()
+            )
        );
        assert_eq!(
            Ok(Params {
@@ -609,6 +645,7 @@ mod tests {
                [os("diff"), os("--brief"), os("foo"), os("bar"),]
                    .iter()
                    .cloned()
+                    .peekable()
            )
        );
    }
@@ -621,7 +658,12 @@ mod tests {
                to: os("bar"),
                ..Default::default()
            }),
-            parse_params([os("diff"), os("foo"), os("bar")].iter().cloned())
+            parse_params(
+                [os("diff"), os("foo"), os("bar")]
+                    .iter()
+                    .cloned()
+                    .peekable()
+            )
        );
        for option in ["-t", "--expand-tabs"] {
            assert_eq!(
@@ -636,6 +678,7 @@ mod tests {
                    [os("diff"), os(option), os("foo"), os("bar")]
                        .iter()
                        .cloned()
+                        .peekable()
                )
            );
        }
@@ -649,7 +692,12 @@ mod tests {
                to: os("bar"),
                ..Default::default()
            }),
-            parse_params([os("diff"), os("foo"), os("bar")].iter().cloned())
+            parse_params(
+                [os("diff"), os("foo"), os("bar")]
+                    .iter()
+                    .cloned()
+                    .peekable()
+            )
        );
        assert_eq!(
            Ok(Params {
@@ -663,6 +711,7 @@ mod tests {
                [os("diff"), os("--tabsize=0"), os("foo"), os("bar")]
                    .iter()
                    .cloned()
+                    .peekable()
            )
        );
        assert_eq!(
@@ -677,36 +726,42 @@ mod tests {
                [os("diff"), os("--tabsize=42"), os("foo"), os("bar")]
                    .iter()
                    .cloned()
+                    .peekable()
            )
        );
        assert!(parse_params(
            [os("diff"), os("--tabsize"), os("foo"), os("bar")]
                .iter()
                .cloned()
+                .peekable()
        )
        .is_err());
        assert!(parse_params(
            [os("diff"), os("--tabsize="), os("foo"), os("bar")]
                .iter()
                .cloned()
+                .peekable()
        )
        .is_err());
        assert!(parse_params(
            [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")]
                .iter()
                .cloned()
+                .peekable()
        )
        .is_err());
        assert!(parse_params(
            [os("diff"), os("--tabsize=-1"), os("foo"), os("bar")]
                .iter()
                .cloned()
+                .peekable()
        )
        .is_err());
        assert!(parse_params(
            [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")]
                .iter()
                .cloned()
+                .peekable()
        )
        .is_err());
        assert!(parse_params(
@@ -718,6 +773,7 @@ mod tests {
            ]
            .iter()
            .cloned()
+            .peekable()
        )
        .is_err());
    }
@@ -730,7 +786,12 @@ mod tests {
                to: os("-h"),
                ..Default::default()
            }),
-            parse_params([os("diff"), os("--"), os("-g"), os("-h")].iter().cloned())
+            parse_params(
+                [os("diff"), os("--"), os("-g"), os("-h")]
+                    .iter()
+                    .cloned()
+                    .peekable()
+            )
        );
    }
    #[test]
@@ -742,7 +803,7 @@ mod tests {
                to: os("-"),
                ..Default::default()
            }),
-            parse_params([os("diff"), os("foo"), os("-")].iter().cloned())
+            parse_params([os("diff"), os("foo"), os("-")].iter().cloned().peekable())
        );
        assert_eq!(
            Ok(Params {
@@ -751,7 +812,7 @@ mod tests {
                to: os("bar"),
                ..Default::default()
            }),
-            parse_params([os("diff"), os("-"), os("bar")].iter().cloned())
+            parse_params([os("diff"), os("-"), os("bar")].iter().cloned().peekable())
        );
        assert_eq!(
            Ok(Params {
@@ -760,27 +821,45 @@ mod tests {
                to: os("-"),
                ..Default::default()
            }),
-            parse_params([os("diff"), os("-"), os("-")].iter().cloned())
+            parse_params([os("diff"), os("-"), os("-")].iter().cloned().peekable())
        );
-        assert!(parse_params([os("diff"), os("foo"), os("bar"), os("-")].iter().cloned()).is_err());
-        assert!(parse_params([os("diff"), os("-"), os("-"), os("-")].iter().cloned()).is_err());
+        assert!(parse_params(
+            [os("diff"), os("foo"), os("bar"), os("-")]
+                .iter()
+                .cloned()
+                .peekable()
+        )
+        .is_err());
+        assert!(parse_params(
+            [os("diff"), os("-"), os("-"), os("-")]
+                .iter()
+                .cloned()
+                .peekable()
+        )
+        .is_err());
    }
    #[test]
    fn missing_arguments() {
-        assert!(parse_params([os("diff")].iter().cloned()).is_err());
-        assert!(parse_params([os("diff"), os("foo")].iter().cloned()).is_err());
+        assert!(parse_params([os("diff")].iter().cloned().peekable()).is_err());
+        assert!(parse_params([os("diff"), os("foo")].iter().cloned().peekable()).is_err());
    }
    #[test]
    fn unknown_argument() {
+        assert!(parse_params(
+            [os("diff"), os("-g"), os("foo"), os("bar")]
+                .iter()
+                .cloned()
+                .peekable()
+        )
+        .is_err());
        assert!(
-            parse_params([os("diff"), os("-g"), os("foo"), os("bar")].iter().cloned()).is_err()
+            parse_params([os("diff"), os("-g"), os("bar")].iter().cloned().peekable()).is_err()
        );
-        assert!(parse_params([os("diff"), os("-g"), os("bar")].iter().cloned()).is_err());
-        assert!(parse_params([os("diff"), os("-g")].iter().cloned()).is_err());
+        assert!(parse_params([os("diff"), os("-g")].iter().cloned().peekable()).is_err());
    }
    #[test]
    fn empty() {
-        assert!(parse_params([].iter().cloned()).is_err());
+        assert!(parse_params([].iter().cloned().peekable()).is_err());
    }
    #[test]
    fn conflicting_output_styles() {
@@ -797,6 +876,7 @@ mod tests {
                [os("diff"), os(arg1), os(arg2), os("foo"), os("bar")]
                    .iter()
                    .cloned()
+                    .peekable()
            )
            .is_err());
        }
@@ -3,8 +3,9 @@
 // For the full copyright and license information, please view the LICENSE-*
 // files that was distributed with this source code.

-use std::io::Write;
+use std::{ffi::OsString, io::Write};

+use regex::Regex;
 use unicode_width::UnicodeWidthStr;

 /// Replace tabs by spaces in the input line.
@@ -71,6 +72,33 @@ pub fn get_modification_time(file_path: &str) -> String {
    modification_time
 }

+pub fn format_failure_to_read_input_file(
+    executable: &OsString,
+    filepath: &OsString,
+    error: &std::io::Error,
+) -> String {
+    // std::io::Error's display trait outputs "{detail} (os error {code})"
+    // but we want only the {detail} (error string) part
+    let error_code_re = Regex::new(r"\ \(os\ error\ \d+\)$").unwrap();
+    format!(
+        "{}: {}: {}",
+        executable.to_string_lossy(),
+        filepath.to_string_lossy(),
+        error_code_re.replace(error.to_string().as_str(), ""),
+    )
+}
+
+pub fn report_failure_to_read_input_file(
+    executable: &OsString,
+    filepath: &OsString,
+    error: &std::io::Error,
+) {
+    eprintln!(
+        "{}",
+        format_failure_to_read_input_file(executable, filepath, error)
+    );
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -101,10 +129,11 @@ mod tests {

            // Note: The Woman Scientist emoji (👩‍🔬) is a ZWJ sequence combining
            // the Woman emoji (👩) and the Microscope emoji (🔬). On supported platforms
-            // it is displayed as a single emoji and should have a print size of 2 columns,
-            // but terminal emulators tend to not support this, and display the two emojis
-            // side by side, thus accounting for a print size of 4 columns.
-            assert_tab_expansion("foo\t👩‍🔬\tbaz", 6, "foo   👩‍🔬  baz");
+            // it is displayed as a single emoji and has a print size of 2 columns.
+            // Terminal emulators tend to not support this, and display the two emojis
+            // side by side, thus accounting for a print size of 4 columns, but the
+            // unicode_width crate reports a correct size of 2.
+            assert_tab_expansion("foo\t👩‍🔬\tbaz", 6, "foo   👩‍🔬    baz");
        }

        #[test]
@@ -21,7 +21,7 @@
 # (e.g. 'dev' or 'test').
 # Unless overridden by the $TESTS environment variable, all tests in the test
 # suite will be run. Tests targeting a command that is not yet implemented
-# (e.g. cmp, diff3 or sdiff) are skipped.
+# (e.g. diff3 or sdiff) are skipped.

 scriptpath=$(dirname "$(readlink -f "$0")")
 rev=$(git rev-parse HEAD)
@@ -57,8 +57,13 @@ upstreamrev=$(git rev-parse HEAD)
 mkdir src
 cd src
 ln -s "$binary" diff
+ln -s "$binary" cmp
 cd ../tests

+# Fetch tests/init.sh from the gnulib repository (needed since
+# https://git.savannah.gnu.org/cgit/diffutils.git/commit/tests?id=1d2456f539)
+curl -s "$gitserver/gitweb/?p=gnulib.git;a=blob_plain;f=tests/init.sh;hb=HEAD" -o init.sh
+
 if [[ -n "$TESTS" ]]
 then
  tests="$TESTS"
@@ -71,7 +76,6 @@ total=$(echo "$tests" | wc -w)
 echo "Running $total tests"
 export LC_ALL=C
 export KEEP=yes
-exitcode=0
 timestamp=$(date -Iseconds)
 urlroot="$gitserver/cgit/diffutils.git/tree/tests/"
 passed=0
@@ -82,35 +86,43 @@ for test in $tests
 do
  result="FAIL"
  url="$urlroot$test?id=$upstreamrev"
-  # Run only the tests that invoke `diff`,
+  # Run only the tests that invoke `diff` or `cmp`,
  # because other binaries aren't implemented yet
-  if ! grep -E -s -q "(cmp|diff3|sdiff)" "$test"
+  if ! grep -E -s -q "(diff3|sdiff)" "$test"
  then
-    sh "$test" 1> stdout.txt 2> stderr.txt && result="PASS" || exitcode=1
-    json+="{\"test\":\"$test\",\"result\":\"$result\","
-    json+="\"url\":\"$url\","
-    json+="\"stdout\":\"$(base64 -w0 < stdout.txt)\","
-    json+="\"stderr\":\"$(base64 -w0 < stderr.txt)\","
-    json+="\"files\":{"
-    cd gt-$test.*
-    # Note: this doesn't include the contents of subdirectories,
-    # but there isn't much value added in doing so
-    for file in *
-    do
-      [[ -f "$file" ]] && json+="\"$file\":\"$(base64 -w0 < "$file")\","
-    done
-    json="${json%,}}},"
-    cd - > /dev/null
-    [[ "$result" = "PASS" ]] && (( passed++ ))
-    [[ "$result" = "FAIL" ]] && (( failed++ ))
+    sh "$test" 1> stdout.txt 2> stderr.txt && result="PASS"
+    if [[ $? = 77 ]]
+    then
+      result="SKIP"
+    else
+      json+="{\"test\":\"$test\",\"result\":\"$result\","
+      json+="\"url\":\"$url\","
+      json+="\"stdout\":\"$(base64 -w0 < stdout.txt)\","
+      json+="\"stderr\":\"$(base64 -w0 < stderr.txt)\","
+      json+="\"files\":{"
+      cd gt-$test.*
+      # Note: this doesn't include the contents of subdirectories,
+      # but there isn't much value added in doing so
+      for file in *
+      do
+        [[ -f "$file" ]] && json+="\"$file\":\"$(base64 -w0 < "$file")\","
+      done
+      json="${json%,}}},"
+      cd - > /dev/null
+      [[ "$result" = "PASS" ]] && (( passed++ ))
+      [[ "$result" = "FAIL" ]] && (( failed++ ))
+    fi
  else
    result="SKIP"
-    (( skipped++ ))
-    json+="{\"test\":\"$test\",\"url\":\"$url\",\"result\":\"$result\"},"
  fi
  color=2 # green
  [[ "$result" = "FAIL" ]] && color=1 # red
-  [[ "$result" = "SKIP" ]] && color=3 # yellow
+  if [[ $result = "SKIP" ]]
+  then
+    (( skipped++ ))
+    json+="{\"test\":\"$test\",\"url\":\"$url\",\"result\":\"$result\"},"
+    color=3 # yellow
+  fi
  printf "  %-40s $(tput setaf $color)$result$(tput sgr0)\n" "$test"
 done
 echo ""
@@ -138,4 +150,5 @@ resultsfile="test-results.json"
 echo "$json" | jq > "$resultsfile"
 echo "Results written to $scriptpath/$resultsfile"

-exit $exitcode
+(( failed > 0 )) && exit 1
+exit 0
Author	SHA1	Message	Date
Sylvestre Ledru	0d5719810f	README.md: be explicit with the list of tools	2024-10-02 13:58:16 +02:00
Sylvestre Ledru	7480068e7d	Merge pull request #93 from oSoMoN/upstream-test-suite-skipped-tests-issue92 Upstream test suite: correctly handle tests that are skipped (fixes #92)	2024-10-02 13:55:53 +02:00
Olivier Tilloy	763074a804	Merge pull request #88 from kov/cmp Add cmp utility (fixes #14)	2024-10-01 18:59:15 +02:00
Gustavo Noronha Silva	fac8dab182	cmp: completely avoid Rust fmt in verbose mode This makes the code less readable, but gets us a massive improvement to performance. Comparing ~36M completely different files now takes ~40% of the time. Compared to GNU cmp, we now run the same comparison in ~26% of the time. This also improves comparing binary files. A comparison of chromium and libxul now takes ~60% of the time. We also beat GNU cmp by about the same margin. Before: > hyperfine --warmup 1 -i --output=pipe \ '../target/release/diffutils cmp -l huge huge.3' Benchmark 1: ../target/release/diffutils cmp -l huge huge.3 Time (mean ± σ): 2.000 s ± 0.016 s [User: 1.603 s, System: 0.392 s] Range (min … max): 1.989 s … 2.043 s 10 runs Warning: Ignoring non-zero exit code. > hyperfine --warmup 1 -i --output=pipe \ '../target/release/diffutils cmp -l -b \ /usr/lib64/chromium-browser/chromium-browser \ /usr/lib64/firefox/libxul.so' Benchmark 1: ../target/release/diffutils cmp -l -b /usr/lib64/chromium-browser/chromium-browser /usr/lib64/firefox/libxul.so Time (mean ± σ): 24.704 s ± 0.162 s [User: 21.948 s, System: 2.700 s] Range (min … max): 24.359 s … 24.889 s 10 runs Warning: Ignoring non-zero exit code. After: > hyperfine --warmup 1 -i --output=pipe \ '../target/release/diffutils cmp -l huge huge.3' Benchmark 1: ../target/release/diffutils cmp -l huge huge.3 Time (mean ± σ): 849.5 ms ± 6.2 ms [User: 538.3 ms, System: 306.8 ms] Range (min … max): 839.4 ms … 857.7 ms 10 runs Warning: Ignoring non-zero exit code. > hyperfine --warmup 1 -i --output=pipe \ '../target/release/diffutils cmp -l -b \ /usr/lib64/chromium-browser/chromium-browser \ /usr/lib64/firefox/libxul.so' Benchmark 1: ../target/release/diffutils cmp -l -b /usr/lib64/chromium-browser/chromium-browser /usr/lib64/firefox/libxul.so Time (mean ± σ): 14.646 s ± 0.040 s [User: 12.328 s, System: 2.286 s] Range (min … max): 14.585 s … 14.702 s 10 runs Warning: Ignoring non-zero exit code.	2024-10-01 13:30:57 -03:00
Gustavo Noronha Silva	2e681301b4	cmp: avoid using advanced rust formatting for -l Octal conversion and simple integer to string both show up in profiling. This change improves comparing ~36M completely different files with both -l and -b by ~11-13%.	2024-10-01 13:30:57 -03:00
Gustavo Noronha Silva	50057412bd	Add cmp utility The utility should support all the arguments supported by GNU cmp and perform slightly better. On a "bad" scenario, ~36M files which are completely different, our version runs in ~72% of the time of the original on my M1 Max: > hyperfine --warmup 1 -i --output=pipe \ 'cmp -l huge huge.3' Benchmark 1: cmp -l huge huge.3 Time (mean ± σ): 3.237 s ± 0.014 s [User: 2.891 s, System: 0.341 s] Range (min … max): 3.221 s … 3.271 s 10 runs Warning: Ignoring non-zero exit code. > hyperfine --warmup 1 -i --output=pipe \ '../target/release/diffutils cmp -l huge huge.3' Benchmark 1: ../target/release/diffutils cmp -l huge huge.3 Time (mean ± σ): 2.392 s ± 0.009 s [User: 1.978 s, System: 0.406 s] Range (min … max): 2.378 s … 2.406 s 10 runs Warning: Ignoring non-zero exit code. Our cmp runs in ~116% of the time when comparing libxul.so to the chromium-browser binary with -l and -b. In a best case scenario of comparing 2 files which are the same except for the last byte, our tool is slightly faster.	2024-10-01 13:30:57 -03:00
Daniel Hofstetter	68292b370d	Merge pull request #95 from uutils/renovate/regex-1.x-lockfile Update Rust crate regex to v1.11.0	2024-09-29 17:43:59 +02:00
renovate[bot]	26bcc102c0	Update Rust crate regex to v1.11.0	2024-09-29 15:37:34 +00:00
Olivier Tilloy	50198ef2c1	Merge pull request #94 from uutils/renovate/tempfile-3.x-lockfile Update Rust crate tempfile to v3.13.0	2024-09-28 23:45:47 +02:00
renovate[bot]	bfdbf6d7b2	Update Rust crate tempfile to v3.13.0	2024-09-28 19:37:34 +00:00
Olivier Tilloy	f75c187971	Upstream test suite: correctly handle tests that are skipped (fixes #92 )	2024-09-27 19:45:34 +02:00
Sylvestre Ledru	d07c0438b5	Merge pull request #91 from oSoMoN/upstream-test-suite-fetch-init-issue90 When running the upstream test suite, fetch missing tests/init.sh (fixes #90)	2024-09-27 07:57:43 +02:00
Gustavo Noronha Silva	72c7802f06	Take utility name as first parameter on diffutils This is in preparation for adding the other diffutils commands, cmp, diff3, sdiff. We use a similar strategy to uutils/coreutils, with the single binary acting as one of the supported tools if called through a symlink with the appropriate name. When using the multi-tool binary directly, the utility needs to be the first parameter.	2024-09-26 21:22:24 -03:00
Olivier Tilloy	c1b66e4a47	When running the upstream test suite, fetch missing tests/init.sh (fixes #90 )	2024-09-26 22:44:56 +02:00
Daniel Hofstetter	9103365691	Merge pull request #87 from uutils/renovate/unicode-width-0.x Update Rust crate unicode-width to 0.2.0	2024-09-20 07:24:05 +02:00
renovate[bot]	7574243de1	Update Rust crate unicode-width to 0.2.0	2024-09-20 05:14:22 +00:00
Olivier Tilloy	6f3834d69d	Merge pull request #86 from uutils/renovate/unicode-width-0.x-lockfile Update Rust crate unicode-width to v0.1.14	2024-09-19 22:57:18 +02:00
Olivier Tilloy	d8b91fd60e	Update unit test expectation	2024-09-19 22:33:33 +02:00
renovate[bot]	7c9c2a1ab2	Update Rust crate unicode-width to v0.1.14	2024-09-19 20:09:56 +00:00
Daniel Hofstetter	63d51bcd69	Merge pull request #85 from uutils/renovate/pretty_assertions-1.x-lockfile Update Rust crate pretty_assertions to v1.4.1	2024-09-16 13:30:18 +02:00
renovate[bot]	d5bce65a29	Update Rust crate pretty_assertions to v1.4.1	2024-09-16 10:41:54 +00:00