Compare commits

..

1 Commits

Author SHA1 Message Date
Sylvestre Ledru 0d5719810f README.md: be explicit with the list of tools 2024-10-02 13:58:16 +02:00
20 changed files with 421 additions and 3406 deletions
+25 -34
View File
@@ -4,7 +4,6 @@ name: Basic CI
env:
CARGO_TERM_COLOR: always
CARGO_INCREMENTAL: 0
jobs:
check:
@@ -16,6 +15,7 @@ jobs:
os: [ubuntu-latest, macOS-latest, windows-latest]
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- run: cargo check
test:
@@ -27,11 +27,10 @@ jobs:
os: [ubuntu-latest, macOS-latest, windows-latest]
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- name: install GNU patch on MacOS
if: runner.os == 'macOS'
run: |
brew install gpatch
echo "/opt/homebrew/opt/gpatch/libexec/gnubin" >> "$GITHUB_PATH"
run: brew install gpatch
- name: set up PATH on Windows
# Needed to use GNU's patch.exe instead of Strawberry Perl patch
if: runner.os == 'Windows'
@@ -43,6 +42,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- run: rustup component add rustfmt
- run: cargo fmt --all -- --check
clippy:
@@ -54,18 +55,17 @@ jobs:
os: [ubuntu-latest, macOS-latest, windows-latest]
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- run: rustup component add clippy
- run: cargo clippy -- -D warnings
gnu-testsuite:
permissions:
contents: write # Publish diffutils instead of discarding
name: GNU test suite
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: |
cargo build --config=profile.release.strip=true --profile=release #-fast
zstd -19 target/release/diffutils -o diffutils-x86_64-unknown-linux-gnu.zst
- uses: dtolnay/rust-toolchain@stable
- run: cargo build --release
# do not fail, the report is merely informative (at least until all tests pass reliably)
- run: ./tests/run-upstream-testsuite.sh release || true
env:
@@ -75,17 +75,6 @@ jobs:
name: test-results.json
path: tests/test-results.json
- run: ./tests/print-test-results.sh tests/test-results.json
- name: Publish latest commit
uses: softprops/action-gh-release@v2
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
with:
tag_name: latest-commit
draft: false
prerelease: true
files: |
diffutils-x86_64-unknown-linux-gnu.zst
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
coverage:
name: Code Coverage
@@ -100,42 +89,44 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Initialize workflow variables
env:
# Use -Z
RUSTC_BOOTSTRAP: 1
id: vars
shell: bash
run: |
## VARs setup
outputs() { step_id="vars"; for var in "$@" ; do echo steps.${step_id}.outputs.${var}="${!var}"; echo "${var}=${!var}" >> $GITHUB_OUTPUT; done; }
# toolchain
TOOLCHAIN="nightly" ## default to "nightly" toolchain (required for certain required unstable compiler flags) ## !maint: refactor when stable channel has needed support
# * specify gnu-type TOOLCHAIN for windows; `grcov` requires gnu-style code coverage data files
case ${{ matrix.job.os }} in windows-*) TOOLCHAIN="$TOOLCHAIN-x86_64-pc-windows-gnu" ;; esac;
# * use requested TOOLCHAIN if specified
if [ -n "${{ matrix.job.toolchain }}" ]; then TOOLCHAIN="${{ matrix.job.toolchain }}" ; fi
outputs TOOLCHAIN
# target-specific options
# * CARGO_FEATURES_OPTION
CARGO_FEATURES_OPTION='--all -- --check' ; ## default to '--all-features' for code coverage
# * CODECOV_FLAGS
CODECOV_FLAGS=$( echo "${{ matrix.job.os }}" | sed 's/[^[:alnum:]]/_/g' )
outputs CODECOV_FLAGS
- name: rust toolchain ~ install
uses: dtolnay/rust-toolchain@nightly
- run: rustup component add llvm-tools-preview
- name: install GNU patch on MacOS
if: runner.os == 'macOS'
run: |
brew install gpatch
echo "/opt/homebrew/opt/gpatch/libexec/gnubin" >> "$GITHUB_PATH"
run: brew install gpatch
- name: set up PATH on Windows
# Needed to use GNU's patch.exe instead of Strawberry Perl patch
if: runner.os == 'Windows'
run: echo "C:\Program Files\Git\usr\bin" >> $env:GITHUB_PATH
- name: Test
run: cargo test --all-features --no-fail-fast
run: cargo test ${{ steps.vars.outputs.CARGO_FEATURES_OPTION }} --no-fail-fast
env:
CARGO_INCREMENTAL: "0"
RUSTC_WRAPPER: ""
RUSTFLAGS: "-Cinstrument-coverage -Zcoverage-options=branch -Ccodegen-units=1 -Copt-level=0 -Coverflow-checks=off -Zpanic_abort_tests -Cpanic=abort"
RUSTDOCFLAGS: "-Cpanic=abort"
LLVM_PROFILE_FILE: "diffutils-%p-%m.profraw"
# Use -Z
RUSTC_BOOTSTRAP: 1
- name: "`grcov` ~ install"
env:
# Use -Z
RUSTC_BOOTSTRAP: 1
id: build_grcov
shell: bash
run: |
@@ -167,10 +158,10 @@ jobs:
grcov . --output-type lcov --output-path "${COVERAGE_REPORT_FILE}" --binary-path "${COVERAGE_REPORT_DIR}" --branch
echo "report=${COVERAGE_REPORT_FILE}" >> $GITHUB_OUTPUT
- name: Upload coverage results (to Codecov.io)
uses: codecov/codecov-action@v5
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: ${{ steps.coverage.outputs.report }}
file: ${{ steps.coverage.outputs.report }}
## flags: IntegrationTests, UnitTests, ${{ steps.vars.outputs.CODECOV_FLAGS }}
flags: ${{ steps.vars.outputs.CODECOV_FLAGS }}
name: codecov-umbrella
-37
View File
@@ -1,37 +0,0 @@
name: CodSpeed
on:
push:
branches:
- "main"
pull_request:
# `workflow_dispatch` allows CodSpeed to trigger backtest
# performance analysis in order to generate initial data.
workflow_dispatch:
permissions:
contents: read
id-token: write
jobs:
codspeed:
name: Run benchmarks
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Setup rust toolchain, cache and cargo-codspeed binary
uses: moonrepo/setup-rust@v0
with:
channel: stable
cache-target: release
bins: cargo-codspeed
- name: Build the benchmark target(s)
run: cargo codspeed build -m simulation
- name: Run the benchmarks
uses: CodSpeedHQ/action@v4
with:
mode: simulation
run: cargo codspeed run
+8 -13
View File
@@ -21,17 +21,15 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@nightly
- name: Install `cargo-fuzz`
run: |
echo "RUSTC_BOOTSTRAP=1" >> "${GITHUB_ENV}"
echo "CARGO_INCREMENTAL=0" >> "${GITHUB_ENV}"
cargo install cargo-fuzz --locked
run: cargo install cargo-fuzz
- uses: Swatinem/rust-cache@v2
with:
shared-key: "cargo-fuzz-cache-key"
cache-directories: "fuzz/target"
- name: Run `cargo-fuzz build`
run: cargo fuzz build
run: cargo +nightly fuzz build
fuzz-run:
needs: fuzz-build
@@ -48,20 +46,17 @@ jobs:
- { name: fuzz_ed, should_pass: true }
- { name: fuzz_normal, should_pass: true }
- { name: fuzz_patch, should_pass: true }
- { name: fuzz_side, should_pass: true }
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@nightly
- name: Install `cargo-fuzz`
run: |
echo "RUSTC_BOOTSTRAP=1" >> "${GITHUB_ENV}"
echo "CARGO_INCREMENTAL=0" >> "${GITHUB_ENV}"
cargo install cargo-fuzz --locked
run: cargo install cargo-fuzz
- uses: Swatinem/rust-cache@v2
with:
shared-key: "cargo-fuzz-cache-key"
cache-directories: "fuzz/target"
- name: Restore Cached Corpus
uses: actions/cache/restore@v5
uses: actions/cache/restore@v4
with:
key: corpus-cache-${{ matrix.test-target.name }}
path: |
@@ -70,9 +65,9 @@ jobs:
shell: bash
# `should_pass` is a field of the matrix entry itself, not of its `name` string.
continue-on-error: ${{ !matrix.test-target.should_pass }}
run: |
cargo fuzz run ${{ matrix.test-target.name }} -- -max_total_time=${{ env.RUN_FOR }} -detect_leaks=0
cargo +nightly fuzz run ${{ matrix.test-target.name }} -- -max_total_time=${{ env.RUN_FOR }} -detect_leaks=0
- name: Save Corpus Cache
uses: actions/cache/save@v5
uses: actions/cache/save@v4
with:
key: corpus-cache-${{ matrix.test-target.name }}
path: |
+59 -84
View File
@@ -1,12 +1,10 @@
# This file was autogenerated by dist: https://axodotdev.github.io/cargo-dist
#
# Copyright 2022-2024, axodotdev
# SPDX-License-Identifier: MIT or Apache-2.0
#
# CI that:
#
# * checks for a Git Tag that looks like a release
# * builds artifacts with dist (archives, installers, hashes)
# * builds artifacts with cargo-dist (archives, installers, hashes)
# * uploads those artifacts to temporary workflow zip
# * on success, uploads the artifacts to a GitHub Release
#
@@ -14,8 +12,9 @@
# title/body based on your changelogs.
name: Release
permissions:
"contents": "write"
contents: write
# This task will run whenever you push a git tag that looks like a version
# like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc.
@@ -24,10 +23,10 @@ permissions:
# must be a Cargo-style SemVer Version (must have at least major.minor.patch).
#
# If PACKAGE_NAME is specified, then the announcement will be for that
# package (erroring out if it doesn't have the given version or isn't dist-able).
# package (erroring out if it doesn't have the given version or isn't cargo-dist-able).
#
# If PACKAGE_NAME isn't specified, then the announcement will be for all
# (dist-able) packages in the workspace with that version (this mode is
# (cargo-dist-able) packages in the workspace with that version (this mode is
# intended for workspaces with only one dist-able package, or with all dist-able
# packages versioned/released in lockstep).
#
@@ -39,15 +38,15 @@ permissions:
# If there's a prerelease-style suffix to the version, then the release(s)
# will be marked as a prerelease.
on:
pull_request:
push:
tags:
- '**[0-9]+.[0-9]+.[0-9]+*'
pull_request:
jobs:
# Run 'dist plan' (or host) to determine what tasks we need to do
# Run 'cargo dist plan' (or host) to determine what tasks we need to do
plan:
runs-on: "ubuntu-22.04"
runs-on: ubuntu-latest
outputs:
val: ${{ steps.plan.outputs.manifest }}
tag: ${{ !github.event.pull_request && github.ref_name || '' }}
@@ -58,18 +57,12 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
submodules: recursive
- name: Install dist
- name: Install cargo-dist
# we specify bash to get pipefail; it guards against the `curl` command
# failing. otherwise `sh` won't catch that `curl` returned non-0
shell: bash
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.30.3/cargo-dist-installer.sh | sh"
- name: Cache dist
uses: actions/upload-artifact@v4
with:
name: cargo-dist-cache
path: ~/.cargo/bin/dist
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.13.3/cargo-dist-installer.sh | sh"
# sure would be cool if github gave us proper conditionals...
# so here's a doubly-nested ternary-via-truthiness to try to provide the best possible
# functionality based on whether this is a pull_request, and whether it's from a fork.
@@ -77,8 +70,8 @@ jobs:
# but also really annoying to build CI around when it needs secrets to work right.)
- id: plan
run: |
dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json
echo "dist ran successfully"
cargo dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json
echo "cargo dist ran successfully"
cat plan-dist-manifest.json
echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: "Upload dist-manifest.json"
@@ -96,19 +89,18 @@ jobs:
if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }}
strategy:
fail-fast: false
# Target platforms/runners are computed by dist in create-release.
# Target platforms/runners are computed by cargo-dist in create-release.
# Each member of the matrix has the following arguments:
#
# - runner: the github runner
# - dist-args: cli flags to pass to dist
# - install-dist: expression to run to install dist on the runner
# - dist-args: cli flags to pass to cargo dist
# - install-dist: expression to run to install cargo-dist on the runner
#
# Typically there will be:
# - 1 "global" task that builds universal installers
# - N "local" tasks that build each platform's binaries and platform-specific installers
matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }}
runs-on: ${{ matrix.runner }}
container: ${{ matrix.container && matrix.container.image || null }}
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json
@@ -118,17 +110,12 @@ jobs:
git config --global core.longpaths true
- uses: actions/checkout@v4
with:
persist-credentials: false
submodules: recursive
- name: Install Rust non-interactively if not already installed
if: ${{ matrix.container }}
run: |
if ! command -v cargo > /dev/null 2>&1; then
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
fi
- name: Install dist
run: ${{ matrix.install_dist.run }}
- uses: swatinem/rust-cache@v2
with:
key: ${{ join(matrix.targets, '-') }}
- name: Install cargo-dist
run: ${{ matrix.install_dist }}
# Get the dist-manifest
- name: Fetch local artifacts
uses: actions/download-artifact@v4
@@ -142,8 +129,8 @@ jobs:
- name: Build artifacts
run: |
# Actually do builds and make zips and whatnot
dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json
echo "dist ran successfully"
cargo dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json
echo "cargo dist ran successfully"
- id: cargo-dist
name: Post-build
# We force bash here just because github makes it really hard to get values up
@@ -153,7 +140,7 @@ jobs:
run: |
# Parse out what we just built and upload it to scratch storage
echo "paths<<EOF" >> "$GITHUB_OUTPUT"
dist print-upload-files-from-manifest --manifest dist-manifest.json >> "$GITHUB_OUTPUT"
jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT"
echo "EOF" >> "$GITHUB_OUTPUT"
cp dist-manifest.json "$BUILD_MANIFEST_NAME"
@@ -170,21 +157,17 @@ jobs:
needs:
- plan
- build-local-artifacts
runs-on: "ubuntu-22.04"
runs-on: "ubuntu-20.04"
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
submodules: recursive
- name: Install cached dist
uses: actions/download-artifact@v4
with:
name: cargo-dist-cache
path: ~/.cargo/bin/
- run: chmod +x ~/.cargo/bin/dist
- name: Install cargo-dist
shell: bash
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.13.3/cargo-dist-installer.sh | sh"
# Get all the local artifacts for the global tasks to use (for e.g. checksums)
- name: Fetch local artifacts
uses: actions/download-artifact@v4
@@ -195,8 +178,8 @@ jobs:
- id: cargo-dist
shell: bash
run: |
dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json
echo "dist ran successfully"
cargo dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json
echo "cargo dist ran successfully"
# Parse out what we just built and upload it to scratch storage
echo "paths<<EOF" >> "$GITHUB_OUTPUT"
@@ -217,24 +200,19 @@ jobs:
- plan
- build-local-artifacts
- build-global-artifacts
# Only run if we're "publishing", and only if plan, local and global didn't fail (skipped is fine)
if: ${{ always() && needs.plan.result == 'success' && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }}
# Only run if we're "publishing", and only if local and global didn't fail (skipped is fine)
if: ${{ always() && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }}
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
runs-on: "ubuntu-22.04"
runs-on: "ubuntu-20.04"
outputs:
val: ${{ steps.host.outputs.manifest }}
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
submodules: recursive
- name: Install cached dist
uses: actions/download-artifact@v4
with:
name: cargo-dist-cache
path: ~/.cargo/bin/
- run: chmod +x ~/.cargo/bin/dist
- name: Install cargo-dist
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.13.3/cargo-dist-installer.sh | sh"
# Fetch artifacts from scratch-storage
- name: Fetch artifacts
uses: actions/download-artifact@v4
@@ -242,10 +220,11 @@ jobs:
pattern: artifacts-*
path: target/distrib/
merge-multiple: true
# This is a harmless no-op for GitHub Releases, hosting for that happens in "announce"
- id: host
shell: bash
run: |
dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json
cargo dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json
echo "artifacts uploaded and released successfully"
cat dist-manifest.json
echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT"
@@ -255,7 +234,23 @@ jobs:
# Overwrite the previous copy
name: artifacts-dist-manifest
path: dist-manifest.json
# Create a GitHub Release while uploading all files to it
# Create a GitHub Release while uploading all files to it
announce:
needs:
- plan
- host
# use "always() && ..." to allow us to wait for all publish jobs while
# still allowing individual publish jobs to skip themselves (for prereleases).
# "host" however must run to completion, no skipping allowed!
if: ${{ always() && needs.host.result == 'success' }}
runs-on: "ubuntu-20.04"
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: "Download GitHub Artifacts"
uses: actions/download-artifact@v4
with:
@@ -267,30 +262,10 @@ jobs:
# Remove the granular manifests
rm -f artifacts/*-dist-manifest.json
- name: Create GitHub Release
env:
PRERELEASE_FLAG: "${{ fromJson(steps.host.outputs.manifest).announcement_is_prerelease && '--prerelease' || '' }}"
ANNOUNCEMENT_TITLE: "${{ fromJson(steps.host.outputs.manifest).announcement_title }}"
ANNOUNCEMENT_BODY: "${{ fromJson(steps.host.outputs.manifest).announcement_github_body }}"
RELEASE_COMMIT: "${{ github.sha }}"
run: |
# Write and read notes from a file to avoid quoting breaking things
echo "$ANNOUNCEMENT_BODY" > $RUNNER_TEMP/notes.txt
gh release create "${{ needs.plan.outputs.tag }}" --target "$RELEASE_COMMIT" $PRERELEASE_FLAG --title "$ANNOUNCEMENT_TITLE" --notes-file "$RUNNER_TEMP/notes.txt" artifacts/*
announce:
needs:
- plan
- host
# use "always() && ..." to allow us to wait for all publish jobs while
# still allowing individual publish jobs to skip themselves (for prereleases).
# "host" however must run to completion, no skipping allowed!
if: ${{ always() && needs.host.result == 'success' }}
runs-on: "ubuntu-22.04"
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v4
uses: ncipollo/release-action@v1
with:
persist-credentials: false
submodules: recursive
tag: ${{ needs.plan.outputs.tag }}
name: ${{ fromJson(needs.host.outputs.val).announcement_title }}
body: ${{ fromJson(needs.host.outputs.val).announcement_github_body }}
prerelease: ${{ fromJson(needs.host.outputs.val).announcement_is_prerelease }}
artifacts: "artifacts/*"
Generated
+65 -704
View File
File diff suppressed because it is too large Load Diff
+16 -25
View File
@@ -1,6 +1,6 @@
[package]
name = "diffutils"
version = "0.5.0"
version = "0.4.2"
edition = "2021"
description = "A CLI app for generating diff files"
license = "MIT OR Apache-2.0"
@@ -23,34 +23,25 @@ same-file = "1.0.6"
unicode-width = "0.2.0"
[dev-dependencies]
assert_cmd = "2.0.14"
divan = { version = "4.3.0", package = "codspeed-divan-compat" }
pretty_assertions = "1.4.0"
assert_cmd = "2.0.14"
predicates = "3.1.0"
rand = "0.10.0"
tempfile = "3.26.0"
tempfile = "3.10.1"
[profile.release]
lto = "thin"
codegen-units = 1
[profile.release-fast]
inherits = "release"
panic = "abort"
# The profile that 'dist' will build with
# The profile that 'cargo dist' will build with
[profile.dist]
inherits = "release"
lto = "thin"
[[bench]]
name = "bench_diffutils"
path = "benches/bench-diffutils.rs"
harness = false
[features]
# default = ["feat_bench_not_diff"]
# Turn bench for diffutils cmp off
feat_bench_not_cmp = []
# Turn bench for diffutils diff off
feat_bench_not_diff = []
# Config for 'cargo dist'
[workspace.metadata.dist]
# The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax)
cargo-dist-version = "0.13.3"
# CI backends to support
ci = ["github"]
# The installers to generate for each app
installers = []
# Target platforms to build apps for (Rust target-triple syntax)
targets = ["aarch64-apple-darwin", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-pc-windows-msvc"]
# Publish jobs to run in CI
pr-run-mode = "plan"
-1
View File
@@ -2,7 +2,6 @@
[![Discord](https://img.shields.io/badge/discord-join-7289DA.svg?logo=discord&longCache=true&style=flat)](https://discord.gg/wQVJbvJ)
[![License](http://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/uutils/diffutils/blob/main/LICENSE)
[![dependency status](https://deps.rs/repo/github/uutils/diffutils/status.svg)](https://deps.rs/repo/github/uutils/diffutils)
[![CodSpeed](https://img.shields.io/endpoint?url=https://codspeed.io/badge.json)](https://codspeed.io/uutils/diffutils?utm_source=badge)
[![CodeCov](https://codecov.io/gh/uutils/diffutils/branch/main/graph/badge.svg)](https://codecov.io/gh/uutils/diffutils)
-377
View File
@@ -1,377 +0,0 @@
// This file is part of the uutils diffutils package.
//
// For the full copyright and license information, please view the LICENSE-*
// files that were distributed with this source code.
//! Benches for all utils in diffutils.
//!
//! A file generator is included to create files of different sizes for comparison. \
//! Set the `TEMP_DIR` const to a directory to keep the generated files. The `df_to_file_*` files contain a few changed lines; search for '#' to find them. \
//! Generating files up to 1 GB is fast, but benchmarking files above 100 MB takes very long.
/// Kilobytes per megabyte; only used to keep `FILE_SIZE_KILO_BYTES` readable.
const MB: u64 = 1_000;

/// Sizes (in KB) of the test files that are generated and benchmarked.
const FILE_SIZE_KILO_BYTES: [u64; 4] = [100, MB, 10 * MB, 25 * MB];
// const FILE_SIZE_KILO_BYTES: [u64; 3] = [100, 1 * MB, 5 * MB];

/// Directory for the generated files. Leave it empty to use a `TempDir`
/// (files are removed after the run), or name a directory to keep them.
const TEMP_DIR: &str = "";

/// Number of differences requested when generating the "different" file pairs.
const NUM_DIFF: u64 = 4;

/// Byte written into a line of the "to" file to mark that line as changed.
const CHANGE_CHAR: u8 = b'#';
#[cfg(not(feature = "feat_bench_not_cmp"))]
mod diffutils_cmp {
    use std::hint::black_box;

    use diffutilslib::cmp;
    use divan::Bencher;

    use crate::{binary, prepare::*, FILE_SIZE_KILO_BYTES};

    /// Program name of the system `cmp` used as the reference implementation.
    const GNU_CMP: &str = "cmp";
    /// Path of the compiled release binary of this package.
    const RELEASE_BINARY: &str = "target/release/diffutils";

    /// Benches the library call `cmp::cmp` on a pair of identical files.
    #[divan::bench(args = FILE_SIZE_KILO_BYTES)]
    fn cmp_compare_files_equal(bencher: Bencher, kb: u64) {
        let (from, to) = get_context().get_test_files_equal(kb);
        let argv = str_to_options(&format!("cmp {from} {to}"));
        let parsed = cmp::parse_params(argv.into_iter().peekable()).unwrap();

        bencher
            .with_inputs(|| parsed.clone())
            .bench_refs(|params| black_box(cmp::cmp(&params).unwrap()));
    }

    /// Benches the library call `cmp::cmp` on a pair of files that differ;
    /// cmp exits on the first difference.
    #[divan::bench(args = FILE_SIZE_KILO_BYTES)]
    fn cmp_compare_files_different(bencher: Bencher, kb: u64) {
        let (from, to) = get_context().get_test_files_different(kb);
        let argv = str_to_options(&format!("cmp {from} {to} -s"));
        let parsed = cmp::parse_params(argv.into_iter().peekable()).unwrap();

        bencher
            .with_inputs(|| parsed.clone())
            .bench_refs(|params| black_box(cmp::cmp(&params).unwrap()));
    }

    /// Benches the original GNU `cmp` binary on a pair of identical files.
    #[divan::bench(args = FILE_SIZE_KILO_BYTES)]
    fn cmd_cmp_gnu_equal(bencher: Bencher, kb: u64) {
        let (from, to) = get_context().get_test_files_equal(kb);
        let arguments = format!("{from} {to}");

        bencher
            .with_inputs(|| arguments.clone())
            .bench_refs(|cmd_args| binary::bench_binary(GNU_CMP, cmd_args));
    }

    /// Benches the compiled release binary (run as `cmp`) on a pair of identical files.
    #[divan::bench(args = FILE_SIZE_KILO_BYTES)]
    fn cmd_cmp_release_equal(bencher: Bencher, kb: u64) {
        let (from, to) = get_context().get_test_files_equal(kb);
        let arguments = format!("cmp {from} {to}");

        bencher
            .with_inputs(|| arguments.clone())
            .bench_refs(|cmd_args| binary::bench_binary(RELEASE_BINARY, cmd_args));
    }
}
#[cfg(not(feature = "feat_bench_not_diff"))]
mod diffutils_diff {
    use divan::Bencher;

    use crate::{binary, prepare::*, FILE_SIZE_KILO_BYTES};

    /// Program name of the system `diff` used as the reference implementation.
    const GNU_DIFF: &str = "diff";
    /// Path of the compiled release binary of this package.
    const RELEASE_BINARY: &str = "target/release/diffutils";

    // TODO: diff does not have a library-level diff function yet, so there is
    // no counterpart to the `cmp::cmp` benches here; only binaries are benched.

    /// Benches the original GNU `diff` binary on a pair of identical files.
    #[divan::bench(args = FILE_SIZE_KILO_BYTES)]
    fn cmd_diff_gnu_equal(bencher: Bencher, kb: u64) {
        let (from, to) = get_context().get_test_files_equal(kb);
        let arguments = format!("{from} {to}");

        bencher
            .with_inputs(|| arguments.clone())
            .bench_refs(|cmd_args| binary::bench_binary(GNU_DIFF, cmd_args));
    }

    /// Benches the compiled release binary (run as `diff`) on a pair of identical files.
    #[divan::bench(args = FILE_SIZE_KILO_BYTES)]
    fn cmd_diff_release_equal(bencher: Bencher, kb: u64) {
        let (from, to) = get_context().get_test_files_equal(kb);
        let arguments = format!("diff {from} {to}");

        bencher
            .with_inputs(|| arguments.clone())
            .bench_refs(|cmd_args| binary::bench_binary(RELEASE_BINARY, cmd_args));
    }
}
mod parser {
    use std::hint::black_box;

    use diffutilslib::{cmp, params};
    use divan::Bencher;

    use crate::prepare::str_to_options;

    // NOTE(review): this command line starts with `cmd` (not `cmp`) and has a
    // bare `n10M` operand; confirm that this is the input meant to be parsed.
    const CMP_COMMAND_LINE: &str =
        "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB";
    const DIFF_COMMAND_LINE: &str =
        "diff file_1.txt file_2.txt -s --brief --expand-tabs --width=100";

    /// Benches the time `cmp::parse_params` takes on a prepared argument list.
    ///
    /// The string is split into `OsString`s once, outside the measured part;
    /// each iteration only receives a clone of the resulting iterator.
    #[divan::bench]
    fn cmp_parser(bencher: Bencher) {
        let argv = str_to_options(CMP_COMMAND_LINE).into_iter().peekable();

        bencher
            .with_inputs(|| argv.clone())
            .bench_values(|args| black_box(cmp::parse_params(args)));
    }

    /// Benches the time `params::parse_params` (diff) takes on a prepared argument list.
    #[divan::bench]
    fn diff_parser(bencher: Bencher) {
        let argv = str_to_options(DIFF_COMMAND_LINE).into_iter().peekable();

        bencher
            .with_inputs(|| argv.clone())
            .bench_values(|args| black_box(params::parse_params(args)));
    }
}
mod prepare {
    use std::{
        ffi::OsString,
        fs::{self, File},
        io::{BufWriter, Write},
        path::Path,
        sync::OnceLock,
    };

    use rand::RngExt;
    use tempfile::TempDir;

    use crate::{CHANGE_CHAR, FILE_SIZE_KILO_BYTES, NUM_DIFF, TEMP_DIR};

    // Base names of the generated files; an id prefix, the size in bytes and
    // ".txt" are added by `generate_test_files_bytes`.
    const FROM_FILE: &str = "from_file";
    const TO_FILE: &str = "to_file";
    /// Length of one generated line in bytes, including the trailing newline.
    const LINE_LENGTH: usize = 60;
    /// Number of random letters per word.
    const WORD_LENGTH: usize = 5;
    /// A word plus the single separator byte that follows it.
    const WORD_STRIDE: usize = WORD_LENGTH + 1;
    /// Number of words that fit into one line.
    const WORDS_PER_LINE: usize = LINE_LENGTH / WORD_STRIDE;

    /// Contains test data (file names) which only needs to be created once.
    #[derive(Debug, Default)]
    pub struct BenchContext {
        /// Temporary directory holding the files; `None` when `TEMP_DIR` is set.
        pub tmp_dir: Option<TempDir>,
        /// Directory holding the files when no temporary directory is used.
        pub dir: String,
        /// `(from, to)` pairs with identical content, one per entry of `FILE_SIZE_KILO_BYTES`.
        pub files_equal: Vec<(String, String)>,
        /// `(from, to)` pairs with differing content, one per entry of `FILE_SIZE_KILO_BYTES`.
        pub files_different: Vec<(String, String)>,
    }

    impl BenchContext {
        /// Returns the directory the test files are stored in.
        pub fn get_path(&self) -> &Path {
            match &self.tmp_dir {
                Some(tmp) => tmp.path(),
                None => Path::new(&self.dir),
            }
        }

        /// Returns the pair of identical files generated for `kb` kilobytes.
        ///
        /// Panics if `kb` is not one of `FILE_SIZE_KILO_BYTES`.
        pub fn get_test_files_equal(&self, kb: u64) -> &(String, String) {
            &self.files_equal[Self::size_index(kb)]
        }

        /// Returns the pair of differing files generated for `kb` kilobytes.
        ///
        /// Panics if `kb` is not one of `FILE_SIZE_KILO_BYTES`.
        #[allow(unused)]
        pub fn get_test_files_different(&self, kb: u64) -> &(String, String) {
            &self.files_different[Self::size_index(kb)]
        }

        /// Maps a file size to its position in `FILE_SIZE_KILO_BYTES`.
        fn size_index(kb: u64) -> usize {
            FILE_SIZE_KILO_BYTES
                .iter()
                .position(|size| *size == kb)
                .unwrap_or_else(|| panic!("{kb} KB is not listed in FILE_SIZE_KILO_BYTES"))
        }
    }

    // Since each bench function is separate in Divan it is more difficult to dynamically create test data.
    // This keeps the TempDir alive until the program exits and generates the files only once.
    static SHARED_CONTEXT: OnceLock<BenchContext> = OnceLock::new();

    /// Creates the test files once and provides them to all tests.
    pub fn get_context() -> &'static BenchContext {
        SHARED_CONTEXT.get_or_init(|| {
            let mut ctx = BenchContext::default();

            if TEMP_DIR.is_empty() {
                ctx.tmp_dir = Some(TempDir::new().expect("Failed to create temp dir"));
            } else {
                // Uses the directory named by TEMP_DIR; the generated files are kept.
                let path = Path::new(TEMP_DIR);
                if !path.exists() {
                    // `expect` takes a plain string and does not interpolate
                    // `{path}`, so the message is formatted explicitly.
                    fs::create_dir_all(path).unwrap_or_else(|err| {
                        panic!("Path {} could not be created: {err}", path.display())
                    });
                }
                ctx.dir = TEMP_DIR.to_string();
            }

            // Generate one identical and one differing pair per configured size.
            for kb in FILE_SIZE_KILO_BYTES {
                let bytes = kb * 1000;

                let equal = generate_test_files_bytes(ctx.get_path(), bytes, 0, "eq")
                    .expect("generate_test_files failed");
                ctx.files_equal.push(equal);

                let different = generate_test_files_bytes(ctx.get_path(), bytes, NUM_DIFF, "df")
                    .expect("generate_test_files failed");
                ctx.files_different.push(different);
            }

            ctx
        })
    }

    /// Splits a command line at single spaces into arguments, dropping empty parts.
    ///
    /// There is no quoting support, so arguments (e.g. paths) must not contain spaces.
    pub fn str_to_options(opt: &str) -> Vec<OsString> {
        opt.split(' ')
            .filter(|part| !part.is_empty())
            .map(OsString::from)
            .collect()
    }

    /// Generates two test files for comparison with <bytes> size.
    ///
    /// Each line consists of 10 words with 5 letters, giving a line length of 60 bytes.
    /// If num_differences is set, the separator after the first word of a changed
    /// line is replaced by '#' in the "to" file; changed lines are evenly spaced.
    /// 1 will add the change in the last full line, so the comparison takes longest.
    ///
    /// Returns the paths of the "from" and the "to" file (lossily converted to strings).
    fn generate_test_files_bytes(
        dir: &Path,
        bytes: u64,
        num_differences: u64,
        id: &str,
    ) -> std::io::Result<(String, String)> {
        let prefix = if id.is_empty() {
            String::new()
        } else {
            format!("{id}_")
        };
        let from_path = dir.join(format!("{prefix}{FROM_FILE}_{bytes}.txt"));
        let to_path = dir.join(format!("{prefix}{TO_FILE}_{bytes}.txt"));

        generate_file_bytes(&from_path, &to_path, bytes, num_differences)?;

        Ok((
            from_path.to_string_lossy().to_string(),
            to_path.to_string_lossy().to_string(),
        ))
    }

    /// Writes `bytes` bytes of random lowercase words to both files.
    ///
    /// Both files get the same content, except that `num_differences` lines
    /// (if not 0) carry `CHANGE_CHAR` after the first word in the "to" file.
    fn generate_file_bytes(
        from_name: &Path,
        to_name: &Path,
        bytes: u64,
        num_differences: u64,
    ) -> std::io::Result<()> {
        // Use a larger 128KB buffer for massive files
        const WRITE_BUFFER: usize = 128 * 1024;
        let mut writer_from = BufWriter::with_capacity(WRITE_BUFFER, File::create(from_name)?);
        let mut writer_to = BufWriter::with_capacity(WRITE_BUFFER, File::create(to_name)?);

        // Integer division: the full lines may cover less than the requested
        // bytes; the remainder is written as a shorter last line below.
        let line_length = LINE_LENGTH as u64;
        let n_lines = bytes / line_length;
        let change_every_n_lines = if num_differences == 0 {
            0
        } else {
            (n_lines / num_differences).max(1)
        };

        let mut rng = rand::rng();
        // Fills only the letter positions, skipping separators and the newline.
        let mut fill_words = |line: &mut [u8; LINE_LENGTH]| {
            for word_idx in 0..WORDS_PER_LINE {
                let start = word_idx * WORD_STRIDE;
                for letter in &mut line[start..start + WORD_LENGTH] {
                    *letter = rng.random_range(b'a'..b'z' + 1);
                }
            }
        };

        // Each line: (5 chars * 10 words) + 9 spaces + 1 newline = 60 bytes
        let mut line_buffer = [b' '; LINE_LENGTH];
        line_buffer[LINE_LENGTH - 1] = b'\n'; // Set the newline once at the end

        for line_no in (0..n_lines).rev() {
            fill_words(&mut line_buffer);
            writer_from.write_all(&line_buffer)?;

            // The first two lines written are never changed. The short-circuit
            // on `num_differences` also guards the modulo against a zero divisor.
            let changed = num_differences != 0
                && line_no % change_every_n_lines == 0
                && n_lines - line_no > 2;
            if changed {
                line_buffer[WORD_LENGTH] = CHANGE_CHAR;
            }
            writer_to.write_all(&line_buffer)?;
            // Restore the separator so the marker does not leak into later lines.
            line_buffer[WORD_LENGTH] = b' ';
        }

        // Create the last, shorter line from the bytes that are still missing.
        let missing = (bytes % line_length) as usize;
        if missing > 0 {
            fill_words(&mut line_buffer);
            line_buffer[missing - 1] = b'\n';
            writer_from.write_all(&line_buffer[..missing])?;
            writer_to.write_all(&line_buffer[..missing])?;
        }

        writer_from.flush()?;
        writer_to.flush()?;

        Ok(())
    }
}
/// Helpers for benchmarking an external executable.
mod binary {
    use crate::prepare::str_to_options;
    use std::process::{Command, ExitStatus};

    /// Runs `program` with the arguments derived from `cmd_args` (via
    /// `str_to_options`), waits for it to finish and returns its exit status.
    ///
    /// # Panics
    ///
    /// Panics with "Failed to execute binary" if the process cannot be run.
    pub fn bench_binary(program: &str, cmd_args: &str) -> ExitStatus {
        let options = str_to_options(cmd_args);
        let mut command = Command::new(program);
        command.args(options);
        command.status().expect("Failed to execute binary")
    }
}
/// Entry point of the benchmark binary: hands control to Divan.
fn main() {
// Run registered benchmarks.
divan::main();
}
-13
View File
@@ -1,13 +0,0 @@
[workspace]
members = ["cargo:."]
# Config for 'dist'
[dist]
# The preferred dist version to use in CI (Cargo.toml SemVer syntax)
cargo-dist-version = "0.30.3"
# CI backends to support
ci = "github"
# The installers to generate for each app
installers = []
# Target platforms to build apps for (Rust target-triple syntax)
targets = ["aarch64-apple-darwin", "aarch64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-pc-windows-msvc"]
-474
View File
@@ -1,474 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
[[package]]
name = "autocfg"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "bumpalo"
version = "3.19.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510"
[[package]]
name = "cc"
version = "1.2.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203"
dependencies = [
"find-msvc-tools",
"jobserver",
"libc",
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "chrono"
version = "0.4.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
dependencies = [
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-link",
]
[[package]]
name = "const_format"
version = "0.2.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7faa7469a93a566e9ccc1c73fe783b4a65c274c5ace346038dca9c39fe0030ad"
dependencies = [
"const_format_proc_macros",
]
[[package]]
name = "const_format_proc_macros"
version = "0.2.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]]
name = "diffutils"
version = "0.5.0"
dependencies = [
"chrono",
"const_format",
"diff",
"itoa",
"regex",
"same-file",
"unicode-width",
]
[[package]]
name = "find-msvc-tools"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff"
[[package]]
name = "getrandom"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
dependencies = [
"cfg-if",
"libc",
"r-efi",
"wasip2",
]
[[package]]
name = "iana-time-zone"
version = "0.1.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"log",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "itoa"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
[[package]]
name = "jobserver"
version = "0.1.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
dependencies = [
"getrandom",
"libc",
]
[[package]]
name = "js-sys"
version = "0.3.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "libc"
version = "0.2.178"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"
[[package]]
name = "libfuzzer-sys"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f12a681b7dd8ce12bff52488013ba614b869148d54dd79836ab85aafdd53f08d"
dependencies = [
"arbitrary",
"cc",
]
[[package]]
name = "log"
version = "0.4.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]]
name = "memchr"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "proc-macro2"
version = "1.0.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9695f8df41bb4f3d222c95a67532365f569318332d03d5f3f67f37b20e6ebdf0"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
dependencies = [
"proc-macro2",
]
[[package]]
name = "r-efi"
version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "syn"
version = "2.0.112"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21f182278bf2d2bcb3c88b1b08a37df029d71ce3d3ae26168e3c653b213b99d4"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
[[package]]
name = "unicode-width"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
[[package]]
name = "unicode-xid"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
[[package]]
name = "unified-diff-fuzz"
version = "0.0.0"
dependencies = [
"diffutils",
"libfuzzer-sys",
]
[[package]]
name = "wasip2"
version = "1.0.1+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
dependencies = [
"wit-bindgen",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40"
dependencies = [
"bumpalo",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4"
dependencies = [
"unicode-ident",
]
[[package]]
name = "winapi-util"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys",
]
[[package]]
name = "windows-core"
version = "0.62.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
dependencies = [
"windows-implement",
"windows-interface",
"windows-link",
"windows-result",
"windows-strings",
]
[[package]]
name = "windows-implement"
version = "0.60.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-interface"
version = "0.59.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-result"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-strings"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-sys"
version = "0.61.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
dependencies = [
"windows-link",
]
[[package]]
name = "wit-bindgen"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
+1 -5
View File
@@ -47,8 +47,4 @@ path = "fuzz_targets/fuzz_ed.rs"
test = false
doc = false
[[bin]]
name = "fuzz_side"
path = "fuzz_targets/fuzz_side.rs"
test = false
doc = false
-42
View File
@@ -1,42 +0,0 @@
#![no_main]
#[macro_use]
extern crate libfuzzer_sys;
use diffutilslib::side_diff;
use std::fs::File;
use std::io::Write;
use diffutilslib::params::Params;
// Fuzzes the side-by-side diff with two arbitrary byte buffers and an arbitrary
// `expand_tabs` flag, then dumps the inputs and the produced output under
// `target/` for inspection.
fuzz_target!(|x: (Vec<u8>, Vec<u8>, /* usize, usize */ bool)| {
    let (original, new, /* width, tabsize, */ expand) = x;

    // Width and tabsize are not fuzzed yet; the disabled guard is kept for
    // when they are enabled:
    // if width == 0 || tabsize == 0 {
    //     return;
    // }
    let params = Params {
        // width,
        // tabsize,
        expand_tabs: expand,
        ..Default::default()
    };

    let mut output_buf = vec![];
    side_diff::diff(&original, &new, &mut output_buf, &params);

    // Dump files, written in this order; `fuzz.file` receives a copy of the
    // original input.
    let dumps = [
        ("target/fuzz.file.original", &original),
        ("target/fuzz.file.new", &new),
        ("target/fuzz.file", &original),
        ("target/fuzz.diff", &output_buf),
    ];
    for (path, contents) in dumps {
        File::create(path).unwrap().write_all(contents).unwrap();
    }
});
+160 -186
View File
@@ -9,7 +9,7 @@ use std::ffi::OsString;
use std::io::{BufRead, BufReader, BufWriter, Read, Write};
use std::iter::Peekable;
use std::process::ExitCode;
use std::{cmp, fs, io};
use std::{fs, io};
#[cfg(not(target_os = "windows"))]
use std::os::fd::{AsRawFd, FromRawFd};
@@ -35,7 +35,7 @@ pub struct Params {
#[inline]
fn usage_string(executable: &str) -> String {
format!("Usage: {executable} <from> <to>")
format!("Usage: {} <from> <to>", executable)
}
#[cfg(not(target_os = "windows"))]
@@ -75,7 +75,8 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX,
Err(_) => {
return Err(format!(
"{executable_str}: invalid --ignore-initial value '{skip_desc}'"
"{}: invalid --ignore-initial value '{}'",
executable_str, skip_desc
))
}
};
@@ -90,29 +91,20 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
"M" => 1_048_576,
"GB" => 1_000_000_000,
"G" => 1_073_741_824,
// This only generates a warning when compiling for target_pointer_width < 64
#[allow(unused_variables)]
suffix @ ("TB" | "T" | "PB" | "P" | "EB" | "E") => {
#[cfg(target_pointer_width = "64")]
match suffix {
"TB" => 1_000_000_000_000,
"T" => 1_099_511_627_776,
"PB" => 1_000_000_000_000_000,
"P" => 1_125_899_906_842_624,
"EB" => 1_000_000_000_000_000_000,
"E" => 1_152_921_504_606_846_976,
_ => unreachable!(),
}
#[cfg(not(target_pointer_width = "64"))]
usize::MAX
}
"TB" => 1_000_000_000_000,
"T" => 1_099_511_627_776,
"PB" => 1_000_000_000_000_000,
"P" => 1_125_899_906_842_624,
"EB" => 1_000_000_000_000_000_000,
"E" => 1_152_921_504_606_846_976,
"ZB" => usize::MAX, // 1_000_000_000_000_000_000_000,
"Z" => usize::MAX, // 1_180_591_620_717_411_303_424,
"YB" => usize::MAX, // 1_000_000_000_000_000_000_000_000,
"Y" => usize::MAX, // 1_208_925_819_614_629_174_706_176,
_ => {
return Err(format!(
"{executable_str}: invalid --ignore-initial value '{skip_desc}'"
"{}: invalid --ignore-initial value '{}'",
executable_str, skip_desc
));
}
};
@@ -178,7 +170,8 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX,
Err(_) => {
return Err(format!(
"{executable_str}: invalid --bytes value '{max_bytes}'"
"{}: invalid --bytes value '{}'",
executable_str, max_bytes
))
}
};
@@ -217,7 +210,7 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
std::process::exit(0);
}
if param_str.starts_with('-') {
return Err(format!("unrecognized option: {param:?}"));
return Err(format!("Unknown option: {:?}", param));
}
if from.is_none() {
from = Some(param);
@@ -243,7 +236,8 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
if params.quiet && params.verbose {
return Err(format!(
"{executable_str}: options -l and -s are incompatible"
"{}: options -l and -s are incompatible",
executable_str
));
}
@@ -326,35 +320,10 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
let mut from = prepare_reader(&params.from, &params.skip_a, params)?;
let mut to = prepare_reader(&params.to, &params.skip_b, params)?;
let mut offset_width = params.max_bytes.unwrap_or(usize::MAX);
if let (Ok(a_meta), Ok(b_meta)) = (fs::metadata(&params.from), fs::metadata(&params.to)) {
#[cfg(not(target_os = "windows"))]
let (a_size, b_size) = (a_meta.size(), b_meta.size());
#[cfg(target_os = "windows")]
let (a_size, b_size) = (a_meta.file_size(), b_meta.file_size());
// If the files have different sizes, we already know they are not identical. If we have not
// been asked to show even the first difference, we can quit early.
if params.quiet && a_size != b_size {
return Ok(Cmp::Different);
}
let smaller = cmp::min(a_size, b_size) as usize;
offset_width = cmp::min(smaller, offset_width);
}
let offset_width = 1 + offset_width.checked_ilog10().unwrap_or(1) as usize;
// Capacity calc: at_byte width + 2 x 3-byte octal numbers + 2 x 4-byte value + 4 spaces
let mut output = Vec::<u8>::with_capacity(offset_width + 3 * 2 + 4 * 2 + 4);
let mut at_byte = 1;
let mut at_line = 1;
let mut start_of_line = true;
let mut stdout = BufWriter::new(io::stdout().lock());
let mut compare = Cmp::Equal;
let mut verbose_diffs = vec![];
loop {
// Fill up our buffers.
let from_buf = match from.fill_buf() {
@@ -391,6 +360,10 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
&params.to.to_string_lossy()
};
if params.verbose {
report_verbose_diffs(verbose_diffs, params)?;
}
report_eof(at_byte, at_line, start_of_line, eof_on, params);
return Ok(Cmp::Different);
}
@@ -422,24 +395,8 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
// first one runs out.
for (&from_byte, &to_byte) in from_buf.iter().zip(to_buf.iter()) {
if from_byte != to_byte {
compare = Cmp::Different;
if params.verbose {
format_verbose_difference(
from_byte,
to_byte,
at_byte,
offset_width,
&mut output,
params,
)?;
stdout.write_all(output.as_slice()).map_err(|e| {
format!(
"{}: error printing output: {e}",
params.executable.to_string_lossy()
)
})?;
output.clear();
verbose_diffs.push((at_byte, from_byte, to_byte));
} else {
report_difference(from_byte, to_byte, at_byte, at_line, params);
return Ok(Cmp::Different);
@@ -465,7 +422,12 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
to.consume(consumed);
}
Ok(compare)
if params.verbose && !verbose_diffs.is_empty() {
report_verbose_diffs(verbose_diffs, params)?;
return Ok(Cmp::Different);
}
Ok(Cmp::Equal)
}
// Exit codes are documented at
@@ -488,6 +450,21 @@ pub fn main(opts: Peekable<ArgsOs>) -> ExitCode {
return ExitCode::SUCCESS;
}
// If the files have different sizes, we already know they are not identical. If we have not
// been asked to show even the first difference, we can quit early.
if params.quiet {
if let (Ok(a_meta), Ok(b_meta)) = (fs::metadata(&params.from), fs::metadata(&params.to)) {
#[cfg(not(target_os = "windows"))]
if a_meta.size() != b_meta.size() {
return ExitCode::from(1);
}
#[cfg(target_os = "windows")]
if a_meta.file_size() != b_meta.file_size() {
return ExitCode::from(1);
}
}
}
match cmp(&params) {
Ok(Cmp::Equal) => ExitCode::SUCCESS,
Ok(Cmp::Different) => ExitCode::from(1),
@@ -500,6 +477,12 @@ pub fn main(opts: Peekable<ArgsOs>) -> ExitCode {
}
}
#[inline]
fn is_ascii_printable(byte: u8) -> bool {
let c = byte as char;
c.is_ascii() && !c.is_ascii_control()
}
#[inline]
fn format_octal(byte: u8, buf: &mut [u8; 3]) -> &str {
*buf = [b' ', b' ', b'0'];
@@ -519,136 +502,130 @@ fn format_octal(byte: u8, buf: &mut [u8; 3]) -> &str {
}
#[inline]
fn write_visible_byte(output: &mut Vec<u8>, byte: u8) -> usize {
match byte {
// Control characters: ^@, ^A, ..., ^_
0..=31 => {
output.push(b'^');
output.push(byte + 64);
2
fn format_byte(byte: u8) -> String {
let mut byte = byte;
let mut quoted = vec![];
if !is_ascii_printable(byte) {
if byte >= 128 {
quoted.push(b'M');
quoted.push(b'-');
byte -= 128;
}
// Printable ASCII (space through ~)
32..=126 => {
output.push(byte);
1
}
// DEL: ^?
127 => {
output.extend_from_slice(b"^?");
2
}
// High bytes with control equivalents: M-^@, M-^A, ..., M-^_
128..=159 => {
output.push(b'M');
output.push(b'-');
output.push(b'^');
output.push(byte - 64);
4
}
// High bytes: M-<space>, M-!, ..., M-~
160..=254 => {
output.push(b'M');
output.push(b'-');
output.push(byte - 128);
3
}
// Byte 255: M-^?
255 => {
output.extend_from_slice(b"M-^?");
4
if byte < 32 {
quoted.push(b'^');
byte += 64;
} else if byte == 127 {
quoted.push(b'^');
byte = b'?';
}
assert!((byte as char).is_ascii());
}
}
/// Writes a byte in visible form with right-padding to 4 spaces.
#[inline]
fn write_visible_byte_padded(output: &mut Vec<u8>, byte: u8) {
const SPACES: &[u8] = b" ";
const WIDTH: usize = SPACES.len();
quoted.push(byte);
let display_width = write_visible_byte(output, byte);
// Add right-padding spaces
let padding = WIDTH.saturating_sub(display_width);
output.extend_from_slice(&SPACES[..padding]);
}
/// Formats a byte as a visible string (for non-performance-critical path)
#[inline]
fn format_visible_byte(byte: u8) -> String {
let mut result = Vec::with_capacity(4);
write_visible_byte(&mut result, byte);
// SAFETY: the checks and shifts in write_visible_byte match what cat and GNU
// SAFETY: the checks and shifts we do above match what cat and GNU
// cmp do to ensure characters fall inside the ascii range.
unsafe { String::from_utf8_unchecked(result) }
unsafe { String::from_utf8_unchecked(quoted) }
}
// This function has been optimized to not use the Rust fmt system, which
// leads to a massive speed up when processing large files: cuts the time
// for comparing 2 ~36MB completely different files in half on an M1 Max.
#[inline]
fn format_verbose_difference(
from_byte: u8,
to_byte: u8,
at_byte: usize,
offset_width: usize,
output: &mut Vec<u8>,
params: &Params,
) -> Result<(), String> {
fn report_verbose_diffs(diffs: Vec<(usize, u8, u8)>, params: &Params) -> Result<(), String> {
assert!(!params.quiet);
let mut at_byte_buf = itoa::Buffer::new();
let mut from_oct = [0u8; 3]; // for octal conversions
let mut to_oct = [0u8; 3];
let mut stdout = BufWriter::new(io::stdout().lock());
if let Some((offset, _, _)) = diffs.last() {
// Obtain the width of the first column from the last byte offset.
let width = format!("{}", offset).len();
if params.print_bytes {
// "{:>width$} {:>3o} {:4} {:>3o} {}",
let at_byte_str = at_byte_buf.format(at_byte);
let at_byte_padding = offset_width.saturating_sub(at_byte_str.len());
let mut at_byte_buf = itoa::Buffer::new();
let mut from_oct = [0u8; 3]; // for octal conversions
let mut to_oct = [0u8; 3];
for _ in 0..at_byte_padding {
output.push(b' ')
// Capacity calc: at_byte width + 2 x 3-byte octal numbers + 4-byte value + up to 2 byte value + 4 spaces
let mut output = Vec::<u8>::with_capacity(width + 3 * 2 + 4 + 2 + 4);
if params.print_bytes {
for (at_byte, from_byte, to_byte) in diffs {
output.clear();
// "{:>width$} {:>3o} {:4} {:>3o} {}",
let at_byte_str = at_byte_buf.format(at_byte);
let at_byte_padding = width - at_byte_str.len();
for _ in 0..at_byte_padding {
output.push(b' ')
}
output.extend_from_slice(at_byte_str.as_bytes());
output.push(b' ');
output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes());
output.push(b' ');
let from_byte_str = format_byte(from_byte);
let from_byte_padding = 4 - from_byte_str.len();
output.extend_from_slice(from_byte_str.as_bytes());
for _ in 0..from_byte_padding {
output.push(b' ')
}
output.push(b' ');
output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes());
output.push(b' ');
output.extend_from_slice(format_byte(to_byte).as_bytes());
output.push(b'\n');
stdout.write_all(output.as_slice()).map_err(|e| {
format!(
"{}: error printing output: {e}",
params.executable.to_string_lossy()
)
})?;
}
} else {
for (at_byte, from_byte, to_byte) in diffs {
output.clear();
// "{:>width$} {:>3o} {:>3o}"
let at_byte_str = at_byte_buf.format(at_byte);
let at_byte_padding = width - at_byte_str.len();
for _ in 0..at_byte_padding {
output.push(b' ')
}
output.extend_from_slice(at_byte_str.as_bytes());
output.push(b' ');
output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes());
output.push(b' ');
output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes());
output.push(b'\n');
stdout.write_all(output.as_slice()).map_err(|e| {
format!(
"{}: error printing output: {e}",
params.executable.to_string_lossy()
)
})?;
}
}
output.extend_from_slice(at_byte_str.as_bytes());
output.push(b' ');
output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes());
output.push(b' ');
write_visible_byte_padded(output, from_byte);
output.push(b' ');
output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes());
output.push(b' ');
write_visible_byte(output, to_byte);
output.push(b'\n');
} else {
// "{:>width$} {:>3o} {:>3o}"
let at_byte_str = at_byte_buf.format(at_byte);
let at_byte_padding = offset_width - at_byte_str.len();
for _ in 0..at_byte_padding {
output.push(b' ')
}
output.extend_from_slice(at_byte_str.as_bytes());
output.push(b' ');
output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes());
output.push(b' ');
output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes());
output.push(b'\n');
}
Ok(())
@@ -729,9 +706,9 @@ fn report_difference(from_byte: u8, to_byte: u8, at_byte: usize, at_line: usize,
print!(
" is {:>3o} {:char_width$} {:>3o} {:char_width$}",
from_byte,
format_visible_byte(from_byte),
format_byte(from_byte),
to_byte,
format_visible_byte(to_byte)
format_byte(to_byte)
);
}
println!();
@@ -1085,9 +1062,6 @@ mod tests {
from: os("foo"),
to: os("bar"),
skip_a: Some(1_000_000_000),
#[cfg(target_pointer_width = "32")]
skip_b: Some((2_147_483_647.5 * 2.0) as usize),
#[cfg(target_pointer_width = "64")]
skip_b: Some(1_152_921_504_606_846_976 * 2),
..Default::default()
}),
+2 -6
View File
@@ -5,11 +5,11 @@
use crate::params::{parse_params, Format};
use crate::utils::report_failure_to_read_input_file;
use crate::{context_diff, ed_diff, normal_diff, side_diff, unified_diff};
use crate::{context_diff, ed_diff, normal_diff, unified_diff};
use std::env::ArgsOs;
use std::ffi::OsString;
use std::fs;
use std::io::{self, stdout, Read, Write};
use std::io::{self, Read, Write};
use std::iter::Peekable;
use std::process::{exit, ExitCode};
@@ -79,10 +79,6 @@ pub fn main(opts: Peekable<ArgsOs>) -> ExitCode {
eprintln!("{error}");
exit(2);
}),
Format::SideBySide => {
let mut output = stdout().lock();
side_diff::diff(&from_content, &to_content, &mut output, &params)
}
};
if params.brief && !result.is_empty() {
println!(
-2
View File
@@ -4,7 +4,6 @@ pub mod ed_diff;
pub mod macros;
pub mod normal_diff;
pub mod params;
pub mod side_diff;
pub mod unified_diff;
pub mod utils;
@@ -12,5 +11,4 @@ pub mod utils;
pub use context_diff::diff as context_diff;
pub use ed_diff::diff as ed_diff;
pub use normal_diff::diff as normal_diff;
pub use side_diff::diff as side_by_side_diff;
pub use unified_diff::diff as unified_diff;
+1 -2
View File
@@ -18,7 +18,6 @@ mod ed_diff;
mod macros;
mod normal_diff;
mod params;
mod side_diff;
mod unified_diff;
mod utils;
@@ -73,7 +72,7 @@ fn main() -> ExitCode {
Some("diff") => diff::main(args),
Some("cmp") => cmp::main(args),
Some(name) => {
eprintln!("{name}: utility not supported");
eprintln!("{}: utility not supported", name);
ExitCode::from(2)
}
None => second_arg_error(exe_name),
+26 -61
View File
@@ -11,7 +11,6 @@ pub enum Format {
Unified,
Context,
Ed,
SideBySide,
}
#[derive(Clone, Debug, Eq, PartialEq)]
@@ -25,7 +24,6 @@ pub struct Params {
pub brief: bool,
pub expand_tabs: bool,
pub tabsize: usize,
pub width: usize,
}
impl Default for Params {
@@ -40,7 +38,6 @@ impl Default for Params {
brief: false,
expand_tabs: false,
tabsize: 8,
width: 130,
}
}
}
@@ -60,7 +57,6 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
let mut format = None;
let mut context = None;
let tabsize_re = Regex::new(r"^--tabsize=(?<num>\d+)$").unwrap();
let width_re = Regex::new(r"--width=(?P<long>\d+)$").unwrap();
while let Some(param) = opts.next() {
let next_param = opts.peek();
if param == "--" {
@@ -105,34 +101,6 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
format = Some(Format::Ed);
continue;
}
if param == "-y" || param == "--side-by-side" {
if format.is_some() && format != Some(Format::SideBySide) {
return Err("Conflicting output style option".to_string());
}
format = Some(Format::SideBySide);
continue;
}
if width_re.is_match(param.to_string_lossy().as_ref()) {
let param = param.into_string().unwrap();
let width_str: &str = width_re
.captures(param.as_str())
.unwrap()
.name("long")
.unwrap()
.as_str();
params.width = match width_str.parse::<usize>() {
Ok(num) => {
if num == 0 {
return Err("invalid width «0»".to_string());
}
num
}
Err(_) => return Err(format!("invalid width «{width_str}»")),
};
continue;
}
if tabsize_re.is_match(param.to_string_lossy().as_ref()) {
// Because param matches the regular expression,
// it is safe to assume it is valid UTF-8.
@@ -144,16 +112,9 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
.unwrap()
.as_str();
params.tabsize = match tabsize_str.parse::<usize>() {
Ok(num) => {
if num == 0 {
return Err("invalid tabsize «0»".to_string());
}
num
}
Ok(num) => num,
Err(_) => return Err(format!("invalid tabsize «{tabsize_str}»")),
};
continue;
}
match match_context_diff_params(&param, next_param, format) {
@@ -195,7 +156,7 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
Err(error) => return Err(error),
}
if param.to_string_lossy().starts_with('-') {
return Err(format!("unrecognized option: {param:?}"));
return Err(format!("Unknown option: {:?}", param));
}
if from.is_none() {
from = Some(param);
@@ -279,15 +240,17 @@ fn match_context_diff_params(
context_count = Some(numvalue.as_str().parse::<usize>().unwrap());
}
}
if param == "-C" {
if let Some(p) = next_param {
let size_str = p.to_string_lossy();
match size_str.parse::<usize>() {
Ok(context_size) => {
context_count = Some(context_size);
next_param_consumed = true;
}
Err(_) => return Err(format!("invalid context length '{size_str}'")),
if param == "-C" && next_param.is_some() {
match next_param.unwrap().to_string_lossy().parse::<usize>() {
Ok(context_size) => {
context_count = Some(context_size);
next_param_consumed = true;
}
Err(_) => {
return Err(format!(
"invalid context length '{}'",
next_param.unwrap().to_string_lossy()
))
}
}
}
@@ -323,15 +286,17 @@ fn match_unified_diff_params(
context_count = Some(numvalue.as_str().parse::<usize>().unwrap());
}
}
if param == "-U" {
if let Some(p) = next_param {
let size_str = p.to_string_lossy();
match size_str.parse::<usize>() {
Ok(context_size) => {
context_count = Some(context_size);
next_param_consumed = true;
}
Err(_) => return Err(format!("invalid context length '{size_str}'")),
if param == "-U" && next_param.is_some() {
match next_param.unwrap().to_string_lossy().parse::<usize>() {
Ok(context_size) => {
context_count = Some(context_size);
next_param_consumed = true;
}
Err(_) => {
return Err(format!(
"invalid context length '{}'",
next_param.unwrap().to_string_lossy()
))
}
}
}
@@ -739,11 +704,11 @@ mod tests {
executable: os("diff"),
from: os("foo"),
to: os("bar"),
tabsize: 1,
tabsize: 0,
..Default::default()
}),
parse_params(
[os("diff"), os("--tabsize=1"), os("foo"), os("bar")]
[os("diff"), os("--tabsize=0"), os("foo"), os("bar")]
.iter()
.cloned()
.peekable()
-1263
View File
File diff suppressed because it is too large Load Diff
+2 -1
View File
@@ -3,8 +3,9 @@
// For the full copyright and license information, please view the LICENSE-*
// files that was distributed with this source code.
use regex::Regex;
use std::{ffi::OsString, io::Write};
use regex::Regex;
use unicode_width::UnicodeWidthStr;
/// Replace tabs by spaces in the input line.
+56 -76
View File
@@ -3,11 +3,9 @@
// For the full copyright and license information, please view the LICENSE-*
// files that was distributed with this source code.
use assert_cmd::cargo::cargo_bin_cmd;
use assert_cmd::cmd::Command;
use predicates::prelude::*;
use std::fs::File;
#[cfg(not(windows))]
use std::fs::OpenOptions;
use std::fs::{File, OpenOptions};
use std::io::Write;
use tempfile::{tempdir, NamedTempFile};
@@ -17,14 +15,14 @@ mod common {
#[test]
fn unknown_param() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("patch");
cmd.assert()
.code(predicate::eq(2))
.failure()
.stderr(predicate::eq("patch: utility not supported\n"));
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.assert()
.code(predicate::eq(0))
.success()
@@ -33,15 +31,13 @@ mod common {
));
for subcmd in ["diff", "cmp"] {
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg(subcmd);
cmd.arg("--foobar");
cmd.assert()
.code(predicate::eq(2))
.failure()
.stderr(predicate::str::starts_with(
"unrecognized option: \"--foobar\"",
));
.stderr(predicate::str::starts_with("Unknown option: \"--foobar\""));
}
Ok(())
}
@@ -60,7 +56,7 @@ mod common {
let error_message = "The system cannot find the file specified.";
for subcmd in ["diff", "cmp"] {
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg(subcmd);
cmd.arg(&nopath).arg(file.path());
cmd.assert()
@@ -71,7 +67,7 @@ mod common {
&nopath.as_os_str().to_string_lossy()
)));
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg(subcmd);
cmd.arg(file.path()).arg(&nopath);
cmd.assert()
@@ -83,7 +79,7 @@ mod common {
)));
}
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("diff");
cmd.arg(&nopath).arg(&nopath);
cmd.assert().code(predicate::eq(2)).failure().stderr(
@@ -107,7 +103,7 @@ mod diff {
fn no_differences() -> Result<(), Box<dyn std::error::Error>> {
let file = NamedTempFile::new()?;
for option in ["", "-u", "-c", "-e"] {
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("diff");
if !option.is_empty() {
cmd.arg(option);
@@ -127,7 +123,7 @@ mod diff {
let mut file1 = NamedTempFile::new()?;
file1.write_all("foo\n".as_bytes())?;
for option in ["", "-u", "-c", "-e"] {
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("diff");
if !option.is_empty() {
cmd.arg(option);
@@ -146,7 +142,7 @@ mod diff {
let mut file2 = NamedTempFile::new()?;
file2.write_all("foo\n".as_bytes())?;
for option in ["", "-u", "-c", "-e"] {
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("diff");
if !option.is_empty() {
cmd.arg(option);
@@ -171,7 +167,7 @@ mod diff {
let mut file2 = NamedTempFile::new()?;
file2.write_all("bar\n".as_bytes())?;
for option in ["", "-u", "-c", "-e"] {
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("diff");
if !option.is_empty() {
cmd.arg(option);
@@ -192,7 +188,7 @@ mod diff {
let mut file2 = NamedTempFile::new()?;
file2.write_all("bar\n".as_bytes())?;
for option in ["", "-u", "-c", "-e"] {
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("diff");
if !option.is_empty() {
cmd.arg(option);
@@ -216,7 +212,7 @@ mod diff {
file1.write_all("foo".as_bytes())?;
let mut file2 = NamedTempFile::new()?;
file2.write_all("bar".as_bytes())?;
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("diff");
cmd.arg("-e").arg(file1.path()).arg(file2.path());
cmd.assert()
@@ -233,7 +229,7 @@ mod diff {
let mut file2 = NamedTempFile::new()?;
file2.write_all("bar\n".as_bytes())?;
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("diff");
cmd.arg("-u")
.arg(file1.path())
@@ -250,7 +246,7 @@ mod diff {
)
);
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("diff");
cmd.arg("-u")
.arg("-")
@@ -267,7 +263,7 @@ mod diff {
)
);
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("diff");
cmd.arg("-u").arg("-").arg("-");
cmd.assert()
@@ -277,7 +273,7 @@ mod diff {
#[cfg(unix)]
{
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("diff");
cmd.arg("-u")
.arg(file1.path())
@@ -313,7 +309,7 @@ mod diff {
let mut da = File::create(&da_path).unwrap();
da.write_all(b"da\n").unwrap();
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("diff");
cmd.arg("-u").arg(&directory).arg(&a_path);
cmd.assert().code(predicate::eq(1)).failure();
@@ -328,7 +324,7 @@ mod diff {
)
);
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("diff");
cmd.arg("-u").arg(&a_path).arg(&directory);
cmd.assert().code(predicate::eq(1)).failure();
@@ -352,7 +348,7 @@ mod cmp {
#[test]
fn cmp_incompatible_params() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("cmp");
cmd.arg("-l");
cmd.arg("-s");
@@ -375,7 +371,7 @@ mod cmp {
let mut a = File::create(&a_path).unwrap();
a.write_all(b"a\n").unwrap();
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("cmp");
cmd.arg(&a_path);
cmd.write_stdin("a\n");
@@ -385,7 +381,7 @@ mod cmp {
.stderr(predicate::str::is_empty())
.stdout(predicate::str::is_empty());
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.env("LC_ALL", "C");
cmd.arg("cmp");
cmd.arg(&a_path);
@@ -411,7 +407,7 @@ mod cmp {
let mut b = File::create(&b_path).unwrap();
b.write_all(b"a\n").unwrap();
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("cmp");
cmd.arg(&a_path).arg(&b_path);
cmd.assert()
@@ -434,7 +430,7 @@ mod cmp {
let b_path = tmp_dir.path().join("b");
let _ = File::create(&b_path).unwrap();
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("cmp");
cmd.arg(&a_path).arg(&b_path);
cmd.assert()
@@ -458,7 +454,7 @@ mod cmp {
let mut b = File::create(&b_path).unwrap();
b.write_all(b"bcd\n").unwrap();
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.env("LC_ALL", "C");
cmd.arg("cmp");
cmd.arg(&a_path).arg(&b_path);
@@ -467,7 +463,7 @@ mod cmp {
.failure()
.stdout(predicate::str::ends_with(" differ: char 1, line 1\n"));
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.env("LC_ALL", "C");
cmd.arg("cmp");
cmd.arg("-b");
@@ -480,7 +476,7 @@ mod cmp {
" differ: byte 1, line 1 is 141 a 142 b\n",
));
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.env("LC_ALL", "C");
cmd.arg("cmp");
cmd.arg("-l");
@@ -491,7 +487,7 @@ mod cmp {
.stderr(predicate::str::is_empty())
.stdout(predicate::eq("1 141 142\n2 142 143\n3 143 144\n"));
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.env("LC_ALL", "C");
cmd.arg("cmp");
cmd.arg("-l");
@@ -520,7 +516,7 @@ mod cmp {
let mut b = File::create(&b_path).unwrap();
b.write_all(b"abc\ndef\ng").unwrap();
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.env("LC_ALL", "C");
cmd.arg("cmp");
cmd.arg(&a_path).arg(&b_path);
@@ -530,7 +526,7 @@ mod cmp {
.stderr(predicate::str::is_empty())
.stdout(predicate::str::ends_with(" differ: char 8, line 2\n"));
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.env("LC_ALL", "C");
cmd.arg("cmp");
cmd.arg("-b");
@@ -543,7 +539,7 @@ mod cmp {
" differ: byte 8, line 2 is 147 g 12 ^J\n",
));
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.env("LC_ALL", "C");
cmd.arg("cmp");
cmd.arg("-l");
@@ -555,7 +551,7 @@ mod cmp {
.stderr(predicate::str::contains(" EOF on"))
.stderr(predicate::str::ends_with(" after byte 8\n"));
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.env("LC_ALL", "C");
cmd.arg("cmp");
cmd.arg("-b");
@@ -583,7 +579,7 @@ mod cmp {
let mut b = File::create(&b_path).unwrap();
b.write_all(b"abcdefghijkl\n").unwrap();
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("cmp");
cmd.arg("-l");
cmd.arg("-b");
@@ -596,7 +592,7 @@ mod cmp {
.stderr(predicate::str::is_empty())
.stdout(predicate::str::is_empty());
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("cmp");
cmd.arg("-l");
cmd.arg("-b");
@@ -609,7 +605,7 @@ mod cmp {
.stderr(predicate::str::is_empty())
.stdout(predicate::eq("4 40 144 d\n"));
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("cmp");
cmd.arg("-l");
cmd.arg("-b");
@@ -620,7 +616,7 @@ mod cmp {
.code(predicate::eq(1))
.failure()
.stderr(predicate::str::is_empty())
.stdout(predicate::eq(" 4 40 144 d\n 8 40 150 h\n"));
.stdout(predicate::eq("4 40 144 d\n8 40 150 h\n"));
Ok(())
}
@@ -636,7 +632,7 @@ mod cmp {
let mut b = File::create(&b_path).unwrap();
b.write_all(b"###abc\n").unwrap();
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.env("LC_ALL", "C");
cmd.arg("cmp");
cmd.arg("-i");
@@ -649,7 +645,7 @@ mod cmp {
.stdout(predicate::str::is_empty());
// Positional skips should be ignored
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.env("LC_ALL", "C");
cmd.arg("cmp");
cmd.arg("-i");
@@ -663,7 +659,7 @@ mod cmp {
.stdout(predicate::str::is_empty());
// Single positional argument should only affect first file.
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.env("LC_ALL", "C");
cmd.arg("cmp");
cmd.arg(&a_path).arg(&b_path);
@@ -674,7 +670,7 @@ mod cmp {
.stderr(predicate::str::is_empty())
.stdout(predicate::str::ends_with(" differ: char 1, line 1\n"));
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.env("LC_ALL", "C");
cmd.arg("cmp");
cmd.arg(&a_path).arg(&b_path);
@@ -695,15 +691,15 @@ mod cmp {
let a_path = tmp_dir.path().join("a");
let mut a = File::create(&a_path).unwrap();
writeln!(a, "{}c", "a".repeat(1024)).unwrap();
write!(a, "{}c\n", "a".repeat(1024)).unwrap();
a.flush().unwrap();
let b_path = tmp_dir.path().join("b");
let mut b = File::create(&b_path).unwrap();
writeln!(b, "{}c", "b".repeat(1024)).unwrap();
write!(b, "{}c\n", "b".repeat(1024)).unwrap();
b.flush().unwrap();
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("cmp");
cmd.arg("--ignore-initial=1K");
cmd.arg(&a_path).arg(&b_path);
@@ -728,7 +724,7 @@ mod cmp {
let mut b = File::create(&b_path).unwrap();
b.write_all(b"abcdefghijkl\n").unwrap();
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("cmp");
cmd.arg("-l");
cmd.arg("-b");
@@ -741,7 +737,7 @@ mod cmp {
.stderr(predicate::str::is_empty())
.stdout(predicate::str::is_empty());
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("cmp");
cmd.arg("-b");
cmd.arg("-i");
@@ -774,7 +770,7 @@ mod cmp {
let mut b = File::create(&b_path).unwrap();
b.write_all(&bytes).unwrap();
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("cmp");
cmd.arg("-l");
cmd.arg("-b");
@@ -819,7 +815,7 @@ mod cmp {
let dev_null = OpenOptions::new().write(true).open("/dev/null").unwrap();
let mut child = std::process::Command::new(assert_cmd::cargo::cargo_bin!("diffutils"))
let mut child = std::process::Command::new(assert_cmd::cargo::cargo_bin("diffutils"))
.arg("cmp")
.arg(&a_path)
.arg(&b_path)
@@ -827,27 +823,12 @@ mod cmp {
.spawn()
.unwrap();
// Bound the runtime to a very short time that still allows for some resource
// constraint to slow it down while also allowing very fast systems to exit as
// early as possible.
const MAX_TRIES: u8 = 50;
for tries in 0..=MAX_TRIES {
if tries == MAX_TRIES {
panic!("cmp took too long to run, /dev/null optimization probably not working")
}
match child.try_wait() {
Ok(Some(status)) => {
assert_eq!(status.code(), Some(1));
break;
}
Ok(None) => (),
Err(e) => panic!("{e:#?}"),
}
std::thread::sleep(std::time::Duration::from_millis(10));
}
std::thread::sleep(std::time::Duration::from_millis(100));
assert_eq!(child.try_wait().unwrap().unwrap().code(), Some(1));
// Two stdins should be equal
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("cmp");
cmd.arg("-");
cmd.arg("-");
@@ -870,18 +851,17 @@ mod cmp {
let a_path = tmp_dir.path().join("a");
let mut a = File::create(&a_path).unwrap();
a.write_all(bytes).unwrap();
a.write_all(&bytes).unwrap();
a.write_all(b"A").unwrap();
let b_path = tmp_dir.path().join("b");
let mut b = File::create(&b_path).unwrap();
b.write_all(bytes).unwrap();
b.write_all(&bytes).unwrap();
b.write_all(b"B").unwrap();
let mut cmd = cargo_bin_cmd!("diffutils");
let mut cmd = Command::cargo_bin("diffutils")?;
cmd.arg("cmp");
cmd.arg(&a_path).arg(&b_path);
cmd.env("LC_ALL", "en_US");
cmd.assert()
.code(predicate::eq(1))
.failure()