From ea9376aaafc2ae596c3c95d4e10efd2cb2b9f74a Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Wed, 15 Sep 2021 19:06:14 -0700 Subject: [PATCH] Add ed-formatted diff --- .gitignore | 1 + Cargo.toml | 18 +- bin/diffutils/Cargo.toml | 17 + bin/{diff => diffutils}/main.rs | 4 + bin/{diff => diffutils}/params.rs | 19 ++ lib/context-diff/Cargo.toml | 4 - lib/ed-diff/Cargo.toml | 11 + lib/ed-diff/fuzz/.gitignore | 4 + lib/ed-diff/fuzz/Cargo.toml | 26 ++ lib/ed-diff/fuzz/fuzz_targets/fuzz_ed.rs | 55 ++++ lib/ed-diff/src/lib.rs | 382 +++++++++++++++++++++++ lib/normal-diff/Cargo.toml | 4 - lib/normal-diff/fuzz/Cargo.toml | 1 - lib/unified-diff/Cargo.toml | 4 - lib/unified-diff/fuzz/Cargo.toml | 1 - 15 files changed, 521 insertions(+), 30 deletions(-) create mode 100644 bin/diffutils/Cargo.toml rename bin/{diff => diffutils}/main.rs (92%) rename bin/{diff => diffutils}/params.rs (92%) create mode 100644 lib/ed-diff/Cargo.toml create mode 100644 lib/ed-diff/fuzz/.gitignore create mode 100644 lib/ed-diff/fuzz/Cargo.toml create mode 100644 lib/ed-diff/fuzz/fuzz_targets/fuzz_ed.rs create mode 100644 lib/ed-diff/src/lib.rs diff --git a/.gitignore b/.gitignore index e90ad7d..3991d07 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ /target /lib/normal-diff/target +/lib/ed-diff/target /lib/context-diff/target /lib/unified-diff/target Cargo.lock diff --git a/Cargo.toml b/Cargo.toml index 359e342..f2c907e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,20 +3,6 @@ members = [ "lib/unified-diff", "lib/context-diff", "lib/normal-diff", - "bin/diff", + "lib/ed-diff", + "bin/diffutils", ] - -[package] -name = "diffutils" -version = "0.3.0" -authors = ["Michael Howell "] -edition = "2018" - -[dependencies] -context-diff = { path = "lib/context-diff", version = "0.3.0" } -normal-diff = { path = "lib/normal-diff", version = "0.3.0" } -unified-diff = { path = "lib/unified-diff", version = "0.3.0" } - -[[bin]] -name = "diffutils" -path = "bin/main.rs" diff --git 
a/bin/diffutils/Cargo.toml b/bin/diffutils/Cargo.toml new file mode 100644 index 0000000..a152cf3 --- /dev/null +++ b/bin/diffutils/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "diffutils" +version = "0.3.0" +edition = "2018" +description = "A CLI app for generating diff files" +license = "MIT OR Apache-2.0" +repository = "https://github.com/notriddle/diffutils" + +[[bin]] +name = "diffutils" +path = "main.rs" + +[dependencies] +unified-diff = { path = "../../lib/unified-diff/" } +context-diff = { path = "../../lib/context-diff/" } +normal-diff = { path = "../../lib/normal-diff/" } +ed-diff = { path = "../../lib/ed-diff/" } diff --git a/bin/diff/main.rs b/bin/diffutils/main.rs similarity index 92% rename from bin/diff/main.rs rename to bin/diffutils/main.rs index cc36661..d90dfc8 100644 --- a/bin/diff/main.rs +++ b/bin/diffutils/main.rs @@ -44,6 +44,10 @@ fn main() -> Result<(), String> { &to.to_string_lossy(), context_count, ), + Format::Ed => ed_diff::diff( + &from_content, + &to_content, + )?, }; io::stdout().write_all(&result).unwrap(); Ok(()) diff --git a/bin/diff/params.rs b/bin/diffutils/params.rs similarity index 92% rename from bin/diff/params.rs rename to bin/diffutils/params.rs index f44aeeb..1c0dcf1 100644 --- a/bin/diff/params.rs +++ b/bin/diffutils/params.rs @@ -5,6 +5,7 @@ pub enum Format { Normal, Unified, Context, + Ed, } #[cfg(unix)] @@ -73,6 +74,12 @@ pub fn parse_params>(opts: I) -> Result { + if format.is_some() && format != Some(Format::Ed) { + return Err(format!("Conflicting output style options")); + } + format = Some(Format::Ed); + } b'u' => { if format.is_some() && format != Some(Format::Unified) { return Err(format!("Conflicting output style options")); @@ -151,6 +158,18 @@ mod tests { ); } #[test] + fn basics_ed() { + assert_eq!( + Ok(Params { + from: os("foo"), + to: os("bar"), + format: Format::Ed, + context_count: 3, + }), + parse_params([os("diff"), os("-e"), os("foo"), os("bar")].iter().cloned()) + ); + } + #[test] fn 
context_count() { assert_eq!( Ok(Params { diff --git a/lib/context-diff/Cargo.toml b/lib/context-diff/Cargo.toml index 3b0ae82..4c8f714 100644 --- a/lib/context-diff/Cargo.toml +++ b/lib/context-diff/Cargo.toml @@ -1,10 +1,6 @@ [package] name = "context-diff" version = "0.3.0" -authors = [ - "Michael Howell ", - "The Rust Project Developers" -] edition = "2018" description = "An implementation of the GNU unified diff format" license = "MIT OR Apache-2.0" diff --git a/lib/ed-diff/Cargo.toml b/lib/ed-diff/Cargo.toml new file mode 100644 index 0000000..0b2e90d --- /dev/null +++ b/lib/ed-diff/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "ed-diff" +version = "0.3.0" +edition = "2018" +description = "An implementation of the GNU ed diff format" +license = "MIT OR Apache-2.0" +repository = "https://github.com/notriddle/diffutils" +exclude = [ "fuzz" ] + +[dependencies] +diff = "0.1.10" diff --git a/lib/ed-diff/fuzz/.gitignore b/lib/ed-diff/fuzz/.gitignore new file mode 100644 index 0000000..572e03b --- /dev/null +++ b/lib/ed-diff/fuzz/.gitignore @@ -0,0 +1,4 @@ + +target +corpus +artifacts diff --git a/lib/ed-diff/fuzz/Cargo.toml b/lib/ed-diff/fuzz/Cargo.toml new file mode 100644 index 0000000..2f3460d --- /dev/null +++ b/lib/ed-diff/fuzz/Cargo.toml @@ -0,0 +1,26 @@ + +[package] +name = "ed-diff-fuzz" +version = "0.0.0" +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.3" + +[dependencies.ed-diff] +path = ".." 
+ +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "fuzz_ed" +path = "fuzz_targets/fuzz_ed.rs" +test = false +doc = false + diff --git a/lib/ed-diff/fuzz/fuzz_targets/fuzz_ed.rs b/lib/ed-diff/fuzz/fuzz_targets/fuzz_ed.rs new file mode 100644 index 0000000..febd1f7 --- /dev/null +++ b/lib/ed-diff/fuzz/fuzz_targets/fuzz_ed.rs @@ -0,0 +1,55 @@ +#![no_main] +#[macro_use] extern crate libfuzzer_sys; +extern crate ed_diff; + +use std::fs::{self, File}; +use std::io::Write; +use std::process::Command; + +fuzz_target!(|x: (Vec, Vec)| { + let (mut from, mut to) = x; + from.push(b'\n'); + to.push(b'\n'); + if let Ok(s) = String::from_utf8(from.clone()) { + if !s.is_ascii() { return } + if s.find(|x| x < ' ' && x != '\n').is_some() { return } + } else { + return + } + if let Ok(s) = String::from_utf8(to.clone()) { + if !s.is_ascii() { return } + if s.find(|x| x < ' ' && x != '\n').is_some() { return } + } else { + return + } + let diff = ed_diff::diff_w(&from, &to, "target/fuzz.file").unwrap(); + File::create("target/fuzz.file.original") + .unwrap() + .write_all(&from) + .unwrap(); + File::create("target/fuzz.file.expected") + .unwrap() + .write_all(&to) + .unwrap(); + File::create("target/fuzz.file") + .unwrap() + .write_all(&from) + .unwrap(); + File::create("target/fuzz.ed") + .unwrap() + .write_all(&diff) + .unwrap(); + let output = Command::new("ed") + .arg("target/fuzz.file") + .stdin(File::open("target/fuzz.ed").unwrap()) + .output() + .unwrap(); + if !output.status.success() { + panic!("STDOUT:\n{}\nSTDERR:\n{}", String::from_utf8_lossy(&output.stdout), String::from_utf8_lossy(&output.stderr)); + } + let result = fs::read("target/fuzz.file").unwrap(); + if result != to { + panic!("STDOUT:\n{}\nSTDERR:\n{}", String::from_utf8_lossy(&output.stdout), String::from_utf8_lossy(&output.stderr)); + } +}); + diff --git a/lib/ed-diff/src/lib.rs b/lib/ed-diff/src/lib.rs new file mode 100644 index 0000000..7bc8b2c --- /dev/null +++ 
b/lib/ed-diff/src/lib.rs @@ -0,0 +1,382 @@ +use std::io::Write; + +#[derive(Debug, PartialEq)] +struct Mismatch { + pub line_number_expected: usize, + pub line_number_actual: usize, + pub expected: Vec>, + pub actual: Vec>, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum DiffError { + MissingNL, +} + +impl std::fmt::Display for DiffError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + std::fmt::Display::fmt("No newline at end of file", f) + } +} + +impl From for String { + fn from(_: DiffError) -> String { + "No newline at end of file".into() + } +} + +impl Mismatch { + fn new(line_number_expected: usize, line_number_actual: usize) -> Mismatch { + Mismatch { + line_number_expected, + line_number_actual, + expected: Vec::new(), + actual: Vec::new(), + } + } +} + +// Produces a diff between the expected output and actual output. +fn make_diff(expected: &[u8], actual: &[u8]) -> Result, DiffError> { + let mut line_number_expected = 1; + let mut line_number_actual = 1; + let mut results = Vec::new(); + let mut mismatch = Mismatch::new(line_number_expected, line_number_actual); + + let mut expected_lines: Vec<&[u8]> = expected.split(|&c| c == b'\n').collect(); + let mut actual_lines: Vec<&[u8]> = actual.split(|&c| c == b'\n').collect(); + + debug_assert_eq!(b"".split(|&c| c == b'\n').count(), 1); + // ^ means that underflow here is impossible + let expected_lines_count = expected_lines.len() - 1; + let actual_lines_count = actual_lines.len() - 1; + + if expected_lines.last() == Some(&&b""[..]) { + expected_lines.pop(); + } else { + return Err(DiffError::MissingNL); + } + + if actual_lines.last() == Some(&&b""[..]) { + actual_lines.pop(); + } else { + return Err(DiffError::MissingNL); + } + + for result in diff::slice(&expected_lines, &actual_lines) { + match result { + diff::Result::Left(str) => { + if mismatch.actual.len() != 0 { + results.push(mismatch); + mismatch = Mismatch::new(line_number_expected, line_number_actual); + } + 
mismatch.expected.push(str.to_vec()); + line_number_expected += 1; + } + diff::Result::Right(str) => { + mismatch.actual.push(str.to_vec()); + line_number_actual += 1; + } + diff::Result::Both(str, _) => { + line_number_expected += 1; + line_number_actual += 1; + if mismatch.actual.len() != 0 || mismatch.expected.len() != 0 { + results.push(mismatch); + mismatch = Mismatch::new(line_number_expected, line_number_actual); + } else { + mismatch.line_number_expected = line_number_expected; + mismatch.line_number_actual = line_number_actual; + } + } + } + } + + if mismatch.actual.len() != 0 || mismatch.expected.len() != 0 { + results.push(mismatch); + } + + Ok(results) +} + +pub fn diff(expected: &[u8], actual: &[u8]) -> Result, DiffError> { + let mut output = Vec::new(); + let diff_results = make_diff(expected, actual)?; + let mut lines_offset = 0; + for result in diff_results { + let line_number_expected: isize = result.line_number_expected as isize + lines_offset; + let line_number_actual: isize = result.line_number_actual as isize + lines_offset; + let expected_count: isize = result.expected.len() as isize; + let actual_count: isize = result.actual.len() as isize; + match (expected_count, actual_count) { + (0, 0) => unreachable!(), + (0, _) => writeln!( + &mut output, + "{}a", + line_number_expected - 1 + ) + .unwrap(), + (_, 0) => writeln!( + &mut output, + "{},{}d", + line_number_expected, + expected_count + line_number_expected - 1 + ) + .unwrap(), + _ => writeln!( + &mut output, + "{},{}c", + line_number_expected, + expected_count + line_number_expected - 1 + ) + .unwrap(), + } + lines_offset += actual_count - expected_count; + if actual_count != 0 { + for actual in &result.actual { + if actual == b"." 
{ + writeln!(&mut output, "..\n.\ns/.//\na").unwrap(); + } else { + output.write_all(actual).unwrap(); + writeln!(&mut output, "").unwrap(); + } + } + writeln!(&mut output, ".").unwrap(); + } + } + return Ok(output) +} + +pub fn diff_w(expected: &[u8], actual: &[u8], filename: &str) -> Result, DiffError> { + let mut output = diff(expected, actual)?; + writeln!(&mut output, "w {}", filename).unwrap(); + Ok(output) +} + +#[test] +fn test_permutations() { + // test all possible six-line files. + let _ = std::fs::create_dir("target"); + for &a in &[0, 1, 2] { + for &b in &[0, 1, 2] { + for &c in &[0, 1, 2] { + for &d in &[0, 1, 2] { + for &e in &[0, 1, 2] { + for &f in &[0, 1, 2] { + use std::fs::{self, File}; + use std::io::Write; + use std::process::Command; + let mut alef = Vec::new(); + let mut bet = Vec::new(); + alef.write_all(if a == 0 { b"a\n" } else { b"b\n" }) + .unwrap(); + if a != 2 { + bet.write_all(b"b\n").unwrap(); + } + alef.write_all(if b == 0 { b"c\n" } else { b"d\n" }) + .unwrap(); + if b != 2 { + bet.write_all(b"d\n").unwrap(); + } + alef.write_all(if c == 0 { b"e\n" } else { b"f\n" }) + .unwrap(); + if c != 2 { + bet.write_all(b"f\n").unwrap(); + } + alef.write_all(if d == 0 { b"g\n" } else { b"h\n" }) + .unwrap(); + if d != 2 { + bet.write_all(b"h\n").unwrap(); + } + alef.write_all(if e == 0 { b"i\n" } else { b"j\n" }) + .unwrap(); + if e != 2 { + bet.write_all(b"j\n").unwrap(); + } + alef.write_all(if f == 0 { b"k\n" } else { b"l\n" }) + .unwrap(); + if f != 2 { + bet.write_all(b"l\n").unwrap(); + } + // This test diff is intentionally reversed. + // We want it to turn the alef into bet. 
+ let diff = diff_w(&alef, &bet, "target/alef").unwrap(); + File::create("target/ab.ed") + .unwrap() + .write_all(&diff) + .unwrap(); + let mut fa = File::create("target/alef").unwrap(); + fa.write_all(&alef[..]).unwrap(); + let mut fb = File::create("target/bet").unwrap(); + fb.write_all(&bet[..]).unwrap(); + let _ = fa; + let _ = fb; + let output = Command::new("ed") + .arg("target/alef") + .stdin(File::open("target/ab.ed").unwrap()) + .output() + .unwrap(); + if !output.status.success() { + panic!("{:?}", output); + } + //println!("{}", String::from_utf8_lossy(&output.stdout)); + //println!("{}", String::from_utf8_lossy(&output.stderr)); + let alef = fs::read("target/alef").unwrap(); + assert_eq!(alef, bet); + } + } + } + } + } + } +} + + +#[test] +fn test_permutations_empty_lines() { + // test all possible six-line files in which some lines are empty. + let _ = std::fs::create_dir("target"); + for &a in &[0, 1, 2] { + for &b in &[0, 1, 2] { + for &c in &[0, 1, 2] { + for &d in &[0, 1, 2] { + for &e in &[0, 1, 2] { + for &f in &[0, 1, 2] { + use std::fs::{self, File}; + use std::io::Write; + use std::process::Command; + let mut alef = Vec::new(); + let mut bet = Vec::new(); + alef.write_all(if a == 0 { b"\n" } else { b"b\n" }).unwrap(); + if a != 2 { + bet.write_all(b"b\n").unwrap(); + } + alef.write_all(if b == 0 { b"\n" } else { b"d\n" }).unwrap(); + if b != 2 { + bet.write_all(b"d\n").unwrap(); + } + alef.write_all(if c == 0 { b"\n" } else { b"f\n" }).unwrap(); + if c != 2 { + bet.write_all(b"f\n").unwrap(); + } + alef.write_all(if d == 0 { b"\n" } else { b"h\n" }).unwrap(); + if d != 2 { + bet.write_all(b"h\n").unwrap(); + } + alef.write_all(if e == 0 { b"\n" } else { b"j\n" }).unwrap(); + if e != 2 { + bet.write_all(b"j\n").unwrap(); + } + alef.write_all(if f == 0 { b"\n" } else { b"l\n" }).unwrap(); + if f != 2 { + bet.write_all(b"l\n").unwrap(); + } + // This test diff is intentionally reversed. + // We want it to turn the alef into bet. 
+ let diff = diff_w(&alef, &bet, "target/alef_").unwrap(); + File::create("target/ab_.ed") + .unwrap() + .write_all(&diff) + .unwrap(); + let mut fa = File::create("target/alef_").unwrap(); + fa.write_all(&alef[..]).unwrap(); + let mut fb = File::create("target/bet_").unwrap(); + fb.write_all(&bet[..]).unwrap(); + let _ = fa; + let _ = fb; + let output = Command::new("ed") + .arg("target/alef_") + .stdin(File::open("target/ab_.ed").unwrap()) + .output() + .unwrap(); + if !output.status.success() { + panic!("{:?}", output); + } + //println!("{}", String::from_utf8_lossy(&output.stdout)); + //println!("{}", String::from_utf8_lossy(&output.stderr)); + let alef = fs::read("target/alef_").unwrap(); + assert_eq!(alef, bet); + } + } + } + } + } + } +} + +#[test] +fn test_permutations_reverse() { + // test all possible six-line files. + let _ = std::fs::create_dir("target"); + for &a in &[0, 1, 2] { + for &b in &[0, 1, 2] { + for &c in &[0, 1, 2] { + for &d in &[0, 1, 2] { + for &e in &[0, 1, 2] { + for &f in &[0, 1, 2] { + use std::fs::{self, File}; + use std::io::Write; + use std::process::Command; + let mut alef = Vec::new(); + let mut bet = Vec::new(); + alef.write_all(if a == 0 { b"a\n" } else { b"f\n" }) + .unwrap(); + if a != 2 { + bet.write_all(b"a\n").unwrap(); + } + alef.write_all(if b == 0 { b"b\n" } else { b"e\n" }) + .unwrap(); + if b != 2 { + bet.write_all(b"b\n").unwrap(); + } + alef.write_all(if c == 0 { b"c\n" } else { b"d\n" }) + .unwrap(); + if c != 2 { + bet.write_all(b"c\n").unwrap(); + } + alef.write_all(if d == 0 { b"d\n" } else { b"c\n" }) + .unwrap(); + if d != 2 { + bet.write_all(b"d\n").unwrap(); + } + alef.write_all(if e == 0 { b"e\n" } else { b"b\n" }) + .unwrap(); + if e != 2 { + bet.write_all(b"e\n").unwrap(); + } + alef.write_all(if f == 0 { b"f\n" } else { b"a\n" }) + .unwrap(); + if f != 2 { + bet.write_all(b"f\n").unwrap(); + } + // This test diff is intentionally reversed. + // We want it to turn the alef into bet. 
+ let diff = diff_w(&alef, &bet, "target/alefr").unwrap(); + File::create("target/abr.ed") + .unwrap() + .write_all(&diff) + .unwrap(); + let mut fa = File::create("target/alefr").unwrap(); + fa.write_all(&alef[..]).unwrap(); + let mut fb = File::create("target/betr").unwrap(); + fb.write_all(&bet[..]).unwrap(); + let _ = fa; + let _ = fb; + let output = Command::new("ed") + .arg("target/alefr") + .stdin(File::open("target/abr.ed").unwrap()) + .output() + .unwrap(); + if !output.status.success() { + panic!("{:?}", output); + } + //println!("{}", String::from_utf8_lossy(&output.stdout)); + //println!("{}", String::from_utf8_lossy(&output.stderr)); + let alef = fs::read("target/alefr").unwrap(); + assert_eq!(alef, bet); + } + } + } + } + } + } +} diff --git a/lib/normal-diff/Cargo.toml b/lib/normal-diff/Cargo.toml index ae73c3d..f5add71 100644 --- a/lib/normal-diff/Cargo.toml +++ b/lib/normal-diff/Cargo.toml @@ -1,10 +1,6 @@ [package] name = "normal-diff" version = "0.3.0" -authors = [ - "Michael Howell ", - "The Rust Project Developers" -] edition = "2018" description = "An implementation of the GNU unified diff format" license = "MIT OR Apache-2.0" diff --git a/lib/normal-diff/fuzz/Cargo.toml b/lib/normal-diff/fuzz/Cargo.toml index 03fd540..00f0553 100644 --- a/lib/normal-diff/fuzz/Cargo.toml +++ b/lib/normal-diff/fuzz/Cargo.toml @@ -2,7 +2,6 @@ [package] name = "normal-diff-fuzz" version = "0.0.0" -authors = ["Automatically generated"] publish = false edition = "2018" diff --git a/lib/unified-diff/Cargo.toml b/lib/unified-diff/Cargo.toml index 42df1e2..fa4f693 100644 --- a/lib/unified-diff/Cargo.toml +++ b/lib/unified-diff/Cargo.toml @@ -1,10 +1,6 @@ [package] name = "unified-diff" version = "0.3.0" -authors = [ - "Michael Howell ", - "The Rust Project Developers" -] edition = "2018" description = "An implementation of the GNU unified diff format" license = "MIT OR Apache-2.0" diff --git a/lib/unified-diff/fuzz/Cargo.toml b/lib/unified-diff/fuzz/Cargo.toml index 
3b9e70f..5c6aafb 100644 --- a/lib/unified-diff/fuzz/Cargo.toml +++ b/lib/unified-diff/fuzz/Cargo.toml @@ -2,7 +2,6 @@ [package] name = "unified-diff-fuzz" version = "0.0.0" -authors = ["Automatically generated"] publish = false edition = "2018"