From 9f0dd68f0a4d915c86a8fc64ad7f06aa2b426dcb Mon Sep 17 00:00:00 2001 From: Krysztal Huang Date: Wed, 26 Jun 2024 21:51:18 +0800 Subject: [PATCH] pgrep: Fix review --- .gitignore | 1 + src/uu/pgrep/src/pgrep.rs | 237 ++++++++++++++++-------------------- src/uu/pgrep/src/process.rs | 33 +++-- tests/by-util/test_pgrep.rs | 12 +- 4 files changed, 133 insertions(+), 150 deletions(-) diff --git a/.gitignore b/.gitignore index eb5a316..9e0202d 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ target +/.vscode/ \ No newline at end of file diff --git a/src/uu/pgrep/src/pgrep.rs b/src/uu/pgrep/src/pgrep.rs index 99c4031..558f52d 100644 --- a/src/uu/pgrep/src/pgrep.rs +++ b/src/uu/pgrep/src/pgrep.rs @@ -9,7 +9,7 @@ pub mod process; use clap::{arg, crate_version, Arg, ArgAction, ArgGroup, ArgMatches, Command}; use process::{walk_process, ProcessInformation, TerminalType}; use regex::Regex; -use std::{borrow::BorrowMut, cmp::Ordering, collections::HashSet, sync::OnceLock}; +use std::{collections::HashSet, sync::OnceLock}; use uucore::{ error::{UResult, USimpleError}, format_usage, help_about, help_usage, @@ -57,22 +57,19 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { // Verifying regex pattern // And put it into static `REGEX` - if matches.get_flag("exact") { - let regex = Regex::new(pattern.first().unwrap()) - .map_err(|e| USimpleError::new(2, e.to_string()))?; - REGEX.set(regex).unwrap(); - } + let pattern = if matches.get_flag("exact") { + format!("^{}$", pattern.first().unwrap()) + } else { + pattern.first().unwrap_or(&String::new()).to_string() + }; + REGEX + .set(Regex::new(&pattern).map_err(|e| USimpleError::new(2, e.to_string()))?) + .unwrap(); // Collect pids - let pids = collect_matched_pids(&matches); - - // Filtering pids - let pids = if flag_newest || flag_oldest { - let arg_older = matches.get_one::("older").unwrap_or(&0); - // `-o` and `-n` flag will be processed after pids collected - filter_oldest_newest(pids, flag_newest, *arg_older) - } else { - pids + let pids = { + let mut pids = collect_matched_pids(&matches); + process_flag_o_n(&matches, &mut pids) }; if pids.is_empty() { @@ -128,150 +125,120 @@ fn collect_arg_patterns(matches: &ArgMatches) -> Vec { /// Collect pids with filter construct from command line arguments fn collect_matched_pids(matches: &ArgMatches) -> Vec { - let binding = String::from(""); - let pattern = matches.get_one::("pattern").unwrap_or(&binding); - let should_inverse = matches.get_flag("inverse"); let should_ignore_case = matches.get_flag("ignore-case"); let flag_full = matches.get_flag("full"); let flag_exact = matches.get_flag("exact"); - let evaluate = |mut pid: ProcessInformation| { - let binding = pid.status(); - let name = binding.get("Name")?; + // Filtration general parameters + let filtered: Vec = { + let mut tmp_vec = Vec::new(); - // Process flag `--ignore-case` - let (name, pattern) = if should_ignore_case { - (name.to_lowercase(), pattern.to_lowercase()) - } else { - (name.into(), pattern.into()) - }; + for mut pid in walk_process().collect::>() { + let run_state_matched = + match (matches.get_one::("runstates"), (pid).run_state()) { + (Some(arg_run_states), Ok(pid_state)) => { + arg_run_states.contains(&pid_state.to_string()) + } + _ => true, + }; - // Process flag `--full` && `--exact` - let name_matched = if flag_exact { - // Equals `Name` in /proc//status - // The `unwrap` operation must succeed - // because the REGEX has been verified as correct in `uumain`. - REGEX.get().unwrap().is_match(&name) - } else if flag_full { - // Equals `cmdline` in /proc//cmdline - pid.cmdline.contains(&pattern) - } else { - name.contains(&pattern) - }; - - // Process flag `--terminal` - let tty_matched = if let Some(ttys) = matches.get_many::("terminal") { - // convert from input like `pts/0` - let ttys = ttys - .cloned() - .flat_map(TerminalType::try_from) - .collect::>(); - - if let Ok(value) = pid.ttys() { - value.iter().any(|it| ttys.contains(it)) + let binding = pid.status(); + let name = binding.get("Name").unwrap(); + let name = if should_ignore_case { + name.to_lowercase() } else { - false - } - } else { - true - }; + name.into() + }; + let pattern_matched = { + let want = if flag_exact { + // Equals `Name` in /proc//status + // The `unwrap` operation must succeed + // because the REGEX has been verified as correct in `uumain`. + &name + } else if flag_full { + // Equals `cmdline` in /proc//cmdline + &pid.cmdline + } else { + // From manpage: + // The process name used for matching is limited to the 15 characters present in the output of /proc/pid/stat. + &pid.inner_stat()[..15] + }; - // Process arg `--runstates` - // After testing, I found that GNU implementation - // just matching the char in the argument string. - let runstate_matched = if let (Some(arg_runstates), Ok(pid_state)) = - (matches.get_one::("runstates"), pid.run_state()) - { - arg_runstates.contains(&pid_state.to_string()) - } else { - true - }; + REGEX.get().unwrap().is_match(want) + }; - if (name_matched && tty_matched && runstate_matched) ^ should_inverse { - Some(pid) - } else { - None - } - }; + let tty_matched = match matches.get_many::("terminal") { + Some(ttys) => { + // convert from input like `pts/0` + let ttys = ttys + .cloned() + .flat_map(TerminalType::try_from) + .collect::>(); + match pid.ttys() { + Ok(value) => value.iter().any(|it| ttys.contains(it)), + Err(_) => false, + } + } + None => true, + }; - let mut result = Vec::new(); + let arg_older = matches.get_one::("older").unwrap_or(&0); + let older_matched = pid.start_time().unwrap() >= *arg_older; - for pid in walk_process() { - if let Some(pid) = evaluate(pid) { - result.push(pid.clone()) - } - } - - result -} - -// Make -o and -n as a group of args -fn filter_oldest_newest( - pids: Vec, - flag_newest: bool, - arg_older: u64, -) -> Vec { - let mut pids = { - let mut tmp_vec = Vec::with_capacity(pids.len()); - for mut pid in pids { - if pid.start_time().unwrap() >= arg_older { + if (run_state_matched && pattern_matched && tty_matched && older_matched) + ^ should_inverse + { tmp_vec.push(pid) } } tmp_vec }; - pids.sort_by(|a, b| { - if let (Ok(b), Ok(a)) = ( - b.to_owned().borrow_mut().start_time(), - a.to_owned().borrow_mut().start_time(), - ) { - b.cmp(&a) - } else { - Ordering::Equal - } - }); + filtered +} - let mut entry = if flag_newest { - pids.first() - } else { - pids.last() - } - .cloned() - .unwrap(); +/// Sorting pids for flag `-o` and `-n`. +/// +/// This function can also be used as a filter to filter out process information. +fn process_flag_o_n( + matches: &ArgMatches, + pids: &mut [ProcessInformation], +) -> Vec { + let flag_oldest = matches.get_flag("oldest"); + let flag_newest = matches.get_flag("newest"); - // Sort pid by start time. - // - // This closure will second sort the collecting result because there - // might be some process start at the same time, and then pick the first of the result. - let second_sort = |start_time: u64| { - let mut result = pids - .into_iter() - .filter(|it| (*it).to_owned().borrow_mut().start_time().is_ok()) - .filter(move |it| (*it).to_owned().borrow_mut().start_time().unwrap() == start_time) - .collect::>(); - - result.sort_by(|a, b| { - if let (Ok(b), Ok(a)) = ( - (*b).to_owned().borrow_mut().start_time(), - (*a).to_owned().borrow_mut().start_time(), - ) { - b.cmp(&a) - } else { - Ordering::Equal - } + if flag_oldest || flag_newest { + pids.sort_by(|a, b| { + b.clone() + .start_time() + .unwrap() + .cmp(&a.clone().start_time().unwrap()) }); - result - }; + let start_time = if flag_newest { + pids.first().cloned().unwrap().start_time().unwrap() + } else { + pids.last().cloned().unwrap().start_time().unwrap() + }; - vec![second_sort(entry.start_time().unwrap()) - .first() - .unwrap() - .clone() - .clone()] + // There might be some process start at same time, so need to be filtered. + let mut filtered = pids + .iter() + .filter(|it| (*it).clone().start_time().unwrap() == start_time) + .collect::>(); + + if flag_newest { + filtered.sort_by(|a, b| b.pid.cmp(&a.pid)) + } else { + filtered.sort_by(|a, b| a.pid.cmp(&b.pid)) + } + + vec![filtered.first().cloned().unwrap().clone()] + } else { + pids.to_vec() + } } pub fn uu_app() -> Command { diff --git a/src/uu/pgrep/src/process.rs b/src/uu/pgrep/src/process.rs index f543b39..d78d08a 100644 --- a/src/uu/pgrep/src/process.rs +++ b/src/uu/pgrep/src/process.rs @@ -217,6 +217,14 @@ impl ProcessInformation { }) } + pub fn inner_status(&self) -> &str { + &self.inner_status + } + + pub fn inner_stat(&self) -> &str { + &self.inner_stat + } + /// Collect information from `/proc//status` file pub fn status(&mut self) -> Rc> { if let Some(c) = &self.cached_status { @@ -236,16 +244,16 @@ impl ProcessInformation { } /// Collect information from `/proc//stat` file - fn stat(&mut self) -> Result>, io::Error> { + fn stat(&mut self) -> Rc> { if let Some(c) = &self.cached_stat { - return Ok(Rc::clone(c)); + return Rc::clone(c); } let result: Vec<_> = stat_split(&self.inner_stat); let result = Rc::new(result); self.cached_stat = Some(Rc::clone(&result)); - Ok(Rc::clone(&result)) + Rc::clone(&result) } /// Fetch start time from [ProcessInformation::cached_stat] @@ -259,7 +267,7 @@ impl ProcessInformation { // Kernel doc: https://docs.kernel.org/filesystems/proc.html#process-specific-subdirectories // Table 1-4 let time = self - .stat()? + .stat() .get(21) .ok_or(io::ErrorKind::InvalidData)? .parse::() @@ -270,14 +278,18 @@ impl ProcessInformation { Ok(time) } - /// Fetch start time from [ProcessInformation::cached_stat] + /// Fetch run state from [ProcessInformation::cached_stat] /// /// - [The /proc Filesystem: Table 1-4](https://docs.kernel.org/filesystems/proc.html#id10) + /// + /// # Error + /// + /// If parsing failed, this function will return [io::ErrorKind::InvalidInput] pub fn run_state(&mut self) -> Result { - RunState::try_from(self.stat()?.get(2).unwrap().as_str()) + RunState::try_from(self.stat().get(2).unwrap().as_str()) } - /// This function will scan the `/proc//df` directory + /// This function will scan the `/proc//fd` directory /// /// # Error /// @@ -400,7 +412,6 @@ mod tests { #[test] fn test_run_state_conversion() { - assert_eq!(RunState::try_from("R").unwrap(), RunState::Running); assert_eq!(RunState::try_from("R").unwrap(), RunState::Running); assert_eq!(RunState::try_from("S").unwrap(), RunState::Sleeping); assert_eq!( @@ -456,13 +467,13 @@ mod tests { #[test] fn test_stat_split() { - let case="32 (idle_inject/3) S 2 0 0 0 -1 69238848 0 0 0 0 0 0 0 0 -51 0 1 0 34 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 3 50 1 0 0 0 0 0 0 0 0 0 0 0"; + let case = "32 (idle_inject/3) S 2 0 0 0 -1 69238848 0 0 0 0 0 0 0 0 -51 0 1 0 34 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 3 50 1 0 0 0 0 0 0 0 0 0 0 0"; assert!(stat_split(case)[1] == "idle_inject/3"); - let case ="3508 (sh) S 3478 3478 3478 0 -1 4194304 67 0 0 0 0 0 0 0 20 0 1 0 11911 2961408 238 18446744073709551615 94340156948480 94340157028757 140736274114368 0 0 0 0 4096 65538 1 0 0 17 8 0 0 0 0 0 94340157054704 94340157059616 94340163108864 140736274122780 140736274122976 140736274122976 140736274124784 0"; + let case = "3508 (sh) S 3478 3478 3478 0 -1 4194304 67 0 0 0 0 0 0 0 20 0 1 0 11911 2961408 238 18446744073709551615 94340156948480 94340157028757 140736274114368 0 0 0 0 4096 65538 1 0 0 17 8 0 0 0 0 0 94340157054704 94340157059616 94340163108864 140736274122780 140736274122976 140736274122976 140736274124784 0"; assert!(stat_split(case)[1] == "sh"); - let case="47246 (kworker /10:1-events) I 2 0 0 0 -1 69238880 0 0 0 0 17 29 0 0 20 0 1 0 1396260 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 10 0 0 0 0 0 0 0 0 0 0 0 0 0"; + let case = "47246 (kworker /10:1-events) I 2 0 0 0 -1 69238880 0 0 0 0 17 29 0 0 20 0 1 0 1396260 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 10 0 0 0 0 0 0 0 0 0 0 0 0 0"; assert!(stat_split(case)[1] == "kworker /10:1-events"); } } diff --git a/tests/by-util/test_pgrep.rs b/tests/by-util/test_pgrep.rs index c8d5635..a5fbe31 100644 --- a/tests/by-util/test_pgrep.rs +++ b/tests/by-util/test_pgrep.rs @@ -45,6 +45,7 @@ fn test_full() { #[cfg(target_os = "linux")] fn test_invalid_regex() { new_ucmd!().arg("{(*").arg("--exact").fails().code_is(2); + new_ucmd!().arg("{(*").fails().code_is(2); } #[test] @@ -55,15 +56,18 @@ fn test_valid_regex() { .arg("--exact") .fails() .code_is(1); + new_ucmd!().arg("a*").succeeds(); } #[cfg(target_os = "linux")] #[test] fn test_delimiter() { - let binding = new_ucmd!().arg("sh").arg("-d |").succeeds(); - let output = binding.code_is(0).stdout_str(); - - assert!(output.contains('|')) + new_ucmd!() + .arg("sh") + .arg("-d |") + .succeeds() + .code_is(0) + .stdout_contains("|"); } #[test]