From e59d96fc470c8e9da8ee677e6faa9bcbad14b38c Mon Sep 17 00:00:00 2001 From: kqito <29191111+kqito@users.noreply.github.com> Date: Thu, 2 Jan 2025 00:47:51 +0900 Subject: [PATCH] fix match system --- Cargo.lock | 52 ++++++++++++++++++++++ Cargo.toml | 1 + README.md | 1 - src/args.rs | 11 ++--- src/grep/finder.rs | 57 ++++++++++++------------ src/grep/mod.rs | 101 ++++++++++++++++++++++++------------------ src/grep/params.rs | 37 ++++++++++++---- src/grep/tests/mod.rs | 78 ++++++++++++-------------------- src/main.rs | 4 +- 9 files changed, 202 insertions(+), 140 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4c387d8..27a4fd7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -59,12 +59,43 @@ dependencies = [ "winapi", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "diff" version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + [[package]] name = "grepath" version = "0.0.7" @@ -73,6 +104,7 @@ dependencies = [ "argh", "atty", "pretty_assertions", + "rayon", "regex", "thiserror", "walkdir", @@ -127,6 +159,26 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "regex" version = "1.10.6" diff --git a/Cargo.toml b/Cargo.toml index cb422f8..e3782a3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ thiserror = "1.0.61" argh = "0.1.10" atty = "0.2.14" walkdir = "2" +rayon = "1.10.0" [dev-dependencies] pretty_assertions = "1.4.0" diff --git a/README.md b/README.md index fcfe4f3..60bf634 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,6 @@ Positional Arguments: Options: -d, --debug help - -u, --unique unique Omit duplicate paths --help display usage information ``` diff --git a/src/args.rs b/src/args.rs index 3b923f2..54fe2b6 100644 --- a/src/args.rs +++ b/src/args.rs @@ -14,11 +14,6 @@ pub struct Args { /// help pub debug: Option, - #[argh(switch, short = 'u')] - /// unique - /// Omit duplicate paths - pub unique: Option, - #[argh(option, short = 'c')] /// current_dir /// Set the current directory @@ -30,6 +25,12 @@ pub struct Args { /// ignore /// Ignore pattern pub ignore: Vec, + + #[argh(option, short = 't')] + /// filetype + /// Filter by type: file (f), directory (d/dir), symlink (l) + /// Default: file (f) + pub filetype: Vec, } pub fn get_input() -> Result> { diff --git a/src/grep/finder.rs b/src/grep/finder.rs index 7805d06..43a67e4 100644 --- a/src/grep/finder.rs +++ b/src/grep/finder.rs @@ -2,22 +2,18 @@ use regex::Regex; use std::fmt::Debug; use walkdir::{DirEntry, WalkDir}; +use super::params::Filetype; + #[derive(Debug, Clone)] pub struct IOFinder { pub current_dir: String, pub ignore_pattern: Vec, } -#[derive(Debug, Clone)] -pub enum ResourceType { - Directory, - File, -} - #[derive(Debug, Clone)] pub struct Resource { pub path: String, - pub resource_type: ResourceType, + pub filetype: Filetype, } #[derive(Debug, Clone)] @@ -25,7 +21,7 @@ pub struct Stats { pub resources: Vec, } -pub trait Finder: FinderClone + Debug { +pub trait Finder: FinderClone + Debug + Sync + Send { fn current_dir(&mut self, current_dir: &str); fn ignore(&mut self, ignore: Vec); fn find(&mut self) -> Stats; @@ -87,12 +83,20 @@ impl Finder for IOFinder { for entry in walker.filter_entry(|e| !self.is_ignored(e)) { if let Ok(entry) = entry { + let path_str = entry.path().to_str().unwrap(); + // omit "./" prefix + let path = if path_str.starts_with("./") { + path_str[2..].to_string() + } else { + path_str.to_string() + }; + resources.push(Resource { - path: entry.path().to_str().unwrap().to_string(), - resource_type: if entry.file_type().is_dir() { - ResourceType::Directory + path, + filetype: if entry.file_type().is_dir() { + Filetype::Directory } else { - ResourceType::File + Filetype::File }, }); } @@ -102,23 +106,18 @@ impl Finder for IOFinder { } } -impl Stats { +impl Resource { pub fn as_regex(&self) -> Regex { - let mut matches: Vec = self - .resources - .iter() - .map(|r| { - let path = r.path.replace("/", r"\/"); - match r.resource_type { - ResourceType::Directory => format!(r"{}", path), - // Support for line and column numbers - ResourceType::File => format!(r"{}(:\d+:\d+)?", path), - } - }) - .collect(); - matches.sort_by(|a, b| b.len().cmp(&a.len())); - - let pattern = matches.join("|"); - Regex::new(&pattern).unwrap() + match self.filetype { + Filetype::Directory => { + let path = self.path.replace("/", r"\/"); + Regex::new(&format!(r"{}", path)).unwrap() + } + // Support for line and column numbers + Filetype::File => { + let path = self.path.replace("/", r"\/"); + Regex::new(&format!(r"{}(:\d+:\d+)?", path)).unwrap() + } + } } } diff --git a/src/grep/mod.rs b/src/grep/mod.rs index 2e5fad3..aae6ef7 100644 --- a/src/grep/mod.rs +++ b/src/grep/mod.rs @@ -2,9 +2,13 @@ mod finder; pub mod params; mod tests; -use params::GrepParams; - use crate::output::{pretty_print, Status}; +use params::{Filetype, GrepParams}; +use rayon::prelude::*; +use std::{ + fmt::Debug, + sync::{Arc, RwLock}, +}; #[derive(Debug, PartialEq)] pub enum GrepItemType { @@ -18,68 +22,77 @@ pub struct GrepItem { pub line: Option, pub column: Option, pub item_type: GrepItemType, + pub filetype: Filetype, } /// Extract path in string message with regex pub fn grep(params: &GrepParams) -> Vec { - let mut items: Vec = Vec::new(); - let mut finder = params.finder.clone(); - finder.current_dir(¶ms.current_dir); - finder.ignore(params.ignore_pattern.clone()); - let find_list = finder.find(); + let finder = Arc::new(RwLock::new(params.finder.clone())); + let mut finder_lock = finder.write().unwrap(); + finder_lock.current_dir(¶ms.current_dir); + finder_lock.ignore(params.ignore_pattern.clone()); + let find_list = finder_lock.find(); + drop(finder_lock); if params.debug { + pretty_print(&format!("Content: {:#?}", ¶ms.content), Status::Info); pretty_print(&format!("Finder: {:#?}", &find_list), Status::Info); - pretty_print( - &format!("Finder Regex: {:#?}", &find_list.as_regex()), - Status::Info, - ); } - // Iterate over all matches in the content - for cap in find_list.as_regex().captures_iter(¶ms.content) { - let matched = cap[0].to_string(); - let parts: Vec<&str> = matched.split(':').collect(); + let items: Vec<_> = find_list + .resources + .par_iter() + .filter_map(|r| { + // Improve performance by checking if it matches without using regular expressions + if !params.content.contains(&r.path) { + return None; + } + + if params.debug { + pretty_print(&format!("Matched: {}", &r.path), Status::Info); + } - let path = parts.get(0).unwrap().to_string(); - let line: Option = match parts.get(1) { - Some(line) => match line.parse::() { + let numbers = match r.as_regex().find(¶ms.content) { + Some(numbers) => numbers, + None => return None, + }; + let line = match numbers.as_str().parse::() { Ok(line) => Some(line), Err(_) => None, - }, - None => None, - }; - - let column: Option = match parts.get(2) { - Some(column) => match column.parse::() { + }; + let column = match numbers.as_str().parse::() { Ok(column) => Some(column), Err(_) => None, - }, - None => None, - }; - - let item_type = match path.starts_with('/') { - true => GrepItemType::AbsolutePath, - false => GrepItemType::RelativePath, - }; + }; + let item_type = match &r.path.starts_with('/') { + true => GrepItemType::AbsolutePath, + false => GrepItemType::RelativePath, + }; - items.push(GrepItem { - path, - line, - column, - item_type, - }); - } + Some(GrepItem { + path: r.path.clone(), + line, + column, + item_type, + filetype: r.filetype.clone(), + }) + }) + .collect(); + let mut unique_items: Vec = items.into_iter().collect(); // dedup by item.path - items.sort_by(|a, b| { + unique_items.sort_by(|a, b| { a.path .partial_cmp(&b.path) .unwrap_or(std::cmp::Ordering::Equal) }); - if params.unique { - items.dedup_by(|a, b| a.path == b.path); - } + unique_items.dedup_by(|a, b| a.path == b.path); + + // filetypeでのフィルタリング + unique_items.retain(|item| match item.filetype { + Filetype::File => params.filetype.contains(&Filetype::File), + Filetype::Directory => params.filetype.contains(&Filetype::Directory), + }); - items + unique_items } diff --git a/src/grep/params.rs b/src/grep/params.rs index 46be5d3..f492877 100644 --- a/src/grep/params.rs +++ b/src/grep/params.rs @@ -1,35 +1,41 @@ use crate::grep::finder::{Finder, IOFinder}; use anyhow::{anyhow, Error}; +#[derive(Debug, Clone, PartialEq)] +pub enum Filetype { + Directory, + File, +} + #[derive(Debug, Clone)] pub struct GrepParams { pub debug: bool, - pub unique: bool, pub content: String, pub current_dir: String, pub ignore_pattern: Vec, pub finder: Box, + pub filetype: Vec, } #[derive(Debug, Clone)] pub struct GrepParamsBuilder { debug: Option, - unique: Option, content: Option, current_dir: Option, ignore_pattern: Vec, finder: Option>, + filetype: Vec, } impl GrepParamsBuilder { pub fn new() -> Self { Self { debug: None, - unique: None, content: None, current_dir: None, ignore_pattern: vec![], finder: None, + filetype: vec![], } } @@ -38,11 +44,6 @@ impl GrepParamsBuilder { self } - pub fn unique(mut self, unique: Option) -> Self { - self.unique = unique; - self - } - pub fn content(mut self, content: Option) -> Self { self.content = content; self @@ -65,6 +66,11 @@ impl GrepParamsBuilder { self } + pub fn filetype(mut self, filetype: Vec) -> Self { + self.filetype = [filetype, self.filetype].concat(); + self + } + #[cfg(test)] pub fn finder(mut self, finder: Box) -> Self { self.finder = Some(finder); @@ -99,10 +105,22 @@ impl GrepParamsBuilder { "*.lcov".to_string(), ]; + let filetype = match self.filetype.len() { + // When no filetype is provided, default to file + 0 => vec![Filetype::File], + _ => self.filetype.iter().fold(vec![], |mut acc, filetype| { + match filetype.as_str() { + "d" | "dir" => acc.push(Filetype::Directory), + "f" | "file" => acc.push(Filetype::File), + _ => {} + }; + acc + }), + }; + match self.content { Some(content) => Ok(GrepParams { debug: self.debug.unwrap_or(false), - unique: self.unique.unwrap_or(true), content, current_dir: self.current_dir.unwrap_or(".".to_string()), ignore_pattern: if self.ignore_pattern.is_empty() { @@ -111,6 +129,7 @@ impl GrepParamsBuilder { self.ignore_pattern }, finder: self.finder.unwrap_or_else(|| Box::new(IOFinder::new())), + filetype, }), None => Err(anyhow!("Missing content to search for")), } diff --git a/src/grep/tests/mod.rs b/src/grep/tests/mod.rs index cdf5eb7..3c2dda5 100644 --- a/src/grep/tests/mod.rs +++ b/src/grep/tests/mod.rs @@ -1,6 +1,7 @@ #[cfg(test)] mod tests { - use crate::grep::finder::{Finder, Resource, ResourceType, Stats}; + use crate::grep::finder::{Finder, Resource, Stats}; + use crate::grep::params::Filetype; use crate::grep::GrepItem; use crate::grep::{grep, params::GrepParamsBuilder}; use pretty_assertions::assert_eq; @@ -49,19 +50,19 @@ mod tests { let resources = vec![ Resource { path: "test_data".to_string(), - resource_type: ResourceType::Directory, + filetype: Filetype::Directory, }, Resource { path: "test_data/test_file_1.txt".to_string(), - resource_type: ResourceType::File, + filetype: Filetype::File, }, Resource { path: "test_data/test_file_2.txt".to_string(), - resource_type: ResourceType::File, + filetype: Filetype::File, }, Resource { path: "test_data/test_file_3.txt".to_string(), - resource_type: ResourceType::File, + filetype: Filetype::File, }, ]; @@ -69,6 +70,7 @@ mod tests { "Error occurred in test_data/test_file_1.txt and test_data/test_file_2.txt:1:1"; let params = GrepParamsBuilder::new() + .debug(Some(true)) .content(Some(input_content.to_string())) .finder(Box::new(MockFinder::new(resources))) .build() @@ -85,23 +87,28 @@ mod tests { #[test] fn test_grep_start_with_dot_slash() { let resources = vec![ + Resource { + path: "test_data".to_string(), + filetype: Filetype::Directory, + }, Resource { path: "./test_data/test_file_1.txt".to_string(), - resource_type: ResourceType::File, + filetype: Filetype::File, }, Resource { path: "./test_data/test_file_2.txt".to_string(), - resource_type: ResourceType::File, + filetype: Filetype::File, }, Resource { path: "./test_data/test_file_3.txt".to_string(), - resource_type: ResourceType::File, + filetype: Filetype::File, }, ]; let input_content = "./test_data/test_file_1.txt sample content to test\n./test_data/test_file_2.txt:1:1: sample content to test"; let params = GrepParamsBuilder::new() + .debug(Some(true)) .content(Some(input_content.to_string())) .finder(Box::new(MockFinder::new(resources))) .build() @@ -116,67 +123,38 @@ mod tests { } #[test] - fn test_grep_without_unique() { + fn test_grep_start_with_dir() { let resources = vec![ Resource { - path: "test_data/test_file_1.txt".to_string(), - resource_type: ResourceType::File, - }, - Resource { - path: "test_data/test_file_2.txt".to_string(), - resource_type: ResourceType::File, - }, - Resource { - path: "test_data/test_file_3.txt".to_string(), - resource_type: ResourceType::File, + path: "test_data".to_string(), + filetype: Filetype::Directory, }, - ]; - - let input_content = "test_data/test_file_2.txt:1:1 test_data/test_file_2.txt:1:1"; - - let params = GrepParamsBuilder::new() - .unique(Some(false)) - .content(Some(input_content.to_string())) - .finder(Box::new(MockFinder::new(resources))) - .build() - .expect("Failed to build GrepParams"); - - let grep_items = grep(¶ms); - - assert_paths( - grep_items, - vec!["test_data/test_file_2.txt", "test_data/test_file_2.txt"], - ); - } - - #[test] - fn test_grep_with_unique() { - let resources = vec![ Resource { - path: "test_data/test_file_1.txt".to_string(), - resource_type: ResourceType::File, + path: "./test_data/test_file_1.txt".to_string(), + filetype: Filetype::File, }, Resource { - path: "test_data/test_file_2.txt".to_string(), - resource_type: ResourceType::File, + path: "./test_data/test_file_2.txt".to_string(), + filetype: Filetype::File, }, Resource { - path: "test_data/test_file_3.txt".to_string(), - resource_type: ResourceType::File, + path: "./test_data/test_file_3.txt".to_string(), + filetype: Filetype::File, }, ]; - let input_content = "test_data/test_file_2.txt:1:1 test_data/test_file_2.txt:1:1"; + let input_content = "./test_data/test_file_1.txt sample content to test\n./test_data/test_file_2.txt:1:1: sample content to test"; let params = GrepParamsBuilder::new() - .unique(Some(true)) + .debug(Some(true)) .content(Some(input_content.to_string())) .finder(Box::new(MockFinder::new(resources))) + .filetype(vec!["d".to_string()]) .build() .expect("Failed to build GrepParams"); let grep_items = grep(¶ms); - assert_paths(grep_items, vec!["test_data/test_file_2.txt"]); + assert_paths(grep_items, vec!["test_data"]); } } diff --git a/src/main.rs b/src/main.rs index a50a626..b10c888 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,10 +20,10 @@ fn main() { let mut params_builder = GrepParamsBuilder::new() .debug(args.debug) - .unique(args.unique) .content(input) .current_dir(args.current_dir) - .ignore(args.ignore); + .ignore(args.ignore) + .filetype(args.filetype); if let Some(f) = args.file { match params_builder.read_file_content(&f) {