From 0c4a0b94f86620d23ae49e9ae64a1a032917361c Mon Sep 17 00:00:00 2001 From: Aaron Stopher <22336995+aastopher@users.noreply.github.com> Date: Mon, 10 Feb 2025 20:51:09 -0700 Subject: [PATCH 1/2] resolve clippy warnings + shift reused lib functions to core --- Cargo.lock | 6 +-- Cargo.toml | 2 +- crates/imgdd/Cargo.toml | 2 +- crates/imgdd/benches/rust_benches.rs | 18 -------- crates/imgdd/src/lib.rs | 53 +++--------------------- crates/imgdd/tests/rust_tests.rs | 31 -------------- crates/imgddcore/benches/core_benches.rs | 19 +++++++++ crates/imgddcore/src/dedupe.rs | 5 ++- crates/imgddcore/src/lib.rs | 1 + crates/imgddcore/src/utils.rs | 45 ++++++++++++++++++++ crates/imgddcore/tests/dedupe_tests.rs | 2 +- crates/imgddcore/tests/validate_tests.rs | 2 +- crates/imgddpy/Cargo.toml | 4 +- crates/imgddpy/src/lib.rs | 34 +++------------ 14 files changed, 87 insertions(+), 137 deletions(-) create mode 100644 crates/imgddcore/src/utils.rs diff --git a/Cargo.lock b/Cargo.lock index 0020bc3..e31b593 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -501,7 +501,7 @@ dependencies = [ [[package]] name = "imgdd" -version = "0.1.3" +version = "0.1.4" dependencies = [ "anyhow", "criterion", @@ -512,7 +512,7 @@ dependencies = [ [[package]] name = "imgddcore" -version = "0.1.3" +version = "0.1.4" dependencies = [ "anyhow", "codspeed-criterion-compat", @@ -527,7 +527,7 @@ dependencies = [ [[package]] name = "imgddpy" -version = "0.1.5" +version = "0.1.6" dependencies = [ "image", "imgddcore", diff --git a/Cargo.toml b/Cargo.toml index ab3c29c..4af63c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ members = [ ] [workspace.package] -version = "0.1.3" +version = "0.1.4" edition = "2021" license = "GPL-3.0-or-later" authors = ["Aaron Stopher "] diff --git a/crates/imgdd/Cargo.toml b/crates/imgdd/Cargo.toml index abc20da..4f90cec 100644 --- a/crates/imgdd/Cargo.toml +++ b/crates/imgdd/Cargo.toml @@ -13,7 +13,7 @@ categories.workspace = true readme = "README.md" [dependencies] -imgddcore = { path = "../imgddcore", version = "0.1.3" } +imgddcore = { path = "../imgddcore", version = "0.1.4" } image.workspace = true anyhow.workspace = true criterion = { version = "0.5.1", optional = true } diff --git a/crates/imgdd/benches/rust_benches.rs b/crates/imgdd/benches/rust_benches.rs index 98ea33e..7c4d022 100644 --- a/crates/imgdd/benches/rust_benches.rs +++ b/crates/imgdd/benches/rust_benches.rs @@ -2,22 +2,6 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use imgdd::*; use std::path::PathBuf; -fn benchmark_select_filter_type(c: &mut Criterion) { - c.bench_function("select_filter_type", |b| { - b.iter(|| { - black_box(select_filter_type(Some("nearest"))); - }); - }); -} - -fn benchmark_select_algo(c: &mut Criterion) { - c.bench_function("select_algo", |b| { - b.iter(|| { - black_box(select_algo(Some("dhash"))); - }); - }); -} - fn benchmark_hash(c: &mut Criterion) { let dir_path = PathBuf::from("../../imgs/test/single"); @@ -52,8 +36,6 @@ fn benchmark_dupes(c: &mut Criterion) { criterion_group!( rust_interface_benchmarks, - benchmark_select_filter_type, - benchmark_select_algo, benchmark_hash, benchmark_dupes ); diff --git a/crates/imgdd/src/lib.rs b/crates/imgdd/src/lib.rs index e5e0031..49a16e7 100644 --- a/crates/imgdd/src/lib.rs +++ b/crates/imgdd/src/lib.rs @@ -2,56 +2,12 @@ //! Leverages perceptual hashing algorithms to identify duplicate or visually similar images in a directory. use anyhow::Error; -use image::imageops::FilterType; -use imgddcore::dedupe::*; -use imgddcore::validate::*; +use imgddcore::dedupe::{collect_hashes, find_duplicates, sort_hashes}; +use imgddcore::utils::{select_algo, select_filter_type}; +use imgddcore::validate::validate_path; use std::collections::HashMap; use std::path::PathBuf; -/// Converts a string to a `FilterType`. -/// -/// # Arguments -/// -/// - `filter` - String specifying the filter type. -/// - **Options:** [`Nearest`, `Triangle`, `CatmullRom`, `Gaussian`, `Lanczos3`] -/// -/// # Returns -/// -/// - A `FilterType` enum corresponding to the input string. -#[inline] -pub fn select_filter_type(filter: Option<&str>) -> FilterType { - match filter.unwrap_or("nearest") { - ref f if f.eq_ignore_ascii_case("nearest") => FilterType::Nearest, - ref f if f.eq_ignore_ascii_case("triangle") => FilterType::Triangle, - ref f if f.eq_ignore_ascii_case("catmullrom") => FilterType::CatmullRom, - ref f if f.eq_ignore_ascii_case("gaussian") => FilterType::Gaussian, - ref f if f.eq_ignore_ascii_case("lanczos3") => FilterType::Lanczos3, - other => panic!("Unsupported filter type: {}", other), - } -} - -/// Selects a hashing algorithm. -/// -/// # Arguments -/// -/// - `algo` - String specifying the hashing algorithm. -/// - **Options:** [`aHash`, `mHash`, `dHash`, `pHash`, `wHash`] -/// -/// # Returns -/// -/// - A standardized `&'static str` representing the selected algorithm. -#[inline] -pub fn select_algo(algo: Option<&str>) -> &'static str { - match algo.unwrap_or("dhash") { - input if input.eq_ignore_ascii_case("dhash") => "dhash", - input if input.eq_ignore_ascii_case("ahash") => "ahash", - input if input.eq_ignore_ascii_case("mhash") => "mhash", - input if input.eq_ignore_ascii_case("phash") => "phash", - input if input.eq_ignore_ascii_case("whash") => "whash", - other => panic!("Unsupported algorithm: {}", other), - } -} - /// Calculates hashes for all images in a directory recursively. /// /// # Arguments @@ -148,5 +104,6 @@ pub fn dupes( let mut hash_paths = collect_hashes(validated_path, filter_type, selected_algo)?; sort_hashes(&mut hash_paths); - Ok(find_duplicates(&hash_paths, remove)?) + // Ok(find_duplicates(&hash_paths, remove)?) + find_duplicates(&hash_paths, remove) } diff --git a/crates/imgdd/tests/rust_tests.rs b/crates/imgdd/tests/rust_tests.rs index 2771532..01bf3ad 100644 --- a/crates/imgdd/tests/rust_tests.rs +++ b/crates/imgdd/tests/rust_tests.rs @@ -1,42 +1,11 @@ #[cfg(test)] mod tests { - use image::imageops::FilterType; use imgdd::*; use std::fs::File; use std::io::Write; use std::path::PathBuf; use tempfile::tempdir; - #[test] - fn test_select_filter_type() { - assert_eq!(select_filter_type(Some("nearest")), FilterType::Nearest); - assert_eq!(select_filter_type(Some("triangle")), FilterType::Triangle); - assert_eq!( - select_filter_type(Some("catmullrom")), - FilterType::CatmullRom - ); - assert_eq!(select_filter_type(Some("gaussian")), FilterType::Gaussian); - assert_eq!(select_filter_type(Some("lanczos3")), FilterType::Lanczos3); - - let result = std::panic::catch_unwind(|| select_filter_type(Some("unsupported"))); - assert!( - result.is_err(), - "Expected panic for unsupported filter type" - ); - } - - #[test] - fn test_select_algo() { - assert_eq!(select_algo(Some("dhash")), "dhash"); - assert_eq!(select_algo(Some("ahash")), "ahash"); - assert_eq!(select_algo(Some("mhash")), "mhash"); - assert_eq!(select_algo(Some("phash")), "phash"); - assert_eq!(select_algo(Some("whash")), "whash"); - - let result = std::panic::catch_unwind(|| select_algo(Some("unsupported"))); - assert!(result.is_err(), "Expected panic for unsupported algorithm"); - } - #[test] fn test_hash_with_valid_inputs() { let temp_dir = tempdir().unwrap(); diff --git a/crates/imgddcore/benches/core_benches.rs b/crates/imgddcore/benches/core_benches.rs index f9c7d6f..2a3d713 100644 --- a/crates/imgddcore/benches/core_benches.rs +++ b/crates/imgddcore/benches/core_benches.rs @@ -1,6 +1,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use imgddcore::dedupe::{collect_hashes, find_duplicates, open_image, sort_hashes}; +use imgddcore::utils::{select_algo, select_filter_type}; use imgddcore::hashing::ImageHash; use imgddcore::normalize::proc as normalize; use std::path::PathBuf; @@ -11,6 +12,22 @@ use std::path::PathBuf; // To resolve this we must use hosted codspeed macro runners which require a pro plan. // For now I will just leave this warning here. +fn benchmark_select_filter_type(c: &mut Criterion) { + c.bench_function("select_filter_type", |b| { + b.iter(|| { + black_box(select_filter_type(Some("nearest"))); + }); + }); +} + +fn benchmark_select_algo(c: &mut Criterion) { + c.bench_function("select_algo", |b| { + b.iter(|| { + black_box(select_algo(Some("dhash"))); + }); + }); +} + fn open_image_bench(c: &mut Criterion) { let path = PathBuf::from("../../imgs/test/single/file000898199107.jpg"); @@ -174,6 +191,8 @@ criterion_group! { criterion_group!( group3, + benchmark_select_filter_type, + benchmark_select_algo, benchmark_ahash, benchmark_mhash, benchmark_dhash, diff --git a/crates/imgddcore/src/dedupe.rs b/crates/imgddcore/src/dedupe.rs index 51ec3e4..fadb678 100644 --- a/crates/imgddcore/src/dedupe.rs +++ b/crates/imgddcore/src/dedupe.rs @@ -80,7 +80,7 @@ pub fn collect_hashes( /// /// * `hash_paths` - A mutable reference to a vector of hash-path tuples. #[inline] -pub fn sort_hashes(hash_paths: &mut Vec<(u64, PathBuf)>) { +pub fn sort_hashes(hash_paths: &mut [(u64, PathBuf)]) { hash_paths.sort_by_key(|(hash, _)| *hash); } @@ -130,7 +130,8 @@ pub fn find_duplicates( if hash1 == hash2 { duplicates_map .entry(*hash1) - .or_insert_with(Vec::new) + // .or_insert_with(Vec::new) + .or_default() .extend(vec![path1.clone(), path2.clone()]); } } diff --git a/crates/imgddcore/src/lib.rs b/crates/imgddcore/src/lib.rs index ed75035..d173d3e 100644 --- a/crates/imgddcore/src/lib.rs +++ b/crates/imgddcore/src/lib.rs @@ -1,4 +1,5 @@ pub mod dedupe; pub mod hashing; pub mod normalize; +pub mod utils; pub mod validate; diff --git a/crates/imgddcore/src/utils.rs b/crates/imgddcore/src/utils.rs new file mode 100644 index 0000000..ff21f45 --- /dev/null +++ b/crates/imgddcore/src/utils.rs @@ -0,0 +1,45 @@ +use image::imageops::FilterType; + +/// Converts a string to a `FilterType`. +/// +/// # Arguments +/// +/// - `filter` - String specifying the filter type. +/// - **Options:** [`Nearest`, `Triangle`, `CatmullRom`, `Gaussian`, `Lanczos3`] +/// +/// # Returns +/// +/// - A `FilterType` enum corresponding to the input string. +#[inline] +pub fn select_filter_type(filter: Option<&str>) -> FilterType { + match filter.unwrap_or("nearest") { + f if f.eq_ignore_ascii_case("nearest") => FilterType::Nearest, + f if f.eq_ignore_ascii_case("triangle") => FilterType::Triangle, + f if f.eq_ignore_ascii_case("catmullrom") => FilterType::CatmullRom, + f if f.eq_ignore_ascii_case("gaussian") => FilterType::Gaussian, + f if f.eq_ignore_ascii_case("lanczos3") => FilterType::Lanczos3, + other => panic!("Unsupported filter type: {}", other), + } +} + +/// Selects a hashing algorithm. +/// +/// # Arguments +/// +/// - `algo` - String specifying the hashing algorithm. +/// - **Options:** [`aHash`, `mHash`, `dHash`, `pHash`, `wHash`] +/// +/// # Returns +/// +/// - A standardized `&'static str` representing the selected algorithm. +#[inline] +pub fn select_algo(algo: Option<&str>) -> &'static str { + match algo.unwrap_or("dhash") { + input if input.eq_ignore_ascii_case("dhash") => "dhash", + input if input.eq_ignore_ascii_case("ahash") => "ahash", + input if input.eq_ignore_ascii_case("mhash") => "mhash", + input if input.eq_ignore_ascii_case("phash") => "phash", + input if input.eq_ignore_ascii_case("whash") => "whash", + other => panic!("Unsupported algorithm: {}", other), + } +} diff --git a/crates/imgddcore/tests/dedupe_tests.rs b/crates/imgddcore/tests/dedupe_tests.rs index d9dd978..75835ce 100644 --- a/crates/imgddcore/tests/dedupe_tests.rs +++ b/crates/imgddcore/tests/dedupe_tests.rs @@ -2,7 +2,7 @@ mod tests { use image::imageops::FilterType; use image::{DynamicImage, Rgba}; - use imgddcore::dedupe::*; + use imgddcore::dedupe::{collect_hashes, find_duplicates, open_image, sort_hashes}; use std::fs::File; use std::io::Write; use std::panic; diff --git a/crates/imgddcore/tests/validate_tests.rs b/crates/imgddcore/tests/validate_tests.rs index 4796239..e4ae2a1 100644 --- a/crates/imgddcore/tests/validate_tests.rs +++ b/crates/imgddcore/tests/validate_tests.rs @@ -1,6 +1,6 @@ #[cfg(test)] mod tests { - use imgddcore::validate::*; + use imgddcore::validate::validate_path; use std::path::PathBuf; use tempfile::{tempdir, NamedTempFile}; diff --git a/crates/imgddpy/Cargo.toml b/crates/imgddpy/Cargo.toml index dd60f54..b6d48d2 100644 --- a/crates/imgddpy/Cargo.toml +++ b/crates/imgddpy/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "imgddpy" -version = "0.1.5" +version = "0.1.6" edition.workspace = true license.workspace = true authors.workspace = true @@ -12,7 +12,7 @@ homepage.workspace = true readme = "README.md" [dependencies] -imgddcore = { path = "../imgddcore", version = "0.1.3" } +imgddcore = { path = "../imgddcore", version = "0.1.4" } pyo3 = { version = "0.23", features = ["extension-module", "abi3-py39"] } image.workspace = true diff --git a/crates/imgddpy/src/lib.rs b/crates/imgddpy/src/lib.rs index 4358333..89d353b 100644 --- a/crates/imgddpy/src/lib.rs +++ b/crates/imgddpy/src/lib.rs @@ -1,34 +1,10 @@ -use image::imageops::FilterType; -use imgddcore::dedupe::*; -use imgddcore::validate::*; +use imgddcore::dedupe::{collect_hashes, find_duplicates, sort_hashes}; +use imgddcore::utils::{select_algo, select_filter_type}; +use imgddcore::validate::validate_path; use pyo3::prelude::*; use std::collections::HashMap; use std::path::PathBuf; -#[inline] -fn select_filter_type(filter: Option<&str>) -> FilterType { - match filter.unwrap_or("nearest") { - ref f if f.eq_ignore_ascii_case("nearest") => FilterType::Nearest, - ref f if f.eq_ignore_ascii_case("triangle") => FilterType::Triangle, - ref f if f.eq_ignore_ascii_case("catmullrom") => FilterType::CatmullRom, - ref f if f.eq_ignore_ascii_case("gaussian") => FilterType::Gaussian, - ref f if f.eq_ignore_ascii_case("lanczos3") => FilterType::Lanczos3, - other => panic!("Unsupported filter type: {}", other), - } -} - -#[inline] -fn select_algo(algo: Option<&str>) -> &'static str { - match algo.unwrap_or("dhash") { - input if input.eq_ignore_ascii_case("dhash") => "dhash", - input if input.eq_ignore_ascii_case("ahash") => "ahash", - input if input.eq_ignore_ascii_case("mhash") => "mhash", - input if input.eq_ignore_ascii_case("phash") => "phash", - input if input.eq_ignore_ascii_case("whash") => "whash", - other => panic!("Unsupported algorithm: {}", other), - } -} - /// ```python /// hash(path, filter="triangle", algo="dhash", sort=False) /// ``` @@ -73,7 +49,7 @@ pub fn hash( let filter_type = select_filter_type(filter); let algo = select_algo(algo); - let mut hash_paths = collect_hashes(&validated_path, filter_type, &algo) + let mut hash_paths = collect_hashes(validated_path, filter_type, algo) .map_err(|e| PyErr::new::(format!("{}", e)))?; // Optionally sort hashes @@ -131,7 +107,7 @@ pub fn dupes( let filter_type = select_filter_type(filter); let algo = select_algo(algo); - let mut hash_paths = collect_hashes(&validated_path, filter_type, &algo) + let mut hash_paths = collect_hashes(validated_path, filter_type, algo) .map_err(|e| PyErr::new::(format!("{}", e)))?; sort_hashes(&mut hash_paths); From 40d76412738093a8d5603dc594580bb6eb998a0b Mon Sep 17 00:00:00 2001 From: Aaron Stopher <22336995+aastopher@users.noreply.github.com> Date: Mon, 10 Feb 2025 20:58:54 -0700 Subject: [PATCH 2/2] shift test coverage --- crates/imgddcore/tests/utils_tests.rs | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 crates/imgddcore/tests/utils_tests.rs diff --git a/crates/imgddcore/tests/utils_tests.rs b/crates/imgddcore/tests/utils_tests.rs new file mode 100644 index 0000000..0a3b4a7 --- /dev/null +++ b/crates/imgddcore/tests/utils_tests.rs @@ -0,0 +1,33 @@ +use image::imageops::FilterType; +use imgddcore::utils::{select_algo, select_filter_type}; + + +#[test] +fn test_select_filter_type() { + assert_eq!(select_filter_type(Some("nearest")), FilterType::Nearest); + assert_eq!(select_filter_type(Some("triangle")), FilterType::Triangle); + assert_eq!( + select_filter_type(Some("catmullrom")), + FilterType::CatmullRom + ); + assert_eq!(select_filter_type(Some("gaussian")), FilterType::Gaussian); + assert_eq!(select_filter_type(Some("lanczos3")), FilterType::Lanczos3); + + let result = std::panic::catch_unwind(|| select_filter_type(Some("unsupported"))); + assert!( + result.is_err(), + "Expected panic for unsupported filter type" + ); +} + +#[test] +fn test_select_algo() { + assert_eq!(select_algo(Some("dhash")), "dhash"); + assert_eq!(select_algo(Some("ahash")), "ahash"); + assert_eq!(select_algo(Some("mhash")), "mhash"); + assert_eq!(select_algo(Some("phash")), "phash"); + assert_eq!(select_algo(Some("whash")), "whash"); + + let result = std::panic::catch_unwind(|| select_algo(Some("unsupported"))); + assert!(result.is_err(), "Expected panic for unsupported algorithm"); +} \ No newline at end of file