Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ duplicates = dd.dupes(
path="path/to/images",
algo="dhash", # Optional: default = dhash
filter="triangle", # Optional: default = triangle
hash_size=8, # Optional: default = 8 (only used for pHash)
remove=False # Optional: default = False
)
print(duplicates)
Expand Down
2 changes: 2 additions & 0 deletions crates/imgdd/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ let result = hash(
PathBuf::from("path/to/images"),
Some("Triangle"), // Optional: default = "Triangle"
Some("dHash"), // Optional: default = "dHash"
None, // Optional: default = 8 (only used for pHash)
Some(false), // Optional: default = false
);
println!("{:#?}", result);
Expand All @@ -55,6 +56,7 @@ let result = dupes(
PathBuf::from("path/to/images"),
Some("Triangle"), // Optional: default = "Triangle"
Some("dHash"), // Optional: default = "dHash"
None, // Optional: default = 8 (only used for pHash)
false,
);
println!("{:#?}", result);
Expand Down
2 changes: 2 additions & 0 deletions crates/imgdd/benches/rust_benches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ fn benchmark_hash(c: &mut Criterion) {
black_box(dir_path.clone()),
Some("nearest"),
Some("dhash"),
None,
Some(false),
);
let _ = black_box(result).is_ok(); // Ignore the result
Expand All @@ -43,6 +44,7 @@ fn benchmark_dupes(c: &mut Criterion) {
black_box(dir_path.clone()),
Some("nearest"),
Some("dhash"),
None,
false,
);
let _ = black_box(result).is_ok(); // Ignore the result
Expand Down
20 changes: 16 additions & 4 deletions crates/imgdd/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ pub fn select_algo(algo: Option<&str>) -> &'static str {
/// - **Default:** "dHash"
/// - `sort` - Boolean to determine if the hashes should be sorted.
/// - **Default:** false
/// - `hash_size` - Integer specifying the hash size to use for pHash.
/// The resulting hash will be hash_size^2 bits long. The value is
/// ignored for all hash methods other than pHash.
/// - **Default:** 8
///
/// # Returns
///
Expand All @@ -79,6 +83,7 @@ pub fn select_algo(algo: Option<&str>) -> &'static str {
/// PathBuf::from("path/to/images"),
/// Some("Triangle"), // Optional: default = "Triangle"
/// Some("dHash"), // Optional: default = "dHash"
/// None, // Optional: default = 8
/// Some(false), // Optional: default = false
/// );
///
Expand All @@ -88,13 +93,14 @@ pub fn hash(
path: PathBuf,
filter: Option<&str>,
algo: Option<&str>,
hash_size: Option<usize>,
sort: Option<bool>,
) -> Result<Vec<(u64, PathBuf)>, Error> {
) -> Result<Vec<(imgddcore::hashing::ImageHash, PathBuf)>, Error> {
let validated_path = validate_path(&path)?;
let filter_type = select_filter_type(filter);
let selected_algo = select_algo(algo);

let mut hash_paths = collect_hashes(validated_path, filter_type, selected_algo)?;
let mut hash_paths = collect_hashes(&validated_path, filter_type, selected_algo, hash_size)?;

// Optionally sort hashes
if sort.unwrap_or(false) {
Expand All @@ -115,6 +121,10 @@ pub fn hash(
/// - `algo` - String specifying the hashing algorithm to use.
/// - **Options:** [`aHash`, `mHash`, `dHash`, `pHash`, `wHash`]
/// - **Default:** "dHash"
/// - `hash_size` - Integer specifying the hash size to use for pHash.
/// The resulting hash will be hash_size^2 bits long. The value is
/// ignored for all hash methods other than pHash.
/// - **Default:** 8
/// - `remove` - Boolean indicating whether duplicate files should be removed.
///
/// # Returns
Expand All @@ -130,6 +140,7 @@ pub fn hash(
/// PathBuf::from("path/to/images"),
/// Some("Triangle"), // Optional: default = "Triangle"
/// Some("dHash"), // Optional: default = "dHash"
/// None, // Optional: default = 8
/// false,
/// );
///
Expand All @@ -139,13 +150,14 @@ pub fn dupes(
path: PathBuf,
filter: Option<&str>,
algo: Option<&str>,
hash_size: Option<usize>,
remove: bool,
) -> Result<HashMap<u64, Vec<PathBuf>>, Error> {
) -> Result<HashMap<imgddcore::hashing::ImageHash, Vec<PathBuf>>, Error> {
let validated_path = validate_path(&path)?;
let filter_type = select_filter_type(filter);
let selected_algo = select_algo(algo);

let mut hash_paths = collect_hashes(validated_path, filter_type, selected_algo)?;
let mut hash_paths = collect_hashes(&validated_path, filter_type, selected_algo, hash_size)?;
sort_hashes(&mut hash_paths);

Ok(find_duplicates(&hash_paths, remove)?)
Expand Down
13 changes: 11 additions & 2 deletions crates/imgdd/tests/rust_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ mod tests {
temp_dir.path().to_path_buf(),
Some("nearest"),
Some("dhash"),
None,
Some(false),
);
assert!(result.is_ok(), "Hash function failed: {:?}", result.err());
Expand All @@ -62,6 +63,7 @@ mod tests {
invalid_path.clone(),
Some("nearest"),
Some("dhash"),
None,
Some(false),
);
assert!(
Expand All @@ -74,7 +76,7 @@ mod tests {
#[test]
fn test_hash_with_sorting() {
let img_dir = PathBuf::from("../../imgs/test/apple_pie");
let result = hash(img_dir, Some("nearest"), Some("dhash"), Some(true));
let result = hash(img_dir, Some("nearest"), Some("dhash"), None, Some(true));

assert!(result.is_ok(), "Hash function failed: {:?}", result.err());

Expand Down Expand Up @@ -109,6 +111,7 @@ mod tests {
temp_dir.path().to_path_buf(),
Some("nearest"),
Some("dhash"),
None,
false,
);
assert!(result.is_ok(), "Dupes function failed: {:?}", result.err());
Expand All @@ -124,7 +127,13 @@ mod tests {
#[test]
fn test_dupes_with_invalid_path() {
let invalid_path = PathBuf::from("/non/existent/path");
let result = dupes(invalid_path.clone(), Some("nearest"), Some("dhash"), false);
let result = dupes(
invalid_path.clone(),
Some("nearest"),
Some("dhash"),
None,
false,
);
assert!(
result.is_err(),
"Expected error for invalid path: {:?}",
Expand Down
21 changes: 16 additions & 5 deletions crates/imgddcore/benches/core_benches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ fn benchmark_collect_hashes(c: &mut Criterion) {
black_box(&dir_path),
black_box(image::imageops::FilterType::Triangle),
black_box("dhash"),
black_box(None),
)
.expect("Failed to collect hashes");
});
Expand All @@ -55,8 +56,13 @@ fn benchmark_collect_hashes(c: &mut Criterion) {

fn benchmark_sort_hashes(c: &mut Criterion) {
let dir_path = PathBuf::from("../../imgs/test");
let mut hash_paths = collect_hashes(&dir_path, image::imageops::FilterType::Triangle, "dhash")
.expect("Failed to collect hashes");
let mut hash_paths = collect_hashes(
&dir_path,
image::imageops::FilterType::Triangle,
"dhash",
None,
)
.expect("Failed to collect hashes");

c.bench_function("sort_hashes", |b| {
b.iter(|| {
Expand All @@ -67,8 +73,13 @@ fn benchmark_sort_hashes(c: &mut Criterion) {

fn benchmark_find_duplicates(c: &mut Criterion) {
let dir_path = PathBuf::from("../../imgs/test");
let mut hash_paths = collect_hashes(&dir_path, image::imageops::FilterType::Triangle, "dhash")
.expect("Failed to collect hashes");
let mut hash_paths = collect_hashes(
&dir_path,
image::imageops::FilterType::Triangle,
"dhash",
None,
)
.expect("Failed to collect hashes");
sort_hashes(&mut hash_paths);

c.bench_function("find_duplicates", |b| {
Expand Down Expand Up @@ -139,7 +150,7 @@ fn benchmark_phash(c: &mut Criterion) {
c.bench_function("phash", |b| {
b.iter(|| {
// Compute pHash for the normalized image
ImageHash::phash(black_box(&normalized_image)).expect("Failed to compute phash");
ImageHash::phash(black_box(&normalized_image), 8).expect("Failed to compute phash");
});
});
}
Expand Down
29 changes: 16 additions & 13 deletions crates/imgddcore/src/dedupe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use walkdir::WalkDir;
/// Options: `Nearest`, `Triangle`, `CatmullRom`, `Gaussian`, `Lanczos3`.
/// * `algo` - The hashing algorithm to use.
/// Options: `dhash`, `ahash`, `mhash`, `phash`, `whash`.
/// * `hash_size` - The hash size for phash (ignored by other algorithms).
///
/// # Returns
///
Expand All @@ -27,38 +28,40 @@ pub fn collect_hashes(
path: &PathBuf,
filter: FilterType,
algo: &str,
) -> Result<Vec<(u64, PathBuf)>, Error> {
hash_size: Option<usize>,
) -> Result<Vec<(ImageHash, PathBuf)>, Error> {
let files: Vec<PathBuf> = WalkDir::new(path)
.into_iter()
.filter_map(|entry| entry.ok())
.filter(|entry| entry.file_type().is_file())
.map(|entry| entry.path().to_path_buf())
.collect();

let hash_paths: Vec<(u64, PathBuf)> = files
let hash_paths: Vec<(ImageHash, PathBuf)> = files
.par_iter()
.filter_map(|file_path| match open_image(file_path) {
Ok(image) => {
let hash = match algo {
"dhash" => {
let normalized = normalize::proc(&image, filter, 9, 8).ok()?;
ImageHash::dhash(&normalized).ok()?.get_hash()
ImageHash::dhash(&normalized).ok()?
}
"ahash" => {
let normalized = normalize::proc(&image, filter, 8, 8).ok()?;
ImageHash::ahash(&normalized).ok()?.get_hash()
ImageHash::ahash(&normalized).ok()?
}
"mhash" => {
let normalized = normalize::proc(&image, filter, 8, 8).ok()?;
ImageHash::mhash(&normalized).ok()?.get_hash()
ImageHash::mhash(&normalized).ok()?
}
"phash" => {
let hash_size = hash_size.unwrap_or(8);
let normalized = normalize::proc(&image, filter, 32, 32).ok()?;
ImageHash::phash(&normalized).ok()?.get_hash()
ImageHash::phash(&normalized, hash_size).ok()?
}
"whash" => {
let normalized = normalize::proc(&image, filter, 8, 8).ok()?;
ImageHash::whash(&normalized).ok()?.get_hash()
ImageHash::whash(&normalized).ok()?
}
_ => panic!("Unsupported hashing algorithm: {}", algo),
};
Expand All @@ -80,8 +83,8 @@ pub fn collect_hashes(
///
/// * `hash_paths` - A mutable reference to a vector of hash-path tuples.
#[inline]
pub fn sort_hashes(hash_paths: &mut Vec<(u64, PathBuf)>) {
hash_paths.sort_by_key(|(hash, _)| *hash);
pub fn sort_hashes(hash_paths: &mut Vec<(ImageHash, PathBuf)>) {
hash_paths.sort_by_key(|(hash, _)| hash.clone());
}

/// Opens an image file and decodes it.
Expand Down Expand Up @@ -120,16 +123,16 @@ pub fn open_image(file_path: &PathBuf) -> Result<DynamicImage> {
///
/// Returns an error if a file fails to be removed when `remove` is set to `true`.
pub fn find_duplicates(
hash_paths: &[(u64, PathBuf)],
hash_paths: &[(ImageHash, PathBuf)],
remove: bool,
) -> Result<HashMap<u64, Vec<PathBuf>>, Error> {
let mut duplicates_map: HashMap<u64, Vec<PathBuf>> = HashMap::new();
) -> Result<HashMap<ImageHash, Vec<PathBuf>>, Error> {
let mut duplicates_map: HashMap<ImageHash, Vec<PathBuf>> = HashMap::new();

for window in hash_paths.windows(2) {
if let [(hash1, path1), (hash2, path2)] = window {
if hash1 == hash2 {
duplicates_map
.entry(*hash1)
.entry(hash1.clone())
.or_insert_with(Vec::new)
.extend(vec![path1.clone(), path2.clone()]);
}
Expand Down
Loading