From 3959dce5d16e274477b3690b14447942391bcd6b Mon Sep 17 00:00:00 2001 From: Max Crone Date: Mon, 25 Mar 2024 13:25:28 +0100 Subject: [PATCH] Replace unmaintained unidecode with deunicode --- Cargo.lock | 14 +++++++------- Cargo.toml | 2 +- src/comparison.rs | 2 +- src/lib.rs | 2 +- src/transliterate.rs | 19 ++++++++++--------- 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 18b20b1e..72b65778 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -61,6 +61,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "deunicode" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6e854126756c496b8c81dec88f9a706b15b875c5849d4097a3854476b9fdf94" + [[package]] name = "human_name" version = "2.0.3" @@ -68,6 +74,7 @@ dependencies = [ "alloc_counter", "compact_str", "crossbeam-utils", + "deunicode", "libc", "phf", "phf_codegen", @@ -77,7 +84,6 @@ dependencies = [ "unicode-case-mapping", "unicode-normalization", "unicode-segmentation", - "unidecode", ] [[package]] @@ -282,9 +288,3 @@ name = "unicode-segmentation" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fdbf052a0783de01e944a6ce7a8cb939e295b1e7be835a1112c3b9a7f047a5a" - -[[package]] -name = "unidecode" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402bb19d8e03f1d1a7450e2bd613980869438e0666331be3e073089124aa1adc" diff --git a/Cargo.toml b/Cargo.toml index 315661c2..3bbdf7f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,13 +16,13 @@ crossbeam-utils = "0.8" unicode-segmentation = "1.9" unicode-normalization = "0.1" unicode-case-mapping = "0.4" -unidecode = "0.3" libc = { version = "0.2", optional = true } phf = "0.11" serde = { version = "1.0", features = ["derive"], optional = true } serde_json = { version = "1.0", optional = true } smallvec = { features = ["union"], version = "1.9" } compact_str = { version = "0.7.1", features = ["serde"] } +deunicode = "1.4.3" [dev-dependencies] alloc_counter = "0.0" diff --git a/src/comparison.rs b/src/comparison.rs index f0efe1d1..f1f13175 100644 --- a/src/comparison.rs +++ b/src/comparison.rs @@ -43,7 +43,7 @@ impl Name { /// names and/or suffixes are present in both names, they must match as well. /// /// Transliterates everything to ASCII before comparison using the naive - /// algorithm of [unidecode](https://github.com/chowdhurya/rust-unidecode/) + /// algorithm of [deunicode](https://github.com/kornelski/deunicode/blob/main/README.md) /// (which ignores context), and ignores case, accents and combining marks. /// /// In the case of given and middle names, allows one name to be a prefix of diff --git a/src/lib.rs b/src/lib.rs index 275a844e..229b25f7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,10 +6,10 @@ #![cfg_attr(feature = "bench", feature(test))] extern crate crossbeam_utils; +extern crate deunicode; extern crate smallvec; extern crate unicode_normalization; extern crate unicode_segmentation; -extern crate unidecode; #[cfg(test)] #[cfg(feature = "bench")] diff --git a/src/transliterate.rs b/src/transliterate.rs index 3d916af6..f515b058 100644 --- a/src/transliterate.rs +++ b/src/transliterate.rs @@ -1,14 +1,12 @@ +use deunicode::deunicode_char; use std::str::Chars; -use unidecode::unidecode_char; #[inline] -fn transliterate(c: char) -> Chars<'static> { - let s = unidecode_char(c); - if s.is_empty() {}; +fn transliterate(c: char) -> Option> { // We should maybe use unicode case folding here as an initial pass, // but without a concrete motivating case (yet) it doesn't seem worth // the cost. - unidecode_char(c).chars() + deunicode_char(c).map(|s| s.chars()) } #[inline] @@ -41,14 +39,15 @@ fn ascii_to_upper_if_alpha(c: char) -> Option { pub fn to_ascii_initial(c: char) -> Option { match c { 'A'..='Z' => Some(c), - _ => transliterate(c).find_map(ascii_to_upper_if_alpha), + _ => transliterate(c)?.find_map(ascii_to_upper_if_alpha), } } pub fn to_ascii_casefolded(text: &str) -> Option + '_> { let mut result = text .chars() - .flat_map(transliterate) + .filter_map(transliterate) + .flatten() .filter_map(ascii_to_lower_if_alpha) .peekable(); @@ -63,7 +62,8 @@ pub fn to_ascii_casefolded(text: &str) -> Option + '_ pub fn to_ascii_casefolded_reversed(text: &str) -> Option + '_> { let mut result = text .chars() - .flat_map(transliterate) + .filter_map(transliterate) + .flatten() .rev() .filter_map(ascii_to_lower_if_alpha) .peekable(); @@ -79,7 +79,8 @@ pub fn to_ascii_casefolded_reversed(text: &str) -> Option Option { let mut result = s .chars() - .flat_map(transliterate) + .filter_map(transliterate) + .flatten() .filter_map(ascii_to_lower_if_alpha); result.next().map(|initial| {