Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ crossbeam-utils = "0.8"
unicode-segmentation = "1.9"
unicode-normalization = "0.1"
unicode-case-mapping = "0.4"
unidecode = "0.3"
libc = { version = "0.2", optional = true }
phf = "0.11"
serde = { version = "1.0", features = ["derive"], optional = true }
serde_json = { version = "1.0", optional = true }
smallvec = { features = ["union"], version = "1.9" }
compact_str = { version = "0.7.1", features = ["serde"] }
deunicode = "1.4.3"

[dev-dependencies]
alloc_counter = "0.0"
Expand Down
2 changes: 1 addition & 1 deletion src/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ impl Name {
/// names and/or suffixes are present in both names, they must match as well.
///
/// Transliterates everything to ASCII before comparison using the naive
/// algorithm of [unidecode](https://github.com/chowdhurya/rust-unidecode/)
/// algorithm of [deunicode](https://github.com/kornelski/deunicode/blob/main/README.md)
/// (which ignores context), and ignores case, accents and combining marks.
///
/// In the case of given and middle names, allows one name to be a prefix of
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
#![cfg_attr(feature = "bench", feature(test))]

extern crate crossbeam_utils;
extern crate deunicode;
extern crate smallvec;
extern crate unicode_normalization;
extern crate unicode_segmentation;
extern crate unidecode;

#[cfg(test)]
#[cfg(feature = "bench")]
Expand Down
19 changes: 10 additions & 9 deletions src/transliterate.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
use deunicode::deunicode_char;
use std::str::Chars;
use unidecode::unidecode_char;

#[inline]
fn transliterate(c: char) -> Chars<'static> {
let s = unidecode_char(c);
if s.is_empty() {};
fn transliterate(c: char) -> Option<Chars<'static>> {
// We should maybe use Unicode case folding here as an initial pass,
// but without a concrete motivating case (yet) it doesn't seem worth
// the cost.
unidecode_char(c).chars()
deunicode_char(c).map(|s| s.chars())
}

#[inline]
Expand Down Expand Up @@ -41,14 +39,15 @@ fn ascii_to_upper_if_alpha(c: char) -> Option<char> {
pub fn to_ascii_initial(c: char) -> Option<char> {
match c {
'A'..='Z' => Some(c),
_ => transliterate(c).find_map(ascii_to_upper_if_alpha),
_ => transliterate(c)?.find_map(ascii_to_upper_if_alpha),
}
}

pub fn to_ascii_casefolded(text: &str) -> Option<impl Iterator<Item = char> + '_> {
let mut result = text
.chars()
.flat_map(transliterate)
.filter_map(transliterate)
.flatten()
.filter_map(ascii_to_lower_if_alpha)
.peekable();

Expand All @@ -63,7 +62,8 @@ pub fn to_ascii_casefolded(text: &str) -> Option<impl Iterator<Item = char> + '_
pub fn to_ascii_casefolded_reversed(text: &str) -> Option<impl Iterator<Item = char> + '_> {
let mut result = text
.chars()
.flat_map(transliterate)
.filter_map(transliterate)
.flatten()
.rev()
.filter_map(ascii_to_lower_if_alpha)
.peekable();
Expand All @@ -79,7 +79,8 @@ pub fn to_ascii_casefolded_reversed(text: &str) -> Option<impl Iterator<Item = c
pub fn to_ascii_titlecase(s: &str) -> Option<String> {
let mut result = s
.chars()
.flat_map(transliterate)
.filter_map(transliterate)
.flatten()
.filter_map(ascii_to_lower_if_alpha);

result.next().map(|initial| {
Expand Down