diff --git a/src/censor.rs b/src/censor.rs index 2294047..abf65bb 100644 --- a/src/censor.rs +++ b/src/censor.rs @@ -164,10 +164,12 @@ impl> Censor { fn filter_char(c: &char) -> bool { use finl_unicode::categories::{CharacterCategories, MinorCategory}; let category = c.get_minor_category(); + // Exempt the combining dakuten (U+3099) and handakuten (U+309A) from the Mn filter below; stripping these marks would leave the base kana in their unvoiced forms. + let preserve_japanese = matches!(*c, '\u{3099}' | '\u{309A}'); let nok = matches!( category, MinorCategory::Cn | MinorCategory::Co | MinorCategory::Mn - ); + ) && !preserve_japanese; !(nok || BANNED.deref().deref().contains(*c)) } @@ -1275,6 +1277,15 @@ mod tests { ); } + #[test] + #[serial] + fn japanese_diacritics_preserved() { + assert_eq!("パピプペポ", "パピプペポ".censor()); + assert_eq!("バビブベボ", "バビブベボ".censor()); + assert_eq!("ぱぴぷぺぽ", "ぱぴぷぺぽ".censor()); + assert_eq!("ばびぶべぼ", "ばびぶべぼ".censor()); + } + #[test] #[serial] fn bandwidth() {