From df8191362e50c68dfb0b677c806085cf931dedcf Mon Sep 17 00:00:00 2001
From: Martin Habovstiak
Date: Sun, 10 Oct 2021 20:04:06 +0200
Subject: [PATCH] Implement conversion using an iterator

This adds `BytesToPassphraseIter` which implements `Iterator` and related
traits. This should make it possible to perform the conversion without
allocations.

Part of #6
---
 src/lib.rs | 206 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 199 insertions(+), 7 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index d178153..092dedb 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -20,6 +20,10 @@
 //! println!("Passphrase: {}", rust_niceware::generate_passphrase(8).unwrap().join(" "));
 //! ```
 
+use std::fmt;
+use std::convert::TryFrom;
+use std::iter::FusedIterator;
+
 pub use error::{UnknownWordError, RNGError};
 
 mod error;
@@ -36,14 +40,170 @@ const MAX_WORD_LEN: usize = 28;
 ///
 /// This function panics if the length of slice is odd.
 pub fn bytes_to_pass_phrase(bytes: &[u8]) -> Vec<&'static str> {
+    bytes_to_pass_phrase_iter_from_slice(bytes)
+        .collect()
+}
+
+fn byte_pair_to_word(pair: [u8; 2]) -> &'static str {
+    let word_index = usize::from(pair[0]) * 256 + usize::from(pair[1]);
+    words::ALL_WORDS[word_index]
+}
+
+/// Represents an iterator of words being converted from bytes.
+///
+/// This is created by `bytes_to_pass_phrase_iter*` functions and enables you to avoid allocating a
+/// `Vec` if you don't need it.
+// this is basically just a glorified `std::iter::Map` :)
+#[derive(Clone, Debug)]
+pub struct BytesToPassphraseIter<T: Iterator<Item=[u8; 2]>> {
+    iter: T,
+}
+
+impl<T> From<T> for BytesToPassphraseIter<T> where T: Iterator<Item=[u8; 2]> {
+    fn from(iter: T) -> Self {
+        BytesToPassphraseIter {
+            iter,
+        }
+    }
+}
+
+
+impl<T> BytesToPassphraseIter<T> where T: Iterator<Item=[u8; 2]> + Clone {
+    /// Creates a string with words separated by the given separator.
+    ///
+    /// This function pre-allocates [`String`] so that writing is fast.
+    pub fn join(self, separator: impl fmt::Display) -> String {
+        use fmt::Write;
+
+        struct Counter(usize);
+        impl fmt::Write for Counter {
+            fn write_str(&mut self, s: &str) -> fmt::Result {
+                self.0 = self.0.saturating_add(s.len());
+                Ok(())
+            }
+        }
+
+        let mut counter = Counter(0);
+        write!(&mut counter, "{}", separator).expect("counter never fails");
+        let mut string = String::with_capacity(self.clone().bytes_hint(counter.0));
+        self.write(&mut string, separator).expect("string allocation never fails");
+
+        string
+    }
+}
+
+impl<T> BytesToPassphraseIter<T> where T: Iterator<Item=[u8; 2]> {
+    /// Calculates the number of bytes occupied by string representation if separated by a
+    /// separator of given length.
+    ///
+    /// This can be used as a size hint for [`String`] or similar types when implementing custom
+    /// formatting.
+    pub fn bytes_hint(mut self, separator_len: usize) -> usize {
+        match self.next() {
+            Some(word) => {
+                let mut size = word.len();
+                for word in self {
+                    size = size.saturating_add(word.len()).saturating_add(separator_len);
+                }
+                size
+            },
+            None => 0,
+        }
+    }
+
+    /// Write the words into the `writer` separated by the `separator`.
+    ///
+    /// This can be used with generic writers avoiding allocations. Note that while this takes
+    /// `writer` by value you can still pass a mutable reference.
+    pub fn write<W: fmt::Write>(mut self, mut writer: W, separator: impl fmt::Display) -> fmt::Result {
+        if let Some(word) = self.next() {
+            writer.write_str(word)?;
+            for word in self {
+                write!(writer, "{}{}", separator, word)?;
+            }
+        }
+        Ok(())
+    }
+}
+
+impl<T> Iterator for BytesToPassphraseIter<T> where T: Iterator<Item=[u8; 2]> {
+    type Item = &'static str;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.iter.next().map(byte_pair_to_word)
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.iter.size_hint()
+    }
+
+    // Overriding this can lead to faster code despite `TrustedLen` not being implemented.
+    fn collect<B>(self) -> B where B: std::iter::FromIterator<Self::Item> {
+        self.iter.map(byte_pair_to_word).collect()
+    }
+}
+
+impl<T> DoubleEndedIterator for BytesToPassphraseIter<T> where T: Iterator<Item=[u8; 2]> + DoubleEndedIterator {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.iter.next_back().map(byte_pair_to_word)
+    }
+}
+
+/// Prints the words separated by space or comma and a space (alternative representation).
+///
+/// As should be obvious from signature this performs a clone of the iterator.
+/// This is OK for things like slice iterators because those are cheap but be careful when using
+/// something else as it may affect performance.
+///
+/// Note: if you intend to create a string `join(" ")` is faster than `to_string()`.
+impl<T> fmt::Display for BytesToPassphraseIter<T> where T: Iterator<Item=[u8; 2]> + Clone {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let separator = match f.alternate() {
+            false => " ",
+            true => ", ",
+        };
+
+        self.clone().write(f, separator)
+    }
+}
+
+// correct because we just forward size hint
+impl<T> ExactSizeIterator for BytesToPassphraseIter<T> where T: Iterator<Item=[u8; 2]> + ExactSizeIterator {}
+impl<T> FusedIterator for BytesToPassphraseIter<T> where T: Iterator<Item=[u8; 2]> + FusedIterator {}
+
+// Ideally we would implement TrustedLen as well but that one is nightly :(
+// Hopefully overriding collect can help too.
+
+/// Convert iterator of byte pairs to iterator of words.
+///
+/// This is similar to [`bytes_to_pass_phrase`] but it operates on iterator instead of slice/vec
+/// so it may allow more efficient processing (e.g. avoiding allocations).
+///
+/// The returned iterator has a bunch of convenience functions that should help fast and easy
+/// processing.
+pub fn bytes_to_pass_phrase_iter<I>(bytes: I) -> BytesToPassphraseIter<I::IntoIter> where I: IntoIterator<Item=[u8; 2]> {
+    BytesToPassphraseIter {
+        iter: bytes.into_iter(),
+    }
+}
+
+/// Convert slice of bytes to iterator of words.
+///
+/// This is a convenience function that converts slice of bytes to iterator of pairs and passes it
+/// to [`bytes_to_pass_phrase_iter`].
+///
+/// ## Panics
+///
+/// This function panics if the length of slice is odd.
+pub fn bytes_to_pass_phrase_iter_from_slice(bytes: &[u8]) -> BytesToPassphraseIter<impl '_ + Iterator<Item=[u8; 2]> + ExactSizeIterator + Clone + FusedIterator + Send + Sync> {
     if bytes.len() % 2 != 0 {
         panic!("only even-sized byte arrays are supported")
     }
-    bytes.chunks_exact(2).map(|pair| {
-        let word_index = usize::from(pair[0]) * 256 + usize::from(pair[1]);
-        words::ALL_WORDS[word_index]
-    })
-    .collect()
+    let iter = bytes
+        .chunks_exact(2)
+        .map(|pair| *<&[u8; 2]>::try_from(pair).expect("chunks_exact returned invalid slice"));
+
+    bytes_to_pass_phrase_iter(iter)
 }
 
 /// Decode words into bytes
@@ -85,7 +245,7 @@ pub fn passphrase_to_bytes(words: &[&str]) -> Result<Vec<u8>, UnknownWordError>
 
 /// Convenience funtion to generate a passphrase using OS RNG
 ///
-/// This is a shorthand for generating random bytes, and feeding them to `bytes_to_passphrase`.
+/// This is a shorthand for generating random bytes, and feeding them to [`bytes_to_pass_phrase`].
 ///
 /// **Important**: As opposed to the original implementation this takes number of words instead of
 /// number of bytes. This should be more natural and avoids panics.
@@ -112,7 +272,7 @@ pub fn generate_passphrase(num_words: u16) -> Result<Vec<&'static str>, RNGError
 
 #[cfg(test)]
 mod tests {
-    use crate::{bytes_to_pass_phrase, generate_passphrase, passphrase_to_bytes};
+    use crate::{bytes_to_pass_phrase, bytes_to_pass_phrase_iter_from_slice, generate_passphrase, passphrase_to_bytes};
 
     // generate_passphrase
 
@@ -198,4 +358,36 @@ mod tests {
         // makes sure assumption holds
         assert!(crate::words::ALL_WORDS.iter().copied().all(str::is_ascii));
     }
+
+    #[test]
+    fn test_passphrase_iter_empty() {
+        let bytes = &[];
+
+        assert_eq!(bytes_to_pass_phrase_iter_from_slice(bytes).to_string(), "");
+        assert_eq!(format!("{:#}", bytes_to_pass_phrase_iter_from_slice(bytes)), "");
+    }
+
+    #[test]
+    fn test_passphrase_iter_one() {
+        let bytes = &[0, 0];
+
+        assert_eq!(bytes_to_pass_phrase_iter_from_slice(bytes).to_string(), "a");
+        assert_eq!(format!("{:#}", bytes_to_pass_phrase_iter_from_slice(bytes)), "a");
+    }
+
+    #[test]
+    fn test_passphrase_iter_two() {
+        let bytes = &[0, 0, 255, 255];
+
+        assert_eq!(bytes_to_pass_phrase_iter_from_slice(bytes).to_string(), "a zyzzyva");
+        assert_eq!(format!("{:#}", bytes_to_pass_phrase_iter_from_slice(bytes)), "a, zyzzyva");
+    }
+
+    #[test]
+    fn test_passphrase_iter_long() {
+        let bytes = &[0, 0, 17, 212, 12, 140, 90, 246, 46, 83, 254, 60, 54, 169, 255, 255];
+
+        assert_eq!(bytes_to_pass_phrase_iter_from_slice(bytes).to_string(), "a bioengineering balloted gobbled creneled written depriving zyzzyva");
+        assert_eq!(format!("{:#}", bytes_to_pass_phrase_iter_from_slice(bytes)), "a, bioengineering, balloted, gobbled, creneled, written, depriving, zyzzyva");
+    }
 }