diff --git a/Cargo.lock b/Cargo.lock index 64b1817..697fce6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,24 +75,24 @@ checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "clap" -version = "4.5.39" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd60e63e9be68e5fb56422e397cf9baddded06dae1d2e523401542383bc72a9f" +checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.39" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89cc6392a1f72bbeb820d71f32108f61fdaf18bc526e1d23954168a67759ef51" +checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" dependencies = [ "anstream", "anstyle", @@ -102,9 +102,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" [[package]] name = "colorchoice" @@ -218,15 +218,6 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "format_num" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14ac05eb8d2eb4ed1eeff847911deae077b0b53332465de9d6a26b0ea9961bc8" -dependencies = [ - "regex", -] - [[package]] name = "getrandom" version = "0.2.16" @@ -310,7 +301,6 @@ dependencies = [ "clap", "crossterm", "dirs", - "format_num", "is_executable", "regex", "serde", @@ -539,9 +529,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.15.0" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "strsim" @@ -551,9 +541,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.101" +version = "2.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" +checksum = "f6397daf94fa90f058bd0fd88429dd9e5738999cca8d701813c80723add80462" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index baf0d75..dfe8018 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,10 +12,9 @@ repository = "https://github.com/ReagentX/Logria" version = "0.0.0" [dependencies] -clap = { version = "=4.5.39", features = ["cargo"] } +clap = { version = "=4.5.40", features = ["cargo"] } crossterm = "=0.29.0" dirs = "=6.0.0" -format_num = "=0.1.0" is_executable = "=1.0.4" regex = "=1.11.1" serde = { version = "=1.0.219", features = ["derive"] } diff --git a/src/communication/handlers/highlight.rs b/src/communication/handlers/highlight.rs index bd34967..9a4100d 100644 --- a/src/communication/handlers/highlight.rs +++ b/src/communication/handlers/highlight.rs @@ -6,13 +6,15 @@ use regex::bytes::Regex; use super::{handler::Handler, processor::ProcessorMethods}; use crate::{ communication::{ - handlers::user_input::UserInputHandler, input::InputType::Normal, reader::MainWindow, + handlers::{processor::update_progress, user_input::UserInputHandler}, + input::InputType::Normal, + reader::MainWindow, }, constants::cli::{ cli_chars::{COMMAND_CHAR, HIGHLIGHT_CHAR, NORMAL_STR, TOGGLE_HIGHLIGHT_CHAR}, patterns::ANSI_COLOR_PATTERN, }, - ui::scroll::{self, update_current_match_index, ScrollState}, + ui::scroll::{self, ScrollState, update_current_match_index}, }; pub struct HighlightHandler { @@ -73,21 +75,26 @@ impl HighlightHandler { impl ProcessorMethods for HighlightHandler { /// Process matches, loading the buffer of indexes to matched messages in the main buffer fn process_matches(&mut self, window: &mut MainWindow) -> Result<()> { - // TODO: Possibly async? Possibly loading indicator for large jobs? + let mut wrote_progress = false; if self.current_pattern.is_some() { // Start from where we left off to the most recent message - let buf_range = (window.config.last_index_regexed, window.messages().len()); + let start = window.config.last_index_regexed; + let end = window.messages().len(); - // Iterate "forever", skipping to the start and taking up till end-start - // TODO: Something to indicate progress - for index in (0..).skip(buf_range.0).take(buf_range.1 - buf_range.0) { + for index in start..end { if self.test(&window.messages()[index]) { window.config.matched_rows.push(index); } + // Update the user interface with the current state + wrote_progress = update_progress(window, start, end, index)?; + // Update the last spot so we know where to start next time window.config.last_index_regexed = index + 1; } + if wrote_progress { + window.write_status()?; + } } Ok(()) } @@ -95,12 +102,7 @@ impl ProcessorMethods for HighlightHandler { /// Return the app to a normal input state fn return_to_normal(&mut self, window: &mut MainWindow) -> Result<()> { self.clear_matches(window)?; - // Handle reset of scroll state window.config.current_matched_row = 0; - if matches!(window.config.scroll_state, ScrollState::Centered) { - window.config.scroll_state = ScrollState::Free; - } - window.config.current_status = None; window.update_input_type(Normal)?; window.set_cli_cursor(None)?; @@ -116,7 +118,9 @@ impl ProcessorMethods for HighlightHandler { window.config.matched_rows.clear(); window.config.last_index_regexed = 0; window.config.highlight_match = false; - window.config.scroll_state = ScrollState::Free; + if matches!(window.config.scroll_state, ScrollState::Centered) { + window.config.scroll_state = ScrollState::Free; + } window.reset_command_line()?; Ok(()) } diff --git a/src/communication/handlers/parser.rs b/src/communication/handlers/parser.rs index 71e8116..58d35e8 100644 --- a/src/communication/handlers/parser.rs +++ b/src/communication/handlers/parser.rs @@ -6,7 +6,9 @@ use regex::Regex; use crate::{ communication::{ handlers::{ - handler::Handler, multiple_choice::MultipleChoiceHandler, processor::ProcessorMethods, + handler::Handler, + multiple_choice::MultipleChoiceHandler, + processor::{ProcessorMethods, update_progress}, }, input::{InputType::Normal, StreamType}, reader::MainWindow, @@ -219,21 +221,14 @@ impl ProcessorMethods for ParserHandler { fn process_matches(&mut self, window: &mut MainWindow) -> Result<()> { // Only process if the parser is set up properly if let ParserState::Full = window.config.parser_state { - // TODO: Possibly async? Possibly loading indicator for large jobs? if self.parser.is_some() { + let mut wrote_progress = false; // Start from where we left off to the most recent message - let buf_range = ( - window.config.last_index_processed, - window.previous_messages().len(), - ); - - // Iterate "forever", skipping to the start and taking up till end-start - // TODO: Something to indicate progress - let last = buf_range.1.checked_sub(1).unwrap_or(buf_range.0); - for index in (0..) - .skip(buf_range.0) - .take(buf_range.1.checked_sub(buf_range.0).unwrap_or(buf_range.0)) - { + let start = window.config.last_index_processed; + let end = window.previous_messages().len(); + + let last = end.checked_sub(1).unwrap_or(end); + for index in start..end { if window.config.aggregation_enabled { match self.aggregate_handle( &window.previous_messages()[index], @@ -260,9 +255,16 @@ impl ProcessorMethods for ParserHandler { ) { window.config.auxiliary_messages.push(message); } + + // Update the user interface with the current state + wrote_progress = update_progress(window, start, end, index)?; + // Update the last spot so we know where to start next time window.config.last_index_processed = index + 1; } + if wrote_progress { + window.write_status()?; + } } } Ok(()) @@ -394,6 +396,11 @@ impl Handler for ParserHandler { if window.config.aggregation_enabled { window.config.current_status = Some(self.status.clone()); window.config.aggregation_enabled = false; + if let Some(parser) = &mut self.parser { + parser.aggregator_map.values_mut().for_each(|agg| { + agg.reset(); + }); + } } else { let new_status = self.status.clone(); window.config.current_status = Some(new_status.replace( @@ -531,13 +538,13 @@ mod parse_tests { "Sum", " Total: 5,850", "Count", - " 10\u{1b}[0m: 1 (1%)", - " 100\u{1b}[0m: 1 (1%)", - " 101\u{1b}[0m: 1 (1%)", - " 102\u{1b}[0m: 1 (1%)", - " 103\u{1b}[0m: 1 (1%)", + " 95\u{1b}[0m: 1 (1%)", + " 96\u{1b}[0m: 1 (1%)", + " 97\u{1b}[0m: 1 (1%)", + " 98\u{1b}[0m: 1 (1%)", + " 99\u{1b}[0m: 1 (1%)", "Mode", - " 10\u{1b}[0m: 1 (1%)", + " 99\u{1b}[0m: 1 (1%)", ] ); } diff --git a/src/communication/handlers/processor.rs b/src/communication/handlers/processor.rs index b999443..f638ac6 100644 --- a/src/communication/handlers/processor.rs +++ b/src/communication/handlers/processor.rs @@ -7,3 +7,33 @@ pub trait ProcessorMethods { fn clear_matches(&mut self, window: &mut MainWindow) -> Result<()>; fn process_matches(&mut self, window: &mut MainWindow) -> Result<()>; } + +/// The step size for progress indicator updates. +const STEP: usize = 999; +/// Threshold for when to print progress updates in the aggregator. +const THRESHOLD: usize = 25_000; + +#[inline(always)] +pub fn update_progress( + window: &mut MainWindow, + start: usize, + end: usize, + index: usize, +) -> Result { + // Update the user interface with the current state + if end - start > THRESHOLD && (index % STEP == 0 || index == end - 1) { + let word = if index == end - 1 { + "Processed" + } else { + "Processing" + }; + window.write_to_command_line(&format!( + "{word} messages: {}/{} ({}%)", + (index + 1) - start, + end - start, + ((index + 1 - start) * 100) / (end - start) + ))?; + return Ok(true); + } + Ok(false) +} diff --git a/src/communication/handlers/regex.rs b/src/communication/handlers/regex.rs index fd5a207..b9ac7e0 100644 --- a/src/communication/handlers/regex.rs +++ b/src/communication/handlers/regex.rs @@ -6,7 +6,9 @@ use regex::bytes::Regex; use super::{handler::Handler, processor::ProcessorMethods}; use crate::{ communication::{ - handlers::user_input::UserInputHandler, input::InputType::Normal, reader::MainWindow, + handlers::{processor::update_progress, user_input::UserInputHandler}, + input::InputType::Normal, + reader::MainWindow, }, constants::cli::{ cli_chars::{COMMAND_CHAR, NORMAL_STR, REGEX_CHAR, TOGGLE_HIGHLIGHT_CHAR}, @@ -64,21 +66,27 @@ impl RegexHandler { impl ProcessorMethods for RegexHandler { /// Process matches, loading the buffer of indexes to matched messages in the main buffer fn process_matches(&mut self, window: &mut MainWindow) -> Result<()> { - // TODO: Possibly async? Possibly loading indicator for large jobs? + let mut wrote_progress = false; if self.current_pattern.is_some() { // Start from where we left off to the most recent message - let buf_range = (window.config.last_index_regexed, window.messages().len()); + // Start from where we left off to the most recent message + let start = window.config.last_index_regexed; + let end = window.messages().len(); - // Iterate "forever", skipping to the start and taking up till end-start - // TODO: Something to indicate progress - for index in (0..).skip(buf_range.0).take(buf_range.1 - buf_range.0) { + for index in start..end { if self.test(&window.messages()[index]) { window.config.matched_rows.push(index); } + // Update the user interface with the current state + wrote_progress = update_progress(window, start, end, index)?; + // Update the last spot so we know where to start next time window.config.last_index_regexed = index + 1; } + if wrote_progress { + window.write_status()?; + } } Ok(()) } diff --git a/src/extensions/parser.rs b/src/extensions/parser.rs index 05f3482..7913366 100644 --- a/src/extensions/parser.rs +++ b/src/extensions/parser.rs @@ -163,7 +163,7 @@ impl Parser { } AggregationMethod::Mode => { self.aggregator_map - .insert(method_name.to_string(), Box::new(Counter::new(Some(1)))); + .insert(method_name.to_string(), Box::new(Counter::mean())); } AggregationMethod::Sum => { self.aggregator_map @@ -171,7 +171,7 @@ impl Parser { } AggregationMethod::Count => { self.aggregator_map - .insert(method_name.to_string(), Box::new(Counter::new(None))); + .insert(method_name.to_string(), Box::new(Counter::new())); } AggregationMethod::Date(format) => { self.aggregator_map.insert( diff --git a/src/util/aggregators/aggregator.rs b/src/util/aggregators/aggregator.rs index 4069daa..4a3a40c 100644 --- a/src/util/aggregators/aggregator.rs +++ b/src/util/aggregators/aggregator.rs @@ -34,11 +34,54 @@ pub fn extract_number(message: &str) -> Option { result.parse::().ok() } +/// Formats a float as a string with commas separating thousands +pub fn format_float(n: f64) -> String { + let int_part = n.trunc() as i64; + let abs_str = int_part.abs().to_string(); + let mut result = String::new(); + + let chars: Vec = abs_str.chars().collect(); + let len = chars.len(); + + for (i, c) in chars.iter().enumerate() { + if i != 0 && (len - i) % 3 == 0 { + result.push(','); + } + result.push(*c); + } + + if int_part < 0 { + format!("-{}", result) + } else { + result + } +} + +/// Formats an integer as a string with commas separating thousands +pub fn format_int(n: usize) -> String { + let digits = n.to_string(); + let mut result = String::new(); + + let chars: Vec = digits.chars().collect(); + let len = chars.len(); + + for (i, c) in chars.iter().enumerate() { + if i != 0 && (len - i) % 3 == 0 { + result.push(','); + } + result.push(*c); + } + + result +} + pub trait Aggregator { /// Insert an item into the aggregator, updating it's internal tracking data fn update(&mut self, message: &str) -> Result<(), LogriaError>; /// Expensive function that generates messages to render fn messages(&self, n: &usize) -> Vec; + /// Reset the aggregator, clearing all internal data + fn reset(&mut self); } #[derive(Eq, PartialEq, Serialize, Deserialize, Debug)] @@ -154,3 +197,59 @@ mod extract_tests { assert!(result.unwrap() - 1337. == 0.); } } + +#[cfg(test)] +mod format_float_tests { + use crate::util::aggregators::aggregator::format_float; + + #[test] + fn test_basic_positive() { + assert_eq!(format_float(1234.56), "1,234"); + assert_eq!(format_float(1000000.0), "1,000,000"); + assert_eq!(format_float(0.0), "0"); + } + + #[test] + fn test_basic_negative() { + assert_eq!(format_float(-1234.56), "-1,234"); + assert_eq!(format_float(-1000000.99), "-1,000,000"); + } + + #[test] + fn test_truncation() { + assert_eq!(format_float(999.999), "999"); + assert_eq!(format_float(-999.999), "-999"); + } + + #[test] + fn test_small_numbers() { + assert_eq!(format_float(9.99), "9"); + assert_eq!(format_float(-9.99), "-9"); + assert_eq!(format_float(0.99), "0"); + } + + #[test] + fn test_large_number() { + assert_eq!(format_float(1234567890.123), "1,234,567,890"); + } +} + +#[cfg(test)] +mod tests { + use crate::util::aggregators::aggregator::format_int; + + #[test] + fn test_format_usize_basic() { + assert_eq!(format_int(0), "0"); + assert_eq!(format_int(12), "12"); + assert_eq!(format_int(123), "123"); + } + + #[test] + fn test_format_usize_commas() { + assert_eq!(format_int(1234), "1,234"); + assert_eq!(format_int(12345), "12,345"); + assert_eq!(format_int(1234567), "1,234,567"); + assert_eq!(format_int(1234567890), "1,234,567,890"); + } +} diff --git a/src/util/aggregators/counter.rs b/src/util/aggregators/counter.rs index dc27e4c..105c3e0 100644 --- a/src/util/aggregators/counter.rs +++ b/src/util/aggregators/counter.rs @@ -1,160 +1,156 @@ -use std::collections::{BTreeSet, HashMap}; +use std::{ + cmp::Reverse, + collections::{BinaryHeap, HashMap}, +}; use crate::{ constants::cli::colors::RESET_COLOR, - util::{aggregators::aggregator::Aggregator, error::LogriaError}, + util::{ + aggregators::aggregator::{Aggregator, format_int}, + error::LogriaError, + }, }; -use format_num::format_num; -/// Counter struct inspired by Python's stdlib Counter class +/// A counter for tracking occurrences of messages, similar to Python's `Counter`. pub struct Counter { - state: HashMap, - order: HashMap>, - num_to_get: Option, + /// Map of message strings to their occurrence counts. + counts: HashMap, + /// Total number of messages processed. + total_count: u64, + /// Optional limit on the number of top messages to display. + frozen_n: Option, } impl Aggregator for Counter { + /// Implements [`Aggregator::update`]: increments the count for the given message. fn update(&mut self, message: &str) -> Result<(), LogriaError> { self.increment(message); Ok(()) } + /// Implements [`Aggregator::messages`]: returns the top `n` messages. fn messages(&self, n: &usize) -> Vec { - // Place to store the result - let num = &self.num_to_get.unwrap_or(*n); - let mut result = Vec::with_capacity(*num); - if *num == 0_usize { - return result; - } - - // Keep track of how many items we have added - let mut total_added = 0; + self.get_top_messages(*n) + } - // Get the keys sorted from highest to lowest - let mut counts: Vec = self - .order - .keys() - .map(std::borrow::ToOwned::to_owned) - .collect(); - counts.sort_unstable(); - - // Get the value under each key - for count in counts.iter().rev() { - let items = self.order.get(count).unwrap(); - for item in items { - let total = self.total() as f64; - result.push(format!( - " {}{}: {} ({:.0}%)", - item.trim(), - RESET_COLOR, - format_num!(",d", *count as f64), - (*count as f64 / total) * 100_f64 - )); - total_added += 1; - if total_added == *num { - return result; - } - } - } - result + /// Implements [`Aggregator::reset`]: clears all message counts. + fn reset(&mut self) { + self.counts.clear(); + self.total_count = 0; } } impl Counter { - pub fn new(num_to_get: Option) -> Counter { + /// Creates a new `Counter` with no messages counted and no limits. + pub fn new() -> Counter { + Counter { + counts: HashMap::new(), + total_count: 0, + frozen_n: None, + } + } + + /// Creates a new `Counter` configured to return only the top message. + pub fn mean() -> Counter { Counter { - state: HashMap::new(), - order: HashMap::new(), - num_to_get, + counts: HashMap::new(), + total_count: 0, + frozen_n: Some(1), } } - /// Determine the total number of items in the Counter - fn total(&self) -> u64 { - self.state.values().sum() + /// Increments the count for `item`, adding it if not already present. + pub fn increment(&mut self, item: &str) { + let count = self.counts.entry(item.to_string()).or_insert(0); + *count += 1; + self.total_count += 1; } - /// Remove an item from the internal order - fn purge_from_order(&mut self, item: &str, count: &u64) { - if let Some(order) = self.order.get_mut(count) { - // If there was data there, remove the existing item - if !order.is_empty() { - order.remove(item); - if order.is_empty() { - self.order.remove(count); - } + /// Decrements the count for `item`, removing it if its count reaches zero. + pub fn decrement(&mut self, item: &str) { + if let Some(count) = self.counts.get_mut(item) { + if *count > 1 { + *count -= 1; + self.total_count -= 1; + } else { + self.counts.remove(item); + self.total_count -= 1; } } } - /// Remove an item from the internal state - fn purge_from_state(&mut self, item: &str) { - self.state.remove(item); + /// Removes `item` entirely from the counter, subtracting its count from the total. + pub fn delete(&mut self, item: &str) { + if let Some(count) = self.counts.remove(item) { + self.total_count -= count; + } } - /// Update the internal item order `HashMap` - fn update_order(&mut self, item: &str, old_count: &u64, new_count: &u64) { - self.purge_from_order(item, old_count); - if let Some(v) = self.order.get_mut(new_count) { - v.insert(item.to_owned()); - } else { - let mut set = BTreeSet::new(); - set.insert(item.to_owned()); - self.order.insert(*new_count, set); + /// Retrieves the top `n` messages, respecting `frozen_n` if set. + fn get_top_messages(&self, n: usize) -> Vec { + let actual_num = self.frozen_n.unwrap_or(n).min(self.counts.len()); + + if actual_num == 0 || self.total_count == 0 { + return Vec::new(); } - } - /// Increment an item into the counter, creating if it does not exist - fn increment(&mut self, item: &str) { - let old_count = self.state.get(item).unwrap_or(&0).to_owned(); - let new_count = old_count.checked_add(1).unwrap_or(old_count); - self.state.insert(item.to_owned(), new_count); - self.update_order(item, &old_count, &new_count); + self.compute_top_messages(actual_num) } - /// Reduce an item from the counter, removing if it becomes 0 - fn decrement(&mut self, item: &str) { - let old_count = self.state.get(item).unwrap_or(&0).to_owned(); - let new_count = old_count.checked_sub(1); - match new_count { - Some(count) => { - if count > 0 { - self.state.insert(item.to_owned(), count); - self.update_order(item, &old_count, &count); - } else { - self.delete(item); - } - } - None => { - self.delete(item); + /// Computes the top `n` messages sorted by count (descending) and message text. + fn compute_top_messages(&self, n: usize) -> Vec { + // A min-heap that only ever holds the top n entries. + let total = self.total_count as f64; + let mut heap = BinaryHeap::with_capacity(n + 1); + + for (item, &count) in &self.counts { + // Reverse so that smallest count is at the top—and will get popped when > n + heap.push(Reverse((count, item.as_str()))); + if heap.len() > n { + heap.pop(); } } - } - /// Remove an item from the counter completely - fn delete(&mut self, item: &str) { - let count = self.state.get(item).unwrap().to_owned(); - self.purge_from_order(item, &count); - self.purge_from_state(item); + // Drain heap into a Vec, sort descending by count then key + let mut top: Vec<(u64, &str)> = heap + .into_iter() + .map(|Reverse((count, item))| (count, item)) + .collect(); + + top.sort_unstable_by(|(ca, a), (cb, b)| cb.cmp(ca).then_with(|| a.cmp(b))); + + // Format output + top.into_iter() + .map(|(count, item)| { + let pct = (count as f64 / total) * 100.0; + format!( + " {}{}: {} ({:.0}%)", + item.trim(), + RESET_COLOR, + format_int(count as usize), + pct + ) + }) + .collect() } } #[cfg(test)] mod behavior_tests { use crate::util::aggregators::{aggregator::Aggregator, counter::Counter}; - use std::collections::{BTreeSet, HashMap}; + use std::collections::HashMap; static A: &str = "a"; static B: &str = "b"; #[test] fn can_construct_counter() { - Counter::new(None); + Counter::new(); } #[test] fn can_count_int() { - let mut c: Counter = Counter::new(None); + let mut c: Counter = Counter::new(); c.increment("1"); c.increment("1"); c.increment("1"); @@ -165,21 +161,14 @@ mod behavior_tests { expected_count.insert("1".to_string(), 3); expected_count.insert("2".to_string(), 2); - let mut expected_order: HashMap> = HashMap::new(); - let mut a = BTreeSet::new(); - let mut b = BTreeSet::new(); - a.insert("1".to_string()); - b.insert("2".to_string()); - expected_order.insert(3, a); - expected_order.insert(2, b); - - assert_eq!(c.state, expected_count); - assert_eq!(c.order, expected_order); + assert_eq!(c.counts.get("1"), Some(&3)); + assert_eq!(c.counts.get("2"), Some(&2)); + assert_eq!(c.total_count, 5); } #[test] fn can_count() { - let mut c: Counter = Counter::new(Some(5)); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(A); c.increment(A); @@ -190,21 +179,14 @@ mod behavior_tests { expected_count.insert(A.to_owned(), 3); expected_count.insert(B.to_owned(), 2); - let mut expected_order: HashMap> = HashMap::new(); - let mut a = BTreeSet::new(); - let mut b = BTreeSet::new(); - a.insert(A.to_owned()); - b.insert(B.to_owned()); - expected_order.insert(3, a); - expected_order.insert(2, b); - - assert_eq!(c.state, expected_count); - assert_eq!(c.order, expected_order); + assert_eq!(c.counts.get(A), Some(&3)); + assert_eq!(c.counts.get(B), Some(&2)); + assert_eq!(c.total_count, 5); } #[test] fn can_sum() { - let mut c: Counter = Counter::new(None); + let mut c: Counter = Counter::new(); c.update(A).unwrap(); c.update(A).unwrap(); c.update(A).unwrap(); @@ -215,12 +197,12 @@ mod behavior_tests { expected.insert(A.to_owned(), 3); expected.insert(B.to_owned(), 2); - assert_eq!(c.total(), 5); + assert_eq!(c.total_count, 5); } #[test] fn can_decrement() { - let mut c: Counter = Counter::new(Some(5)); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(A); c.increment(A); @@ -232,19 +214,14 @@ mod behavior_tests { expected_count.insert(A.to_owned(), 2); expected_count.insert(B.to_owned(), 2); - let mut expected_order: HashMap> = HashMap::new(); - let mut a = BTreeSet::new(); - a.insert(A.to_owned()); - a.insert(B.to_owned()); - expected_order.insert(2, a); - - assert_eq!(c.state, expected_count); - assert_eq!(c.order, expected_order); + assert_eq!(c.counts.get(A), Some(&2)); + assert_eq!(c.counts.get(B), Some(&2)); + assert_eq!(c.total_count, 4); } #[test] fn can_decrement_auto_remove() { - let mut c: Counter = Counter::new(Some(5)); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(B); c.increment(B); @@ -253,18 +230,14 @@ mod behavior_tests { let mut expected_count = HashMap::new(); expected_count.insert(B.to_owned(), 2); - let mut expected_order: HashMap> = HashMap::new(); - let mut b = BTreeSet::new(); - b.insert(B.to_owned()); - expected_order.insert(2, b); - - assert_eq!(c.state, expected_count); - assert_eq!(c.order, expected_order); + assert_eq!(c.counts.get(B), Some(&2)); + assert_eq!(c.counts.get(A), None); + assert_eq!(c.total_count, 2); } #[test] fn can_delete() { - let mut c: Counter = Counter::new(Some(5)); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(A); c.increment(A); @@ -272,16 +245,9 @@ mod behavior_tests { c.increment(B); c.delete(A); - let mut expected_count = HashMap::new(); - expected_count.insert(B.to_owned(), 2); - - let mut expected_order: HashMap> = HashMap::new(); - let mut b = BTreeSet::new(); - b.insert(B.to_owned()); - expected_order.insert(2, b); - - assert_eq!(c.state, expected_count); - assert_eq!(c.order, expected_order); + assert_eq!(c.counts.get(B), Some(&2)); + assert_eq!(c.counts.get(A), None); + assert_eq!(c.total_count, 2); } } @@ -296,7 +262,7 @@ mod message_tests { #[test] fn can_get_top_0() { - let mut c: Counter = Counter::new(None); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(A); c.increment(A); @@ -314,7 +280,8 @@ mod message_tests { #[test] fn can_get_top_1() { - let mut c: Counter = Counter::new(None); + let mut c: Counter = Counter::new(); + c.increment(A); c.increment(A); c.increment(A); c.increment(A); @@ -325,14 +292,14 @@ mod message_tests { c.increment(C); c.increment(D); - let expected = vec![String::from(" a\u{1b}[0m: 3 (33%)")]; + let expected = vec![String::from(" a\u{1b}[0m: 4 (40%)")]; assert_eq!(c.messages(&1), expected); } #[test] fn can_get_top_2() { - let mut c: Counter = Counter::new(None); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(A); c.increment(A); @@ -353,7 +320,7 @@ mod message_tests { #[test] fn can_get_top_3() { - let mut c: Counter = Counter::new(None); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(A); c.increment(A); @@ -375,7 +342,7 @@ mod message_tests { #[test] fn can_get_top_4() { - let mut c: Counter = Counter::new(Some(5)); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(A); c.increment(A); diff --git a/src/util/aggregators/date.rs b/src/util/aggregators/date.rs index ca5d932..6d1019e 100644 --- a/src/util/aggregators/date.rs +++ b/src/util/aggregators/date.rs @@ -1,44 +1,57 @@ use std::cmp::{max, min}; -use crate::util::{aggregators::aggregator::Aggregator, error::LogriaError}; -use format_num::format_num; -use time::{Date as Dt, PrimitiveDateTime as DateTime, Time as Tm, format_description::parse}; +use time::{ + Date as Dt, PrimitiveDateTime as DateTime, Time as Tm, + format_description::{OwnedFormatItem, parse_owned}, +}; +use crate::util::{ + aggregators::aggregator::{Aggregator, format_int}, + error::LogriaError, +}; + +#[derive(Clone, Debug)] pub enum DateParserType { Date, Time, DateTime, } +/// Aggregator for tracking temporal data: records earliest and latest timestamps, +/// counts entries, and computes rate over a chosen unit (day, hour, etc.). pub struct Date { - format: String, + /// Parsed format items for the date/time format. + format: Option, + /// The minimum timestamp observed. earliest: DateTime, + /// The maximum timestamp observed. latest: DateTime, + /// Number of parsed timestamps. count: i64, - rate: i64, - unit: String, + /// Specifies whether to parse as Date, Time, or DateTime. parser_type: DateParserType, } impl Aggregator for Date { + /// Parses and ingests a new timestamp from `message`, updating internal state. fn update(&mut self, message: &str) -> Result<(), LogriaError> { - match parse(&self.format) { - Ok(parser) => match self.parser_type { - DateParserType::Date => match Dt::parse(message, &parser) { + match &self.format { + Some(format) => match self.parser_type { + DateParserType::Date => match Dt::parse(message, format) { Ok(date) => { self.upsert(DateTime::new(date, Tm::MIDNIGHT)); Ok(()) } Err(why) => Err(LogriaError::CannotParseDate(why.to_string())), }, - DateParserType::Time => match Tm::parse(message, &parser) { + DateParserType::Time => match Tm::parse(message, format) { Ok(time) => { self.upsert(DateTime::new(Dt::MIN, time)); Ok(()) } Err(why) => Err(LogriaError::CannotParseDate(why.to_string())), }, - DateParserType::DateTime => match DateTime::parse(message, &parser) { + DateParserType::DateTime => match DateTime::parse(message, format) { Ok(date) => { self.upsert(date); Ok(()) @@ -46,18 +59,18 @@ impl Aggregator for Date { Err(why) => Err(LogriaError::CannotParseDate(why.to_string())), }, }, - Err(why) => Err(LogriaError::CannotParseDate(why.to_string())), + None => Err(LogriaError::CannotParseDate( + "No date format string specified!".to_string(), + )), } } + /// Returns vector of formatted output lines: rate, count, earliest, and latest. fn messages(&self, _: &usize) -> Vec { + let (rate, unit) = self.determine_rate(); let mut out_v = vec![ - format!( - " Rate: {} {}", - format_num!(",.0f", self.rate as u32), - self.unit - ), - format!(" Count: {}", format_num!(",d", self.count as u32)), + format!(" Rate: {} {}", format_int(rate as usize), unit), + format!(" Count: {}", format_int(self.count as usize)), ]; match self.parser_type { DateParserType::Date => { @@ -75,53 +88,55 @@ impl Aggregator for Date { } out_v } + + /// Resets the aggregator to its initial state, preserving `format` and `parser_type`. + fn reset(&mut self) { + self.count = 0; + let (earliest, latest) = Self::default_datetimes(&self.parser_type); + self.earliest = earliest; + self.latest = latest; + } } impl Date { - pub fn new(format: &str, format_type: DateParserType) -> Self { - match format_type { - // If we only care about the date, set the time to midnight - DateParserType::Date => Date { - format: format.to_owned(), - earliest: DateTime::new(Dt::MAX, Tm::MIDNIGHT), - latest: DateTime::new(Dt::MIN, Tm::MIDNIGHT), - count: 0, - rate: 0, - unit: String::new(), - parser_type: DateParserType::Date, - }, - // If we only care about the time, use the same date and the latest/earliest possible times - DateParserType::Time => Date { - format: format.to_owned(), - earliest: DateTime::new(Dt::MIN, Tm::from_hms(23, 59, 59).unwrap()), - latest: DateTime::new(Dt::MIN, Tm::MIDNIGHT), - count: 0, - rate: 0, - unit: String::new(), - parser_type: DateParserType::Time, - }, - DateParserType::DateTime => Date { - format: format.to_owned(), - earliest: DateTime::new(Dt::MAX, Tm::MIDNIGHT), - latest: DateTime::new(Dt::MIN, Tm::MIDNIGHT), - count: 0, - rate: 0, - unit: String::new(), - parser_type: DateParserType::DateTime, - }, + /// Constructs a new `Date` aggregator with the given `format` and `parser_type`. + pub fn new(format: &str, parser_type: DateParserType) -> Self { + let (earliest, latest) = Self::default_datetimes(&parser_type); + + Self { + format: parse_owned::<2>(format).ok(), + earliest, + latest, + count: 0, + parser_type, } } + fn default_datetimes(parser_type: &DateParserType) -> (DateTime, DateTime) { + match parser_type { + DateParserType::Date => ( + DateTime::new(Dt::MAX, Tm::MIDNIGHT), + DateTime::new(Dt::MIN, Tm::MIDNIGHT), + ), + DateParserType::Time => ( + DateTime::new(Dt::MIN, Tm::from_hms(23, 59, 59).unwrap()), + DateTime::new(Dt::MIN, Tm::MIDNIGHT), + ), + DateParserType::DateTime => ( + DateTime::new(Dt::MAX, Tm::MIDNIGHT), + DateTime::new(Dt::MIN, Tm::MIDNIGHT), + ), + } + } + + /// Inserts `new_date` into the aggregator, adjusting earliest/latest and recalculating rate. fn upsert(&mut self, new_date: DateTime) { self.earliest = min(new_date, self.earliest); self.latest = max(new_date, self.latest); self.count += 1; - let rate_data = self.determine_rate(); - self.rate = rate_data.0; - self.unit = rate_data.1; } - /// Determine the rate at which messages are received + /// Calculates the entry rate based on the span between earliest and latest timestamps. fn determine_rate(&self) -> (i64, String) { let difference = self.latest - self.earliest; let mut denominator = difference.whole_weeks(); @@ -157,7 +172,9 @@ mod use_tests { aggregator::Aggregator, date::{Date, DateParserType}, }; - use time::{Date as Dt, PrimitiveDateTime as DateTime, Time as Tm}; + use time::{ + Date as Dt, PrimitiveDateTime as DateTime, Time as Tm, format_description::parse_owned, + }; #[test] fn can_construct() { @@ -178,21 +195,20 @@ mod use_tests { d.update("01/04/2021").unwrap(); let expected = Date { - format: "[month]/[day]/[year]".to_string(), earliest: DateTime::new(Dt::from_ordinal_date(2021, 1).unwrap(), Tm::MIDNIGHT), latest: DateTime::new(Dt::from_ordinal_date(2021, 4).unwrap(), Tm::MIDNIGHT), count: 4, - rate: 1, - unit: String::from("per day"), parser_type: DateParserType::Date, + format: parse_owned::<2>("[month]/[day]/[year]").ok(), }; assert_eq!(d.format, expected.format); assert_eq!(d.earliest, expected.earliest); assert_eq!(d.latest, expected.latest); assert_eq!(d.count, expected.count); - assert_eq!(d.unit, expected.unit); - assert_eq!(d.rate, expected.rate); + let (rate, unit) = d.determine_rate(); + assert_eq!(unit, "per day"); + assert_eq!(rate, 1); } #[test] @@ -204,21 +220,21 @@ mod use_tests { d.update("04:01:00").unwrap(); let expected = Date { - format: "[hour]:[minute]:[second]".to_string(), earliest: DateTime::new(Dt::MIN, Tm::from_hms(1, 1, 0).unwrap()), latest: DateTime::new(Dt::MIN, Tm::from_hms(4, 1, 0).unwrap()), count: 4, - rate: 1, - unit: String::from("per hour"), parser_type: DateParserType::Time, + format: parse_owned::<2>("[hour]:[minute]:[second]").ok(), }; assert_eq!(d.format, expected.format); assert_eq!(d.earliest, expected.earliest); assert_eq!(d.latest, expected.latest); assert_eq!(d.count, expected.count); - assert_eq!(d.unit, expected.unit); - assert_eq!(d.rate, expected.rate); + + let (rate, unit) = d.determine_rate(); + assert_eq!(unit, "per hour"); + assert_eq!(rate, 1); } #[test] @@ -234,7 +250,6 @@ mod use_tests { d.update("01/04/2021 04:01:00").unwrap(); let expected = Date { - format: "[month]/[day]/[year] [hour]:[minute]:[second]".to_string(), earliest: DateTime::new( Dt::from_ordinal_date(2021, 1).unwrap(), Tm::from_hms(1, 1, 0).unwrap(), @@ -244,17 +259,18 @@ mod use_tests { Tm::from_hms(4, 1, 0).unwrap(), ), count: 4, - rate: 1, - unit: String::from("per day"), parser_type: DateParserType::DateTime, + format: parse_owned::<2>("[month]/[day]/[year] [hour]:[minute]:[second]").ok(), }; assert_eq!(d.format, expected.format); assert_eq!(d.earliest, expected.earliest); assert_eq!(d.latest, expected.latest); assert_eq!(d.count, expected.count); - assert_eq!(d.unit, expected.unit); - assert_eq!(d.rate, expected.rate); + + let (rate, unit) = d.determine_rate(); + assert_eq!(unit, "per day"); + assert_eq!(rate, 1); } } @@ -334,13 +350,11 @@ mod rate_tests { #[test] fn weekly() { let d = Date { - format: String::new(), earliest: DateTime::new(Dt::from_ordinal_date(2021, 1).unwrap(), Tm::MIDNIGHT), latest: DateTime::new(Dt::from_ordinal_date(2021, 15).unwrap(), Tm::MIDNIGHT), count: 10, - rate: 0, - unit: String::new(), parser_type: DateParserType::Date, + format: None, }; assert_eq!(d.determine_rate(), (5, "per week".to_string())); } @@ -348,13 +362,11 @@ mod rate_tests { #[test] fn daily() { let d = Date { - format: String::new(), earliest: DateTime::new(Dt::from_ordinal_date(2021, 1).unwrap(), Tm::MIDNIGHT), latest: DateTime::new(Dt::from_ordinal_date(2021, 15).unwrap(), Tm::MIDNIGHT), count: 15, - rate: 0, - unit: String::new(), parser_type: DateParserType::Date, + format: None, }; assert_eq!(d.determine_rate(), (1, "per day".to_string())); } @@ -362,13 +374,11 @@ mod rate_tests { #[test] fn hourly() { let d = Date { - format: String::new(), earliest: DateTime::new(Dt::from_ordinal_date(2021, 1).unwrap(), Tm::MIDNIGHT), latest: DateTime::new(Dt::from_ordinal_date(2021, 3).unwrap(), Tm::MIDNIGHT), count: 150, - rate: 0, - unit: String::new(), parser_type: DateParserType::Date, + format: None, }; assert_eq!(d.determine_rate(), (3, "per hour".to_string())); } @@ -376,13 +386,11 @@ mod rate_tests { #[test] fn minutely() { let d = Date { - format: String::new(), earliest: DateTime::new(Dt::from_ordinal_date(2021, 1).unwrap(), Tm::MIDNIGHT), latest: DateTime::new(Dt::from_ordinal_date(2021, 2).unwrap(), Tm::MIDNIGHT), count: 1500, - rate: 0, - unit: String::new(), parser_type: DateParserType::Date, + format: None, }; assert_eq!(d.determine_rate(), (1, "per minute".to_string())); } @@ -390,13 +398,11 @@ mod rate_tests { #[test] fn secondly() { let d = Date { - format: String::new(), earliest: DateTime::new(Dt::from_ordinal_date(2021, 1).unwrap(), Tm::MIDNIGHT), latest: DateTime::new(Dt::from_ordinal_date(2021, 2).unwrap(), Tm::MIDNIGHT), count: 100000, - rate: 0, - unit: String::new(), parser_type: DateParserType::Date, + format: None, }; assert_eq!(d.determine_rate(), (1, "per second".to_string())); } diff --git a/src/util/aggregators/mean.rs b/src/util/aggregators/mean.rs index b10195a..9a62f4d 100644 --- a/src/util/aggregators/mean.rs +++ b/src/util/aggregators/mean.rs @@ -1,16 +1,20 @@ use crate::util::{ - aggregators::aggregator::{Aggregator, extract_number}, + aggregators::aggregator::{Aggregator, extract_number, format_float}, error::LogriaError, }; -use format_num::format_num; +/// Aggregator that computes the running mean of numeric messages. pub struct Mean { + /// Number of numeric messages processed. count: f64, + /// Sum of parsed numeric message values. total: f64, } /// Float implementation of Mean impl Aggregator for Mean { + /// Parses `message`, updates the running count, and accumulates the total. + /// Saturates `count` and `total` at `f64::MAX` to prevent overflow. fn update(&mut self, message: &str) -> Result<(), LogriaError> { if self.count >= f64::MAX { self.count = f64::MAX; @@ -34,16 +38,24 @@ impl Aggregator for Mean { Ok(()) } + /// Returns formatted output: current mean (two decimals), count, and total. fn messages(&self, _: &usize) -> Vec { vec![ format!(" Mean: {:.2}", self.mean()), - format!(" Count: {}", format_num!(",d", self.count)), - format!(" Total: {}", format_num!(",d", self.total)), + format!(" Count: {}", format_float(self.count)), + format!(" Total: {}", format_float(self.total)), ] } + + /// Resets both `count` and `total` back to zero. + fn reset(&mut self) { + self.count = 0.; + self.total = 0.; + } } impl Mean { + /// Creates a new `Mean` aggregator with zero count and total. pub fn new() -> Mean { Mean { count: 0., @@ -51,10 +63,13 @@ impl Mean { } } + /// Attempts to parse a floating-point number from `message`. + /// Returns `None` if parsing fails. fn parse(&self, message: &str) -> Option { extract_number(message) } + /// Computes the current average; returns `total` if no values have been aggregated. fn mean(&self) -> f64 { if self.count == 0. { self.total diff --git a/src/util/aggregators/none.rs b/src/util/aggregators/none.rs index 1373bfb..bb9bc38 100644 --- a/src/util/aggregators/none.rs +++ b/src/util/aggregators/none.rs @@ -10,6 +10,8 @@ impl Aggregator for NoneAg { fn messages(&self, _: &usize) -> Vec { vec![" Disabled".to_owned()] } + + fn reset(&mut self) {} } impl NoneAg { diff --git a/src/util/aggregators/sum.rs b/src/util/aggregators/sum.rs index d9688f7..5245ea4 100644 --- a/src/util/aggregators/sum.rs +++ b/src/util/aggregators/sum.rs @@ -1,14 +1,17 @@ use crate::util::{ - aggregators::aggregator::{Aggregator, extract_number}, + aggregators::aggregator::{Aggregator, extract_number, format_float}, error::LogriaError, }; -use format_num::format_num; +/// Aggregator that accumulates numeric values from messages into a running total. pub struct Sum { + /// Running total of numeric messages. total: f64, } impl Aggregator for Sum { + /// Parses `message`, extracts a number if present, and adds it to the total. + /// Saturates at `f64::MAX` on overflow. fn update(&mut self, message: &str) -> Result<(), LogriaError> { if self.total >= f64::MAX { self.total = f64::MAX; @@ -18,16 +21,24 @@ impl Aggregator for Sum { Ok(()) } + /// Returns the current total formatted as a string message. fn messages(&self, _: &usize) -> Vec { - vec![format!(" Total: {}", format_num!(",d", self.total))] + vec![format!(" Total: {}", format_float(self.total))] + } + + /// Resets the running total back to zero. + fn reset(&mut self) { + self.total = 0.; } } impl Sum { + /// Creates a new `Sum` aggregator with an initial total of zero. pub fn new() -> Self { Sum { total: 0. } } + /// Attempts to parse a numeric value from `message`, returning `None` if parsing fails. fn parse(&self, message: &str) -> Option { extract_number(message) }