From ee07f46b3e59209c1415a057ad3db55d3b97b90c Mon Sep 17 00:00:00 2001 From: Christopher Sardegna Date: Mon, 9 Jun 2025 09:26:29 -0700 Subject: [PATCH 01/16] Simplify iteration, update status window with progress for large jobs --- src/communication/handlers/highlight.rs | 35 +++++++++++++++-------- src/communication/handlers/parser.rs | 38 +++++++++++++++++-------- src/communication/handlers/regex.rs | 25 ++++++++++++---- 3 files changed, 69 insertions(+), 29 deletions(-) diff --git a/src/communication/handlers/highlight.rs b/src/communication/handlers/highlight.rs index bd34967..1a5c0ca 100644 --- a/src/communication/handlers/highlight.rs +++ b/src/communication/handlers/highlight.rs @@ -12,7 +12,7 @@ use crate::{ cli_chars::{COMMAND_CHAR, HIGHLIGHT_CHAR, NORMAL_STR, TOGGLE_HIGHLIGHT_CHAR}, patterns::ANSI_COLOR_PATTERN, }, - ui::scroll::{self, update_current_match_index, ScrollState}, + ui::scroll::{self, ScrollState, update_current_match_index}, }; pub struct HighlightHandler { @@ -73,21 +73,35 @@ impl HighlightHandler { impl ProcessorMethods for HighlightHandler { /// Process matches, loading the buffer of indexes to matched messages in the main buffer fn process_matches(&mut self, window: &mut MainWindow) -> Result<()> { - // TODO: Possibly async? Possibly loading indicator for large jobs? if self.current_pattern.is_some() { // Start from where we left off to the most recent message - let buf_range = (window.config.last_index_regexed, window.messages().len()); + let start = window.config.last_index_regexed; + let end = window.messages().len(); - // Iterate "forever", skipping to the start and taking up till end-start - // TODO: Something to indicate progress - for index in (0..).skip(buf_range.0).take(buf_range.1 - buf_range.0) { + for index in start..end { if self.test(&window.messages()[index]) { window.config.matched_rows.push(index); } + // Update the user interface with the current state + if end - start > 10_000 && (index % 99 == 0 || index == end - 1) { + let word = if index == end - 1 { + "Processed" + } else { + "Processing" + }; + window.write_to_command_line(&format!( + "{word} messages: {}/{} ({}%)", + (index + 1) - start, + end - start, + ((index + 1 - start) * 100) / (end - start) + ))?; + } + // Update the last spot so we know where to start next time window.config.last_index_regexed = index + 1; } + window.write_status()?; } Ok(()) } @@ -95,12 +109,7 @@ impl ProcessorMethods for HighlightHandler { /// Return the app to a normal input state fn return_to_normal(&mut self, window: &mut MainWindow) -> Result<()> { self.clear_matches(window)?; - // Handle reset of scroll state window.config.current_matched_row = 0; - if matches!(window.config.scroll_state, ScrollState::Centered) { - window.config.scroll_state = ScrollState::Free; - } - window.config.current_status = None; window.update_input_type(Normal)?; window.set_cli_cursor(None)?; @@ -116,7 +125,9 @@ impl ProcessorMethods for HighlightHandler { window.config.matched_rows.clear(); window.config.last_index_regexed = 0; window.config.highlight_match = false; - window.config.scroll_state = ScrollState::Free; + if matches!(window.config.scroll_state, ScrollState::Centered) { + window.config.scroll_state = ScrollState::Free; + } window.reset_command_line()?; Ok(()) } diff --git a/src/communication/handlers/parser.rs b/src/communication/handlers/parser.rs index 71e8116..9c9f3a8 100644 --- a/src/communication/handlers/parser.rs +++ b/src/communication/handlers/parser.rs @@ -222,18 +222,11 @@ impl ProcessorMethods for ParserHandler { // TODO: Possibly async? Possibly loading indicator for large jobs? if self.parser.is_some() { // Start from where we left off to the most recent message - let buf_range = ( - window.config.last_index_processed, - window.previous_messages().len(), - ); - - // Iterate "forever", skipping to the start and taking up till end-start - // TODO: Something to indicate progress - let last = buf_range.1.checked_sub(1).unwrap_or(buf_range.0); - for index in (0..) - .skip(buf_range.0) - .take(buf_range.1.checked_sub(buf_range.0).unwrap_or(buf_range.0)) - { + let start = window.config.last_index_processed; + let end = window.previous_messages().len(); + + let last = end.checked_sub(1).unwrap_or(end); + for index in start..end { if window.config.aggregation_enabled { match self.aggregate_handle( &window.previous_messages()[index], @@ -260,9 +253,25 @@ impl ProcessorMethods for ParserHandler { ) { window.config.auxiliary_messages.push(message); } + + // Update the user interface with the current state + if end - start > 10_000 && (index % 99 == 0 || index == end - 1) { + let word = if index == end - 1 { + "Processed" + } else { + "Processing" + }; + window.write_to_command_line(&format!( + "{word} messages: {}/{} ({}%)", + (index + 1) - start, + end - start, + ((index + 1 - start) * 100) / (end - start) + ))?; + } // Update the last spot so we know where to start next time window.config.last_index_processed = index + 1; } + window.write_status()?; } } Ok(()) @@ -394,6 +403,11 @@ impl Handler for ParserHandler { if window.config.aggregation_enabled { window.config.current_status = Some(self.status.clone()); window.config.aggregation_enabled = false; + if let Some(parser) = &mut self.parser { + parser.aggregator_map.values_mut().for_each(|agg| { + agg.reset(); + }); + } } else { let new_status = self.status.clone(); window.config.current_status = Some(new_status.replace( diff --git a/src/communication/handlers/regex.rs b/src/communication/handlers/regex.rs index fd5a207..ff63a57 100644 --- a/src/communication/handlers/regex.rs +++ b/src/communication/handlers/regex.rs @@ -64,21 +64,36 @@ impl RegexHandler { impl ProcessorMethods for RegexHandler { /// Process matches, loading the buffer of indexes to matched messages in the main buffer fn process_matches(&mut self, window: &mut MainWindow) -> Result<()> { - // TODO: Possibly async? Possibly loading indicator for large jobs? if self.current_pattern.is_some() { // Start from where we left off to the most recent message - let buf_range = (window.config.last_index_regexed, window.messages().len()); + // Start from where we left off to the most recent message + let start = window.config.last_index_regexed; + let end = window.messages().len(); - // Iterate "forever", skipping to the start and taking up till end-start - // TODO: Something to indicate progress - for index in (0..).skip(buf_range.0).take(buf_range.1 - buf_range.0) { + for index in start..end { if self.test(&window.messages()[index]) { window.config.matched_rows.push(index); } + // Update the user interface with the current state + if end - start > 10_000 && (index % 99 == 0 || index == end - 1) { + let word = if index == end - 1 { + "Processed" + } else { + "Processing" + }; + window.write_to_command_line(&format!( + "{word} messages: {}/{} ({}%)", + (index + 1) - start, + end - start, + ((index + 1 - start) * 100) / (end - start) + ))?; + } + // Update the last spot so we know where to start next time window.config.last_index_regexed = index + 1; } + window.write_status()?; } Ok(()) } From a9b6cd6a5751c089848e32e7172b43b7df4e6f57 Mon Sep 17 00:00:00 2001 From: Christopher Sardegna Date: Mon, 9 Jun 2025 17:41:40 -0700 Subject: [PATCH 02/16] Add `reset()` to trait --- src/util/aggregators/aggregator.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/util/aggregators/aggregator.rs b/src/util/aggregators/aggregator.rs index 4069daa..f6a4ad5 100644 --- a/src/util/aggregators/aggregator.rs +++ b/src/util/aggregators/aggregator.rs @@ -39,6 +39,8 @@ pub trait Aggregator { fn update(&mut self, message: &str) -> Result<(), LogriaError>; /// Expensive function that generates messages to render fn messages(&self, n: &usize) -> Vec; + /// Reset the aggregator, clearing all internal data + fn reset(&mut self); } #[derive(Eq, PartialEq, Serialize, Deserialize, Debug)] From 50cd6315085c73e9e59c04a5ad6573324d467b2d Mon Sep 17 00:00:00 2001 From: Christopher Sardegna Date: Mon, 9 Jun 2025 17:41:59 -0700 Subject: [PATCH 03/16] Implement `reset()` for aggregators --- src/util/aggregators/mean.rs | 16 ++++++++++++++++ src/util/aggregators/none.rs | 2 ++ src/util/aggregators/sum.rs | 12 ++++++++++++ 3 files changed, 30 insertions(+) diff --git a/src/util/aggregators/mean.rs b/src/util/aggregators/mean.rs index b10195a..631d451 100644 --- a/src/util/aggregators/mean.rs +++ b/src/util/aggregators/mean.rs @@ -4,13 +4,18 @@ use crate::util::{ }; use format_num::format_num; +/// Aggregator that computes the running mean of numeric messages. pub struct Mean { + /// Number of numeric messages processed. count: f64, + /// Sum of parsed numeric message values. total: f64, } /// Float implementation of Mean impl Aggregator for Mean { + /// Parses `message`, updates the running count, and accumulates the total. + /// Saturates `count` and `total` at `f64::MAX` to prevent overflow. fn update(&mut self, message: &str) -> Result<(), LogriaError> { if self.count >= f64::MAX { self.count = f64::MAX; @@ -34,6 +39,7 @@ impl Aggregator for Mean { Ok(()) } + /// Returns formatted output: current mean (two decimals), count, and total. fn messages(&self, _: &usize) -> Vec { vec![ format!(" Mean: {:.2}", self.mean()), @@ -41,9 +47,16 @@ impl Aggregator for Mean { format!(" Total: {}", format_num!(",d", self.total)), ] } + + /// Resets both `count` and `total` back to zero. + fn reset(&mut self) { + self.count = 0.; + self.total = 0.; + } } impl Mean { + /// Creates a new `Mean` aggregator with zero count and total. pub fn new() -> Mean { Mean { count: 0., @@ -51,10 +64,13 @@ impl Mean { } } + /// Attempts to parse a floating-point number from `message`. + /// Returns `None` if parsing fails. fn parse(&self, message: &str) -> Option { extract_number(message) } + /// Computes the current average; returns `total` if no values have been aggregated. fn mean(&self) -> f64 { if self.count == 0. { self.total diff --git a/src/util/aggregators/none.rs b/src/util/aggregators/none.rs index 1373bfb..bb9bc38 100644 --- a/src/util/aggregators/none.rs +++ b/src/util/aggregators/none.rs @@ -10,6 +10,8 @@ impl Aggregator for NoneAg { fn messages(&self, _: &usize) -> Vec { vec![" Disabled".to_owned()] } + + fn reset(&mut self) {} } impl NoneAg { diff --git a/src/util/aggregators/sum.rs b/src/util/aggregators/sum.rs index d9688f7..d9b3a58 100644 --- a/src/util/aggregators/sum.rs +++ b/src/util/aggregators/sum.rs @@ -4,11 +4,15 @@ use crate::util::{ }; use format_num::format_num; +/// Aggregator that accumulates numeric values from messages into a running total. pub struct Sum { + /// Running total of numeric messages. total: f64, } impl Aggregator for Sum { + /// Parses `message`, extracts a number if present, and adds it to the total. + /// Saturates at `f64::MAX` on overflow. fn update(&mut self, message: &str) -> Result<(), LogriaError> { if self.total >= f64::MAX { self.total = f64::MAX; @@ -18,16 +22,24 @@ impl Aggregator for Sum { Ok(()) } + /// Returns the current total formatted as a string message. fn messages(&self, _: &usize) -> Vec { vec![format!(" Total: {}", format_num!(",d", self.total))] } + + /// Resets the running total back to zero. + fn reset(&mut self) { + self.total = 0.; + } } impl Sum { + /// Creates a new `Sum` aggregator with an initial total of zero. pub fn new() -> Self { Sum { total: 0. } } + /// Attempts to parse a numeric value from `message`, returning `None` if parsing fails. fn parse(&self, message: &str) -> Option { extract_number(message) } From d038942de23a76f3ff5f663e61bf44f874658ae7 Mon Sep 17 00:00:00 2001 From: Christopher Sardegna Date: Mon, 9 Jun 2025 17:42:10 -0700 Subject: [PATCH 04/16] Implement `reset()` for Date, refactor for performance --- src/util/aggregators/date.rs | 168 ++++++++++++++++++----------------- 1 file changed, 87 insertions(+), 81 deletions(-) diff --git a/src/util/aggregators/date.rs b/src/util/aggregators/date.rs index ca5d932..dfbc4ef 100644 --- a/src/util/aggregators/date.rs +++ b/src/util/aggregators/date.rs @@ -2,43 +2,53 @@ use std::cmp::{max, min}; use crate::util::{aggregators::aggregator::Aggregator, error::LogriaError}; use format_num::format_num; -use time::{Date as Dt, PrimitiveDateTime as DateTime, Time as Tm, format_description::parse}; +use time::{ + Date as Dt, PrimitiveDateTime as DateTime, Time as Tm, + format_description::{OwnedFormatItem, parse_owned}, +}; +#[derive(Clone, Debug)] pub enum DateParserType { Date, Time, DateTime, } +/// Aggregator for tracking temporal data: records earliest and latest timestamps, +/// counts entries, and computes rate over a chosen unit (day, hour, etc.). pub struct Date { - format: String, + /// Parsed format items for the date/time format. + format: Option, + /// The minimum timestamp observed. earliest: DateTime, + /// The maximum timestamp observed. latest: DateTime, + /// Number of parsed timestamps. count: i64, - rate: i64, - unit: String, + /// Specifies whether to parse as Date, Time, or DateTime. parser_type: DateParserType, } impl Aggregator for Date { + /// Parses and ingests a new timestamp from `message`, updating internal state. fn update(&mut self, message: &str) -> Result<(), LogriaError> { - match parse(&self.format) { - Ok(parser) => match self.parser_type { - DateParserType::Date => match Dt::parse(message, &parser) { + match &self.format { + Some(format) => match self.parser_type { + DateParserType::Date => match Dt::parse(message, format) { Ok(date) => { self.upsert(DateTime::new(date, Tm::MIDNIGHT)); Ok(()) } Err(why) => Err(LogriaError::CannotParseDate(why.to_string())), }, - DateParserType::Time => match Tm::parse(message, &parser) { + DateParserType::Time => match Tm::parse(message, format) { Ok(time) => { self.upsert(DateTime::new(Dt::MIN, time)); Ok(()) } Err(why) => Err(LogriaError::CannotParseDate(why.to_string())), }, - DateParserType::DateTime => match DateTime::parse(message, &parser) { + DateParserType::DateTime => match DateTime::parse(message, format) { Ok(date) => { self.upsert(date); Ok(()) @@ -46,17 +56,17 @@ impl Aggregator for Date { Err(why) => Err(LogriaError::CannotParseDate(why.to_string())), }, }, - Err(why) => Err(LogriaError::CannotParseDate(why.to_string())), + None => Err(LogriaError::CannotParseDate( + "No date format string specified!".to_string(), + )), } } + /// Returns vector of formatted output lines: rate, count, earliest, and latest. fn messages(&self, _: &usize) -> Vec { + let (rate, unit) = self.determine_rate(); let mut out_v = vec![ - format!( - " Rate: {} {}", - format_num!(",.0f", self.rate as u32), - self.unit - ), + format!(" Rate: {} {}", format_num!(",.0f", rate as u32), unit), format!(" Count: {}", format_num!(",d", self.count as u32)), ]; match self.parser_type { @@ -75,53 +85,58 @@ impl Aggregator for Date { } out_v } + + /// Resets the aggregator to its initial state, preserving `format` and `parser_type`. + fn reset(&mut self) { + self.count = 0; + self.earliest = match self.parser_type { + DateParserType::Date => DateTime::new(Dt::MAX, Tm::MIDNIGHT), + DateParserType::Time => DateTime::new(Dt::MIN, Tm::from_hms(23, 59, 59).unwrap()), + DateParserType::DateTime => DateTime::new(Dt::MAX, Tm::MIDNIGHT), + }; + self.latest = match self.parser_type { + DateParserType::Date => DateTime::new(Dt::MIN, Tm::MIDNIGHT), + DateParserType::Time => DateTime::new(Dt::MIN, Tm::MIDNIGHT), + DateParserType::DateTime => DateTime::new(Dt::MIN, Tm::MIDNIGHT), + }; + } } impl Date { - pub fn new(format: &str, format_type: DateParserType) -> Self { - match format_type { - // If we only care about the date, set the time to midnight - DateParserType::Date => Date { - format: format.to_owned(), - earliest: DateTime::new(Dt::MAX, Tm::MIDNIGHT), - latest: DateTime::new(Dt::MIN, Tm::MIDNIGHT), - count: 0, - rate: 0, - unit: String::new(), - parser_type: DateParserType::Date, - }, - // If we only care about the time, use the same date and the latest/earliest possible times - DateParserType::Time => Date { - format: format.to_owned(), - earliest: DateTime::new(Dt::MIN, Tm::from_hms(23, 59, 59).unwrap()), - latest: DateTime::new(Dt::MIN, Tm::MIDNIGHT), - count: 0, - rate: 0, - unit: String::new(), - parser_type: DateParserType::Time, - }, - DateParserType::DateTime => Date { - format: format.to_owned(), - earliest: DateTime::new(Dt::MAX, Tm::MIDNIGHT), - latest: DateTime::new(Dt::MIN, Tm::MIDNIGHT), - count: 0, - rate: 0, - unit: String::new(), - parser_type: DateParserType::DateTime, - }, + /// Constructs a new `Date` aggregator with the given `format` and `parser_type`. + pub fn new(format: &str, parser_type: DateParserType) -> Self { + let (earliest, latest) = match parser_type { + DateParserType::Date => ( + DateTime::new(Dt::MAX, Tm::MIDNIGHT), + DateTime::new(Dt::MIN, Tm::MIDNIGHT), + ), + DateParserType::Time => ( + DateTime::new(Dt::MIN, Tm::from_hms(23, 59, 59).unwrap()), + DateTime::new(Dt::MIN, Tm::MIDNIGHT), + ), + DateParserType::DateTime => ( + DateTime::new(Dt::MAX, Tm::MIDNIGHT), + DateTime::new(Dt::MIN, Tm::MIDNIGHT), + ), + }; + + Self { + format: parse_owned::<2>(format).ok(), + earliest, + latest, + count: 0, + parser_type, } } + /// Inserts `new_date` into the aggregator, adjusting earliest/latest and recalculating rate. fn upsert(&mut self, new_date: DateTime) { self.earliest = min(new_date, self.earliest); self.latest = max(new_date, self.latest); self.count += 1; - let rate_data = self.determine_rate(); - self.rate = rate_data.0; - self.unit = rate_data.1; } - /// Determine the rate at which messages are received + /// Calculates the entry rate based on the span between earliest and latest timestamps. fn determine_rate(&self) -> (i64, String) { let difference = self.latest - self.earliest; let mut denominator = difference.whole_weeks(); @@ -157,7 +172,9 @@ mod use_tests { aggregator::Aggregator, date::{Date, DateParserType}, }; - use time::{Date as Dt, PrimitiveDateTime as DateTime, Time as Tm}; + use time::{ + Date as Dt, PrimitiveDateTime as DateTime, Time as Tm, format_description::parse_owned, + }; #[test] fn can_construct() { @@ -178,21 +195,20 @@ mod use_tests { d.update("01/04/2021").unwrap(); let expected = Date { - format: "[month]/[day]/[year]".to_string(), earliest: DateTime::new(Dt::from_ordinal_date(2021, 1).unwrap(), Tm::MIDNIGHT), latest: DateTime::new(Dt::from_ordinal_date(2021, 4).unwrap(), Tm::MIDNIGHT), count: 4, - rate: 1, - unit: String::from("per day"), parser_type: DateParserType::Date, + format: parse_owned::<2>("[month]/[day]/[year]").ok(), }; assert_eq!(d.format, expected.format); assert_eq!(d.earliest, expected.earliest); assert_eq!(d.latest, expected.latest); assert_eq!(d.count, expected.count); - assert_eq!(d.unit, expected.unit); - assert_eq!(d.rate, expected.rate); + let (rate, unit) = d.determine_rate(); + assert_eq!(unit, "per day"); + assert_eq!(rate, 1); } #[test] @@ -204,21 +220,21 @@ mod use_tests { d.update("04:01:00").unwrap(); let expected = Date { - format: "[hour]:[minute]:[second]".to_string(), earliest: DateTime::new(Dt::MIN, Tm::from_hms(1, 1, 0).unwrap()), latest: DateTime::new(Dt::MIN, Tm::from_hms(4, 1, 0).unwrap()), count: 4, - rate: 1, - unit: String::from("per hour"), parser_type: DateParserType::Time, + format: parse_owned::<2>("[hour]:[minute]:[second]").ok(), }; assert_eq!(d.format, expected.format); assert_eq!(d.earliest, expected.earliest); assert_eq!(d.latest, expected.latest); assert_eq!(d.count, expected.count); - assert_eq!(d.unit, expected.unit); - assert_eq!(d.rate, expected.rate); + + let (rate, unit) = d.determine_rate(); + assert_eq!(unit, "per hour"); + assert_eq!(rate, 1); } #[test] @@ -234,7 +250,6 @@ mod use_tests { d.update("01/04/2021 04:01:00").unwrap(); let expected = Date { - format: "[month]/[day]/[year] [hour]:[minute]:[second]".to_string(), earliest: DateTime::new( Dt::from_ordinal_date(2021, 1).unwrap(), Tm::from_hms(1, 1, 0).unwrap(), @@ -244,17 +259,18 @@ mod use_tests { Tm::from_hms(4, 1, 0).unwrap(), ), count: 4, - rate: 1, - unit: String::from("per day"), parser_type: DateParserType::DateTime, + format: parse_owned::<2>("[month]/[day]/[year] [hour]:[minute]:[second]").ok(), }; assert_eq!(d.format, expected.format); assert_eq!(d.earliest, expected.earliest); assert_eq!(d.latest, expected.latest); assert_eq!(d.count, expected.count); - assert_eq!(d.unit, expected.unit); - assert_eq!(d.rate, expected.rate); + + let (rate, unit) = d.determine_rate(); + assert_eq!(unit, "per day"); + assert_eq!(rate, 1); } } @@ -334,13 +350,11 @@ mod rate_tests { #[test] fn weekly() { let d = Date { - format: String::new(), earliest: DateTime::new(Dt::from_ordinal_date(2021, 1).unwrap(), Tm::MIDNIGHT), latest: DateTime::new(Dt::from_ordinal_date(2021, 15).unwrap(), Tm::MIDNIGHT), count: 10, - rate: 0, - unit: String::new(), parser_type: DateParserType::Date, + format: None, }; assert_eq!(d.determine_rate(), (5, "per week".to_string())); } @@ -348,13 +362,11 @@ mod rate_tests { #[test] fn daily() { let d = Date { - format: String::new(), earliest: DateTime::new(Dt::from_ordinal_date(2021, 1).unwrap(), Tm::MIDNIGHT), latest: DateTime::new(Dt::from_ordinal_date(2021, 15).unwrap(), Tm::MIDNIGHT), count: 15, - rate: 0, - unit: String::new(), parser_type: DateParserType::Date, + format: None, }; assert_eq!(d.determine_rate(), (1, "per day".to_string())); } @@ -362,13 +374,11 @@ mod rate_tests { #[test] fn hourly() { let d = Date { - format: String::new(), earliest: DateTime::new(Dt::from_ordinal_date(2021, 1).unwrap(), Tm::MIDNIGHT), latest: DateTime::new(Dt::from_ordinal_date(2021, 3).unwrap(), Tm::MIDNIGHT), count: 150, - rate: 0, - unit: String::new(), parser_type: DateParserType::Date, + format: None, }; assert_eq!(d.determine_rate(), (3, "per hour".to_string())); } @@ -376,13 +386,11 @@ mod rate_tests { #[test] fn minutely() { let d = Date { - format: String::new(), earliest: DateTime::new(Dt::from_ordinal_date(2021, 1).unwrap(), Tm::MIDNIGHT), latest: DateTime::new(Dt::from_ordinal_date(2021, 2).unwrap(), Tm::MIDNIGHT), count: 1500, - rate: 0, - unit: String::new(), parser_type: DateParserType::Date, + format: None, }; assert_eq!(d.determine_rate(), (1, "per minute".to_string())); } @@ -390,13 +398,11 @@ mod rate_tests { #[test] fn secondly() { let d = Date { - format: String::new(), earliest: DateTime::new(Dt::from_ordinal_date(2021, 1).unwrap(), Tm::MIDNIGHT), latest: DateTime::new(Dt::from_ordinal_date(2021, 2).unwrap(), Tm::MIDNIGHT), count: 100000, - rate: 0, - unit: String::new(), parser_type: DateParserType::Date, + format: None, }; assert_eq!(d.determine_rate(), (1, "per second".to_string())); } From 2588fa63c10a9f4e5afb73b97a898999cc06de1b Mon Sep 17 00:00:00 2001 From: Christopher Sardegna Date: Mon, 9 Jun 2025 17:42:20 -0700 Subject: [PATCH 05/16] Implement `reset()` for Counter, refactor for performance --- src/util/aggregators/counter.rs | 280 ++++++++++++++------------------ 1 file changed, 125 insertions(+), 155 deletions(-) diff --git a/src/util/aggregators/counter.rs b/src/util/aggregators/counter.rs index dc27e4c..c2d75d5 100644 --- a/src/util/aggregators/counter.rs +++ b/src/util/aggregators/counter.rs @@ -1,160 +1,160 @@ -use std::collections::{BTreeSet, HashMap}; +use std::collections::{BTreeMap, HashMap}; + +use format_num::format_num; use crate::{ constants::cli::colors::RESET_COLOR, util::{aggregators::aggregator::Aggregator, error::LogriaError}, }; -use format_num::format_num; -/// Counter struct inspired by Python's stdlib Counter class +/// A counter for tracking occurrences of messages, similar to Python's `Counter`. pub struct Counter { - state: HashMap, - order: HashMap>, - num_to_get: Option, + /// Map of message strings to their occurrence counts. + counts: HashMap, + /// Total number of messages processed. + total_count: u64, + /// Optional limit on the number of top messages to display. + frozen_n: Option, } impl Aggregator for Counter { + /// Implements [`Aggregator::update`]: increments the count for the given message. fn update(&mut self, message: &str) -> Result<(), LogriaError> { self.increment(message); Ok(()) } + /// Implements [`Aggregator::messages`]: returns the top `n` messages. fn messages(&self, n: &usize) -> Vec { - // Place to store the result - let num = &self.num_to_get.unwrap_or(*n); - let mut result = Vec::with_capacity(*num); - if *num == 0_usize { - return result; - } - - // Keep track of how many items we have added - let mut total_added = 0; - - // Get the keys sorted from highest to lowest - let mut counts: Vec = self - .order - .keys() - .map(std::borrow::ToOwned::to_owned) - .collect(); - counts.sort_unstable(); + self.get_top_messages(*n) + } - // Get the value under each key - for count in counts.iter().rev() { - let items = self.order.get(count).unwrap(); - for item in items { - let total = self.total() as f64; - result.push(format!( - " {}{}: {} ({:.0}%)", - item.trim(), - RESET_COLOR, - format_num!(",d", *count as f64), - (*count as f64 / total) * 100_f64 - )); - total_added += 1; - if total_added == *num { - return result; - } - } - } - result + /// Implements [`Aggregator::reset`]: clears all message counts. + fn reset(&mut self) { + self.counts.clear(); + self.total_count = 0; } } impl Counter { - pub fn new(num_to_get: Option) -> Counter { + /// Creates a new `Counter` with no messages counted and no limits. + pub fn new() -> Counter { + Counter { + counts: HashMap::new(), + total_count: 0, + frozen_n: None, + } + } + + /// Creates a new `Counter` configured to return only the top message. + pub fn mean() -> Counter { Counter { - state: HashMap::new(), - order: HashMap::new(), - num_to_get, + counts: HashMap::new(), + total_count: 0, + frozen_n: Some(1), } } - /// Determine the total number of items in the Counter - fn total(&self) -> u64 { - self.state.values().sum() + /// Increments the count for `item`, adding it if not already present. + pub fn increment(&mut self, item: &str) { + let count = self.counts.entry(item.to_string()).or_insert(0); + *count += 1; + self.total_count += 1; } - /// Remove an item from the internal order - fn purge_from_order(&mut self, item: &str, count: &u64) { - if let Some(order) = self.order.get_mut(count) { - // If there was data there, remove the existing item - if !order.is_empty() { - order.remove(item); - if order.is_empty() { - self.order.remove(count); - } + /// Decrements the count for `item`, removing it if its count reaches zero. + pub fn decrement(&mut self, item: &str) { + if let Some(count) = self.counts.get_mut(item) { + if *count > 1 { + *count -= 1; + self.total_count -= 1; + } else { + self.counts.remove(item); + self.total_count -= 1; } } } - /// Remove an item from the internal state - fn purge_from_state(&mut self, item: &str) { - self.state.remove(item); + /// Removes `item` entirely from the counter, subtracting its count from the total. + pub fn delete(&mut self, item: &str) { + if let Some(count) = self.counts.remove(item) { + self.total_count -= count; + } } - /// Update the internal item order `HashMap` - fn update_order(&mut self, item: &str, old_count: &u64, new_count: &u64) { - self.purge_from_order(item, old_count); - if let Some(v) = self.order.get_mut(new_count) { - v.insert(item.to_owned()); - } else { - let mut set = BTreeSet::new(); - set.insert(item.to_owned()); - self.order.insert(*new_count, set); + /// Retrieves the top `n` messages, respecting `frozen_n` if set. + fn get_top_messages(&self, n: usize) -> Vec { + let actual_num = self.frozen_n.unwrap_or(n).min(self.counts.len()); + + if actual_num == 0 || self.total_count == 0 { + return Vec::new(); } - } - /// Increment an item into the counter, creating if it does not exist - fn increment(&mut self, item: &str) { - let old_count = self.state.get(item).unwrap_or(&0).to_owned(); - let new_count = old_count.checked_add(1).unwrap_or(old_count); - self.state.insert(item.to_owned(), new_count); - self.update_order(item, &old_count, &new_count); + self.compute_top_messages(actual_num) } - /// Reduce an item from the counter, removing if it becomes 0 - fn decrement(&mut self, item: &str) { - let old_count = self.state.get(item).unwrap_or(&0).to_owned(); - let new_count = old_count.checked_sub(1); - match new_count { - Some(count) => { - if count > 0 { - self.state.insert(item.to_owned(), count); - self.update_order(item, &old_count, &count); - } else { - self.delete(item); + /// Computes the top `n` messages sorted by count (descending) and message text. + fn compute_top_messages(&self, n: usize) -> Vec { + // Use BTreeMap to maintain sorted order by count (descending) + let mut sorted_counts: BTreeMap, Vec<&str>> = BTreeMap::new(); + + for (item, &count) in &self.counts { + sorted_counts + .entry(std::cmp::Reverse(count)) + .or_default() + .push(item.as_str()); + } + + // Sort items within each count group for consistent ordering + for items in sorted_counts.values_mut() { + items.sort_unstable(); + } + + let mut result = Vec::with_capacity(n); + let total_f64 = self.total_count as f64; + let mut added = 0; + + for (std::cmp::Reverse(count), items) in sorted_counts { + for item in items { + if added >= n { + break; } + + let percentage = (count as f64 / total_f64) * 100.0; + result.push(format!( + " {}{}: {} ({:.0}%)", + item.trim(), + RESET_COLOR, + format_num!(",d", count as f64), + percentage + )); + added += 1; } - None => { - self.delete(item); + if added >= n { + break; } } - } - /// Remove an item from the counter completely - fn delete(&mut self, item: &str) { - let count = self.state.get(item).unwrap().to_owned(); - self.purge_from_order(item, &count); - self.purge_from_state(item); + result } } #[cfg(test)] mod behavior_tests { use crate::util::aggregators::{aggregator::Aggregator, counter::Counter}; - use std::collections::{BTreeSet, HashMap}; + use std::collections::HashMap; static A: &str = "a"; static B: &str = "b"; #[test] fn can_construct_counter() { - Counter::new(None); + Counter::new(); } #[test] fn can_count_int() { - let mut c: Counter = Counter::new(None); + let mut c: Counter = Counter::new(); c.increment("1"); c.increment("1"); c.increment("1"); @@ -165,21 +165,14 @@ mod behavior_tests { expected_count.insert("1".to_string(), 3); expected_count.insert("2".to_string(), 2); - let mut expected_order: HashMap> = HashMap::new(); - let mut a = BTreeSet::new(); - let mut b = BTreeSet::new(); - a.insert("1".to_string()); - b.insert("2".to_string()); - expected_order.insert(3, a); - expected_order.insert(2, b); - - assert_eq!(c.state, expected_count); - assert_eq!(c.order, expected_order); + assert_eq!(c.counts.get("1"), Some(&3)); + assert_eq!(c.counts.get("2"), Some(&2)); + assert_eq!(c.total_count, 5); } #[test] fn can_count() { - let mut c: Counter = Counter::new(Some(5)); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(A); c.increment(A); @@ -190,21 +183,14 @@ mod behavior_tests { expected_count.insert(A.to_owned(), 3); expected_count.insert(B.to_owned(), 2); - let mut expected_order: HashMap> = HashMap::new(); - let mut a = BTreeSet::new(); - let mut b = BTreeSet::new(); - a.insert(A.to_owned()); - b.insert(B.to_owned()); - expected_order.insert(3, a); - expected_order.insert(2, b); - - assert_eq!(c.state, expected_count); - assert_eq!(c.order, expected_order); + assert_eq!(c.counts.get(A), Some(&3)); + assert_eq!(c.counts.get(B), Some(&2)); + assert_eq!(c.total_count, 5); } #[test] fn can_sum() { - let mut c: Counter = Counter::new(None); + let mut c: Counter = Counter::new(); c.update(A).unwrap(); c.update(A).unwrap(); c.update(A).unwrap(); @@ -215,12 +201,12 @@ mod behavior_tests { expected.insert(A.to_owned(), 3); expected.insert(B.to_owned(), 2); - assert_eq!(c.total(), 5); + assert_eq!(c.total_count, 5); } #[test] fn can_decrement() { - let mut c: Counter = Counter::new(Some(5)); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(A); c.increment(A); @@ -232,19 +218,14 @@ mod behavior_tests { expected_count.insert(A.to_owned(), 2); expected_count.insert(B.to_owned(), 2); - let mut expected_order: HashMap> = HashMap::new(); - let mut a = BTreeSet::new(); - a.insert(A.to_owned()); - a.insert(B.to_owned()); - expected_order.insert(2, a); - - assert_eq!(c.state, expected_count); - assert_eq!(c.order, expected_order); + assert_eq!(c.counts.get(A), Some(&2)); + assert_eq!(c.counts.get(B), Some(&2)); + assert_eq!(c.total_count, 4); } #[test] fn can_decrement_auto_remove() { - let mut c: Counter = Counter::new(Some(5)); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(B); c.increment(B); @@ -253,18 +234,14 @@ mod behavior_tests { let mut expected_count = HashMap::new(); expected_count.insert(B.to_owned(), 2); - let mut expected_order: HashMap> = HashMap::new(); - let mut b = BTreeSet::new(); - b.insert(B.to_owned()); - expected_order.insert(2, b); - - assert_eq!(c.state, expected_count); - assert_eq!(c.order, expected_order); + assert_eq!(c.counts.get(B), Some(&2)); + assert_eq!(c.counts.get(A), None); + assert_eq!(c.total_count, 2); } #[test] fn can_delete() { - let mut c: Counter = Counter::new(Some(5)); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(A); c.increment(A); @@ -272,16 +249,9 @@ mod behavior_tests { c.increment(B); c.delete(A); - let mut expected_count = HashMap::new(); - expected_count.insert(B.to_owned(), 2); - - let mut expected_order: HashMap> = HashMap::new(); - let mut b = BTreeSet::new(); - b.insert(B.to_owned()); - expected_order.insert(2, b); - - assert_eq!(c.state, expected_count); - assert_eq!(c.order, expected_order); + assert_eq!(c.counts.get(B), Some(&2)); + assert_eq!(c.counts.get(A), None); + assert_eq!(c.total_count, 2); } } @@ -296,7 +266,7 @@ mod message_tests { #[test] fn can_get_top_0() { - let mut c: Counter = Counter::new(None); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(A); c.increment(A); @@ -314,7 +284,7 @@ mod message_tests { #[test] fn can_get_top_1() { - let mut c: Counter = Counter::new(None); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(A); c.increment(A); @@ -332,7 +302,7 @@ mod message_tests { #[test] fn can_get_top_2() { - let mut c: Counter = Counter::new(None); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(A); c.increment(A); @@ -353,7 +323,7 @@ mod message_tests { #[test] fn can_get_top_3() { - let mut c: Counter = Counter::new(None); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(A); c.increment(A); @@ -375,7 +345,7 @@ mod message_tests { #[test] fn can_get_top_4() { - let mut c: Counter = Counter::new(Some(5)); + let mut c: Counter = Counter::new(); c.increment(A); c.increment(A); c.increment(A); From 9ef8724211d62ed3ef090b53fc38d0ccc2d20a73 Mon Sep 17 00:00:00 2001 From: Christopher Sardegna Date: Mon, 9 Jun 2025 17:42:33 -0700 Subject: [PATCH 06/16] Add consts, update constructors --- src/extensions/parser.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/extensions/parser.rs b/src/extensions/parser.rs index 05f3482..51cf7cd 100644 --- a/src/extensions/parser.rs +++ b/src/extensions/parser.rs @@ -26,6 +26,11 @@ use crate::{ }, }; +/// The step size for progress indicator updates. +pub const STEP: usize = 999; +/// Threshold for when to print progress updates in the aggregator. +pub const THRESHOLD: usize = 25_000; + #[derive(Eq, Hash, PartialEq, Serialize, Deserialize, Debug)] pub enum PatternType { Split, @@ -163,7 +168,7 @@ impl Parser { } AggregationMethod::Mode => { self.aggregator_map - .insert(method_name.to_string(), Box::new(Counter::new(Some(1)))); + .insert(method_name.to_string(), Box::new(Counter::mean())); } AggregationMethod::Sum => { self.aggregator_map @@ -171,7 +176,7 @@ impl Parser { } AggregationMethod::Count => { self.aggregator_map - .insert(method_name.to_string(), Box::new(Counter::new(None))); + .insert(method_name.to_string(), Box::new(Counter::new())); } AggregationMethod::Date(format) => { self.aggregator_map.insert( From 24fcaf11ffba091e2e032439faf0feca21954ef1 Mon Sep 17 00:00:00 2001 From: Christopher Sardegna Date: Mon, 9 Jun 2025 17:42:46 -0700 Subject: [PATCH 07/16] Use consts --- src/communication/handlers/highlight.rs | 3 ++- src/communication/handlers/parser.rs | 5 ++--- src/communication/handlers/regex.rs | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/communication/handlers/highlight.rs b/src/communication/handlers/highlight.rs index 1a5c0ca..6646e12 100644 --- a/src/communication/handlers/highlight.rs +++ b/src/communication/handlers/highlight.rs @@ -12,6 +12,7 @@ use crate::{ cli_chars::{COMMAND_CHAR, HIGHLIGHT_CHAR, NORMAL_STR, TOGGLE_HIGHLIGHT_CHAR}, patterns::ANSI_COLOR_PATTERN, }, + extensions::parser::{STEP, THRESHOLD}, ui::scroll::{self, ScrollState, update_current_match_index}, }; @@ -84,7 +85,7 @@ impl ProcessorMethods for HighlightHandler { } // Update the user interface with the current state - if end - start > 10_000 && (index % 99 == 0 || index == end - 1) { + if end - start > THRESHOLD && (index % STEP == 0 || index == end - 1) { let word = if index == end - 1 { "Processed" } else { diff --git a/src/communication/handlers/parser.rs b/src/communication/handlers/parser.rs index 9c9f3a8..6ece331 100644 --- a/src/communication/handlers/parser.rs +++ b/src/communication/handlers/parser.rs @@ -14,7 +14,7 @@ use crate::{ constants::cli::cli_chars::{AGGREGATION_CHAR, COMMAND_CHAR, PARSER_CHAR}, extensions::{ extension::ExtensionMethods, - parser::{Parser, PatternType}, + parser::{Parser, PatternType, STEP, THRESHOLD}, }, ui::scroll, util::error::LogriaError, @@ -219,7 +219,6 @@ impl ProcessorMethods for ParserHandler { fn process_matches(&mut self, window: &mut MainWindow) -> Result<()> { // Only process if the parser is set up properly if let ParserState::Full = window.config.parser_state { - // TODO: Possibly async? Possibly loading indicator for large jobs? if self.parser.is_some() { // Start from where we left off to the most recent message let start = window.config.last_index_processed; @@ -255,7 +254,7 @@ impl ProcessorMethods for ParserHandler { } // Update the user interface with the current state - if end - start > 10_000 && (index % 99 == 0 || index == end - 1) { + if end - start > THRESHOLD && (index % STEP == 0 || index == end - 1) { let word = if index == end - 1 { "Processed" } else { diff --git a/src/communication/handlers/regex.rs b/src/communication/handlers/regex.rs index ff63a57..9828094 100644 --- a/src/communication/handlers/regex.rs +++ b/src/communication/handlers/regex.rs @@ -12,6 +12,7 @@ use crate::{ cli_chars::{COMMAND_CHAR, NORMAL_STR, REGEX_CHAR, TOGGLE_HIGHLIGHT_CHAR}, patterns::ANSI_COLOR_PATTERN, }, + extensions::parser::{STEP, THRESHOLD}, ui::scroll, }; @@ -76,7 +77,7 @@ impl ProcessorMethods for RegexHandler { } // Update the user interface with the current state - if end - start > 10_000 && (index % 99 == 0 || index == end - 1) { + if end - start > THRESHOLD && (index % STEP == 0 || index == end - 1) { let word = if index == end - 1 { "Processed" } else { From 2b0397965f8e99a4baaea7be7517242cadfc3c9c Mon Sep 17 00:00:00 2001 From: Christopher Sardegna Date: Mon, 9 Jun 2025 18:07:46 -0700 Subject: [PATCH 08/16] Move constants, add progress function --- src/communication/handlers/processor.rs | 30 +++++++++++++++++++++++++ src/extensions/parser.rs | 5 ----- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/communication/handlers/processor.rs b/src/communication/handlers/processor.rs index b999443..f638ac6 100644 --- a/src/communication/handlers/processor.rs +++ b/src/communication/handlers/processor.rs @@ -7,3 +7,33 @@ pub trait ProcessorMethods { fn clear_matches(&mut self, window: &mut MainWindow) -> Result<()>; fn process_matches(&mut self, window: &mut MainWindow) -> Result<()>; } + +/// The step size for progress indicator updates. +const STEP: usize = 999; +/// Threshold for when to print progress updates in the aggregator. +const THRESHOLD: usize = 25_000; + +#[inline(always)] +pub fn update_progress( + window: &mut MainWindow, + start: usize, + end: usize, + index: usize, +) -> Result { + // Update the user interface with the current state + if end - start > THRESHOLD && (index % STEP == 0 || index == end - 1) { + let word = if index == end - 1 { + "Processed" + } else { + "Processing" + }; + window.write_to_command_line(&format!( + "{word} messages: {}/{} ({}%)", + (index + 1) - start, + end - start, + ((index + 1 - start) * 100) / (end - start) + ))?; + return Ok(true); + } + Ok(false) +} diff --git a/src/extensions/parser.rs b/src/extensions/parser.rs index 51cf7cd..7913366 100644 --- a/src/extensions/parser.rs +++ b/src/extensions/parser.rs @@ -26,11 +26,6 @@ use crate::{ }, }; -/// The step size for progress indicator updates. -pub const STEP: usize = 999; -/// Threshold for when to print progress updates in the aggregator. -pub const THRESHOLD: usize = 25_000; - #[derive(Eq, Hash, PartialEq, Serialize, Deserialize, Debug)] pub enum PatternType { Split, From c21ac53f0ac4e277f1dd5528a193f1521188f496 Mon Sep 17 00:00:00 2001 From: Christopher Sardegna Date: Mon, 9 Jun 2025 18:08:09 -0700 Subject: [PATCH 09/16] Use `update_progress()`, cache render --- src/communication/handlers/highlight.rs | 24 ++++++++--------------- src/communication/handlers/parser.rs | 26 ++++++++++--------------- src/communication/handlers/regex.rs | 24 ++++++++--------------- 3 files changed, 26 insertions(+), 48 deletions(-) diff --git a/src/communication/handlers/highlight.rs b/src/communication/handlers/highlight.rs index 6646e12..9a4100d 100644 --- a/src/communication/handlers/highlight.rs +++ b/src/communication/handlers/highlight.rs @@ -6,13 +6,14 @@ use regex::bytes::Regex; use super::{handler::Handler, processor::ProcessorMethods}; use crate::{ communication::{ - handlers::user_input::UserInputHandler, input::InputType::Normal, reader::MainWindow, + handlers::{processor::update_progress, user_input::UserInputHandler}, + input::InputType::Normal, + reader::MainWindow, }, constants::cli::{ cli_chars::{COMMAND_CHAR, HIGHLIGHT_CHAR, NORMAL_STR, TOGGLE_HIGHLIGHT_CHAR}, patterns::ANSI_COLOR_PATTERN, }, - extensions::parser::{STEP, THRESHOLD}, ui::scroll::{self, ScrollState, update_current_match_index}, }; @@ -74,6 +75,7 @@ impl HighlightHandler { impl ProcessorMethods for HighlightHandler { /// Process matches, loading the buffer of indexes to matched messages in the main buffer fn process_matches(&mut self, window: &mut MainWindow) -> Result<()> { + let mut wrote_progress = false; if self.current_pattern.is_some() { // Start from where we left off to the most recent message let start = window.config.last_index_regexed; @@ -85,24 +87,14 @@ impl ProcessorMethods for HighlightHandler { } // Update the user interface with the current state - if end - start > THRESHOLD && (index % STEP == 0 || index == end - 1) { - let word = if index == end - 1 { - "Processed" - } else { - "Processing" - }; - window.write_to_command_line(&format!( - "{word} messages: {}/{} ({}%)", - (index + 1) - start, - end - start, - ((index + 1 - start) * 100) / (end - start) - ))?; - } + wrote_progress = update_progress(window, start, end, index)?; // Update the last spot so we know where to start next time window.config.last_index_regexed = index + 1; } - window.write_status()?; + if wrote_progress { + window.write_status()?; + } } Ok(()) } diff --git a/src/communication/handlers/parser.rs b/src/communication/handlers/parser.rs index 6ece331..937fb59 100644 --- a/src/communication/handlers/parser.rs +++ b/src/communication/handlers/parser.rs @@ -6,7 +6,9 @@ use regex::Regex; use crate::{ communication::{ handlers::{ - handler::Handler, multiple_choice::MultipleChoiceHandler, processor::ProcessorMethods, + handler::Handler, + multiple_choice::MultipleChoiceHandler, + processor::{ProcessorMethods, update_progress}, }, input::{InputType::Normal, StreamType}, reader::MainWindow, @@ -14,7 +16,7 @@ use crate::{ constants::cli::cli_chars::{AGGREGATION_CHAR, COMMAND_CHAR, PARSER_CHAR}, extensions::{ extension::ExtensionMethods, - parser::{Parser, PatternType, STEP, THRESHOLD}, + parser::{Parser, PatternType}, }, ui::scroll, util::error::LogriaError, @@ -220,6 +222,7 @@ impl ProcessorMethods for ParserHandler { // Only process if the parser is set up properly if let ParserState::Full = window.config.parser_state { if self.parser.is_some() { + let mut wrote_progress = false; // Start from where we left off to the most recent message let start = window.config.last_index_processed; let end = window.previous_messages().len(); @@ -254,23 +257,14 @@ impl ProcessorMethods for ParserHandler { } // Update the user interface with the current state - if end - start > THRESHOLD && (index % STEP == 0 || index == end - 1) { - let word = if index == end - 1 { - "Processed" - } else { - "Processing" - }; - window.write_to_command_line(&format!( - "{word} messages: {}/{} ({}%)", - (index + 1) - start, - end - start, - ((index + 1 - start) * 100) / (end - start) - ))?; - } + wrote_progress = update_progress(window, start, end, index)?; + // Update the last spot so we know where to start next time window.config.last_index_processed = index + 1; } - window.write_status()?; + if wrote_progress { + window.write_status()?; + } } } Ok(()) diff --git a/src/communication/handlers/regex.rs b/src/communication/handlers/regex.rs index 9828094..b9ac7e0 100644 --- a/src/communication/handlers/regex.rs +++ b/src/communication/handlers/regex.rs @@ -6,13 +6,14 @@ use regex::bytes::Regex; use super::{handler::Handler, processor::ProcessorMethods}; use crate::{ communication::{ - handlers::user_input::UserInputHandler, input::InputType::Normal, reader::MainWindow, + handlers::{processor::update_progress, user_input::UserInputHandler}, + input::InputType::Normal, + reader::MainWindow, }, constants::cli::{ cli_chars::{COMMAND_CHAR, NORMAL_STR, REGEX_CHAR, TOGGLE_HIGHLIGHT_CHAR}, patterns::ANSI_COLOR_PATTERN, }, - extensions::parser::{STEP, THRESHOLD}, ui::scroll, }; @@ -65,6 +66,7 @@ impl RegexHandler { impl ProcessorMethods for RegexHandler { /// Process matches, loading the buffer of indexes to matched messages in the main buffer fn process_matches(&mut self, window: &mut MainWindow) -> Result<()> { + let mut wrote_progress = false; if self.current_pattern.is_some() { // Start from where we left off to the most recent message // Start from where we left off to the most recent message @@ -77,24 +79,14 @@ impl ProcessorMethods for RegexHandler { } // Update the user interface with the current state - if end - start > THRESHOLD && (index % STEP == 0 || index == end - 1) { - let word = if index == end - 1 { - "Processed" - } else { - "Processing" - }; - window.write_to_command_line(&format!( - "{word} messages: {}/{} ({}%)", - (index + 1) - start, - end - start, - ((index + 1 - start) * 100) / (end - start) - ))?; - } + wrote_progress = update_progress(window, start, end, index)?; // Update the last spot so we know where to start next time window.config.last_index_regexed = index + 1; } - window.write_status()?; + if wrote_progress { + window.write_status()?; + } } Ok(()) } From 10d6908b83b069d982a73b58181d50fcd8cd228c Mon Sep 17 00:00:00 2001 From: Christopher Sardegna Date: Mon, 9 Jun 2025 19:17:37 -0700 Subject: [PATCH 10/16] Use min-heap for counter render --- src/communication/handlers/parser.rs | 12 +++--- src/util/aggregators/counter.rs | 59 ++++++++++++---------------- 2 files changed, 32 insertions(+), 39 deletions(-) diff --git a/src/communication/handlers/parser.rs b/src/communication/handlers/parser.rs index 937fb59..58d35e8 100644 --- a/src/communication/handlers/parser.rs +++ b/src/communication/handlers/parser.rs @@ -538,13 +538,13 @@ mod parse_tests { "Sum", " Total: 5,850", "Count", - " 10\u{1b}[0m: 1 (1%)", - " 100\u{1b}[0m: 1 (1%)", - " 101\u{1b}[0m: 1 (1%)", - " 102\u{1b}[0m: 1 (1%)", - " 103\u{1b}[0m: 1 (1%)", + " 95\u{1b}[0m: 1 (1%)", + " 96\u{1b}[0m: 1 (1%)", + " 97\u{1b}[0m: 1 (1%)", + " 98\u{1b}[0m: 1 (1%)", + " 99\u{1b}[0m: 1 (1%)", "Mode", - " 10\u{1b}[0m: 1 (1%)", + " 99\u{1b}[0m: 1 (1%)", ] ); } diff --git a/src/util/aggregators/counter.rs b/src/util/aggregators/counter.rs index c2d75d5..d0340a9 100644 --- a/src/util/aggregators/counter.rs +++ b/src/util/aggregators/counter.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, HashMap}; +use std::{cmp::Reverse, collections::HashMap}; use format_num::format_num; @@ -95,47 +95,39 @@ impl Counter { /// Computes the top `n` messages sorted by count (descending) and message text. fn compute_top_messages(&self, n: usize) -> Vec { - // Use BTreeMap to maintain sorted order by count (descending) - let mut sorted_counts: BTreeMap, Vec<&str>> = BTreeMap::new(); + // A min-heap that only ever holds the top n entries. + let total = self.total_count as f64; + let mut heap = std::collections::BinaryHeap::with_capacity(n + 1); for (item, &count) in &self.counts { - sorted_counts - .entry(std::cmp::Reverse(count)) - .or_default() - .push(item.as_str()); - } - - // Sort items within each count group for consistent ordering - for items in sorted_counts.values_mut() { - items.sort_unstable(); + // Reverse so that smallest count is at the top—and will get popped when > n + heap.push(Reverse((count, item.as_str()))); + if heap.len() > n { + heap.pop(); + } } - let mut result = Vec::with_capacity(n); - let total_f64 = self.total_count as f64; - let mut added = 0; + // Drain heap into a Vec, sort descending by count then key + let mut top: Vec<(u64, &str)> = heap + .into_iter() + .map(|Reverse((count, item))| (count, item)) + .collect(); - for (std::cmp::Reverse(count), items) in sorted_counts { - for item in items { - if added >= n { - break; - } + top.sort_unstable_by(|(ca, a), (cb, b)| cb.cmp(ca).then_with(|| a.cmp(b))); - let percentage = (count as f64 / total_f64) * 100.0; - result.push(format!( + // Format output + top.into_iter() + .map(|(count, item)| { + let pct = (count as f64 / total) * 100.0; + format!( " {}{}: {} ({:.0}%)", item.trim(), RESET_COLOR, format_num!(",d", count as f64), - percentage - )); - added += 1; - } - if added >= n { - break; - } - } - - result + pct + ) + }) + .collect() } } @@ -288,6 +280,7 @@ mod message_tests { c.increment(A); c.increment(A); c.increment(A); + c.increment(A); c.increment(B); c.increment(B); c.increment(B); @@ -295,7 +288,7 @@ mod message_tests { c.increment(C); c.increment(D); - let expected = vec![String::from(" a\u{1b}[0m: 3 (33%)")]; + let expected = vec![String::from(" a\u{1b}[0m: 4 (40%)")]; assert_eq!(c.messages(&1), expected); } From 01f538b849f512479d66a9274af1a852eec336f0 Mon Sep 17 00:00:00 2001 From: Christopher Sardegna Date: Mon, 9 Jun 2025 19:37:01 -0700 Subject: [PATCH 11/16] Add `format_float()` and `format_int()` to replace `format_num` crate --- Cargo.lock | 10 --- Cargo.toml | 1 - src/util/aggregators/aggregator.rs | 97 ++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 64b1817..ee4705e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -218,15 +218,6 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "format_num" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14ac05eb8d2eb4ed1eeff847911deae077b0b53332465de9d6a26b0ea9961bc8" -dependencies = [ - "regex", -] - [[package]] name = "getrandom" version = "0.2.16" @@ -310,7 +301,6 @@ dependencies = [ "clap", "crossterm", "dirs", - "format_num", "is_executable", "regex", "serde", diff --git a/Cargo.toml b/Cargo.toml index baf0d75..3d5d55c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,6 @@ version = "0.0.0" clap = { version = "=4.5.39", features = ["cargo"] } crossterm = "=0.29.0" dirs = "=6.0.0" -format_num = "=0.1.0" is_executable = "=1.0.4" regex = "=1.11.1" serde = { version = "=1.0.219", features = ["derive"] } diff --git a/src/util/aggregators/aggregator.rs b/src/util/aggregators/aggregator.rs index f6a4ad5..7dec059 100644 --- a/src/util/aggregators/aggregator.rs +++ b/src/util/aggregators/aggregator.rs @@ -34,6 +34,47 @@ pub fn extract_number(message: &str) -> Option { result.parse::().ok() } +/// Formats a float as a string with commas separating thousands +pub fn format_float(n: f64) -> String { + let int_part = n.trunc() as i64; + let abs_str = int_part.abs().to_string(); + let mut result = String::new(); + + let chars: Vec = abs_str.chars().collect(); + let len = chars.len(); + + for (i, c) in chars.iter().enumerate() { + if i != 0 && (len - i) % 3 == 0 { + result.push(','); + } + result.push(*c); + } + + if int_part < 0 { + format!("-{}", result) + } else { + result + } +} + +/// Formats a float as a string with commas separating thousands +pub fn format_int(n: usize) -> String { + let digits = n.to_string(); + let mut result = String::new(); + + let chars: Vec = digits.chars().collect(); + let len = chars.len(); + + for (i, c) in chars.iter().enumerate() { + if i != 0 && (len - i) % 3 == 0 { + result.push(','); + } + result.push(*c); + } + + result +} + pub trait Aggregator { /// Insert an item into the aggregator, updating it's internal tracking data fn update(&mut self, message: &str) -> Result<(), LogriaError>; @@ -156,3 +197,59 @@ mod extract_tests { assert!(result.unwrap() - 1337. == 0.); } } + +#[cfg(test)] +mod format_float_tests { + use crate::util::aggregators::aggregator::format_float; + + #[test] + fn test_basic_positive() { + assert_eq!(format_float(1234.56), "1,234"); + assert_eq!(format_float(1000000.0), "1,000,000"); + assert_eq!(format_float(0.0), "0"); + } + + #[test] + fn test_basic_negative() { + assert_eq!(format_float(-1234.56), "-1,234"); + assert_eq!(format_float(-1000000.99), "-1,000,000"); + } + + #[test] + fn test_truncation() { + assert_eq!(format_float(999.999), "999"); + assert_eq!(format_float(-999.999), "-999"); + } + + #[test] + fn test_small_numbers() { + assert_eq!(format_float(9.99), "9"); + assert_eq!(format_float(-9.99), "-9"); + assert_eq!(format_float(0.99), "0"); + } + + #[test] + fn test_large_number() { + assert_eq!(format_float(1234567890.123), "1,234,567,890"); + } +} + +#[cfg(test)] +mod tests { + use crate::util::aggregators::aggregator::format_int; + + #[test] + fn test_format_usize_basic() { + assert_eq!(format_int(0), "0"); + assert_eq!(format_int(12), "12"); + assert_eq!(format_int(123), "123"); + } + + #[test] + fn test_format_usize_commas() { + assert_eq!(format_int(1234), "1,234"); + assert_eq!(format_int(12345), "12,345"); + assert_eq!(format_int(1234567), "1,234,567"); + assert_eq!(format_int(1234567890), "1,234,567,890"); + } +} From 9e55c92a267004d2beb801c93e13bf71cdca5920 Mon Sep 17 00:00:00 2001 From: Christopher Sardegna Date: Mon, 9 Jun 2025 19:44:25 -0700 Subject: [PATCH 12/16] Use new format functions --- Cargo.lock | 16 ++++++++-------- src/util/aggregators/counter.rs | 9 +++++---- src/util/aggregators/date.rs | 11 +++++++---- src/util/aggregators/mean.rs | 7 +++---- src/util/aggregators/sum.rs | 5 ++--- 5 files changed, 25 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ee4705e..70554ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,9 +75,9 @@ checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "clap" @@ -102,9 +102,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" [[package]] name = "colorchoice" @@ -529,9 +529,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.15.0" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "strsim" @@ -541,9 +541,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.101" +version = "2.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" +checksum = "f6397daf94fa90f058bd0fd88429dd9e5738999cca8d701813c80723add80462" dependencies = [ "proc-macro2", "quote", diff --git a/src/util/aggregators/counter.rs b/src/util/aggregators/counter.rs index d0340a9..9d643e5 100644 --- a/src/util/aggregators/counter.rs +++ b/src/util/aggregators/counter.rs @@ -1,10 +1,11 @@ use std::{cmp::Reverse, collections::HashMap}; -use format_num::format_num; - use crate::{ constants::cli::colors::RESET_COLOR, - util::{aggregators::aggregator::Aggregator, error::LogriaError}, + util::{ + aggregators::aggregator::{Aggregator, format_int}, + error::LogriaError, + }, }; /// A counter for tracking occurrences of messages, similar to Python's `Counter`. @@ -123,7 +124,7 @@ impl Counter { " {}{}: {} ({:.0}%)", item.trim(), RESET_COLOR, - format_num!(",d", count as f64), + format_int(count as usize), pct ) }) diff --git a/src/util/aggregators/date.rs b/src/util/aggregators/date.rs index dfbc4ef..87d0f9b 100644 --- a/src/util/aggregators/date.rs +++ b/src/util/aggregators/date.rs @@ -1,12 +1,15 @@ use std::cmp::{max, min}; -use crate::util::{aggregators::aggregator::Aggregator, error::LogriaError}; -use format_num::format_num; use time::{ Date as Dt, PrimitiveDateTime as DateTime, Time as Tm, format_description::{OwnedFormatItem, parse_owned}, }; +use crate::util::{ + aggregators::aggregator::{Aggregator, format_int}, + error::LogriaError, +}; + #[derive(Clone, Debug)] pub enum DateParserType { Date, @@ -66,8 +69,8 @@ impl Aggregator for Date { fn messages(&self, _: &usize) -> Vec { let (rate, unit) = self.determine_rate(); let mut out_v = vec![ - format!(" Rate: {} {}", format_num!(",.0f", rate as u32), unit), - format!(" Count: {}", format_num!(",d", self.count as u32)), + format!(" Rate: {} {}", format_int(rate as usize), unit), + format!(" Count: {}", format_int(self.count as usize)), ]; match self.parser_type { DateParserType::Date => { diff --git a/src/util/aggregators/mean.rs b/src/util/aggregators/mean.rs index 631d451..9a62f4d 100644 --- a/src/util/aggregators/mean.rs +++ b/src/util/aggregators/mean.rs @@ -1,8 +1,7 @@ use crate::util::{ - aggregators::aggregator::{Aggregator, extract_number}, + aggregators::aggregator::{Aggregator, extract_number, format_float}, error::LogriaError, }; -use format_num::format_num; /// Aggregator that computes the running mean of numeric messages. pub struct Mean { @@ -43,8 +42,8 @@ impl Aggregator for Mean { fn messages(&self, _: &usize) -> Vec { vec![ format!(" Mean: {:.2}", self.mean()), - format!(" Count: {}", format_num!(",d", self.count)), - format!(" Total: {}", format_num!(",d", self.total)), + format!(" Count: {}", format_float(self.count)), + format!(" Total: {}", format_float(self.total)), ] } diff --git a/src/util/aggregators/sum.rs b/src/util/aggregators/sum.rs index d9b3a58..5245ea4 100644 --- a/src/util/aggregators/sum.rs +++ b/src/util/aggregators/sum.rs @@ -1,8 +1,7 @@ use crate::util::{ - aggregators::aggregator::{Aggregator, extract_number}, + aggregators::aggregator::{Aggregator, extract_number, format_float}, error::LogriaError, }; -use format_num::format_num; /// Aggregator that accumulates numeric values from messages into a running total. pub struct Sum { @@ -24,7 +23,7 @@ impl Aggregator for Sum { /// Returns the current total formatted as a string message. fn messages(&self, _: &usize) -> Vec { - vec![format!(" Total: {}", format_num!(",d", self.total))] + vec![format!(" Total: {}", format_float(self.total))] } /// Resets the running total back to zero. From 3ded191ec1ebbaec09990a6777712d6a7c058143 Mon Sep 17 00:00:00 2001 From: Christopher Sardegna Date: Mon, 9 Jun 2025 19:50:32 -0700 Subject: [PATCH 13/16] Fix comment --- src/util/aggregators/aggregator.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/aggregators/aggregator.rs b/src/util/aggregators/aggregator.rs index 7dec059..4a3a40c 100644 --- a/src/util/aggregators/aggregator.rs +++ b/src/util/aggregators/aggregator.rs @@ -57,7 +57,7 @@ pub fn format_float(n: f64) -> String { } } -/// Formats a float as a string with commas separating thousands +/// Formats an integer as a string with commas separating thousands pub fn format_int(n: usize) -> String { let digits = n.to_string(); let mut result = String::new(); From 6a048628a6ae9340eb2db85b4155a31bda66b300 Mon Sep 17 00:00:00 2001 From: Christopher Sardegna Date: Mon, 9 Jun 2025 19:51:48 -0700 Subject: [PATCH 14/16] Bump deps --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 70554ff..697fce6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -81,18 +81,18 @@ checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "clap" -version = "4.5.39" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd60e63e9be68e5fb56422e397cf9baddded06dae1d2e523401542383bc72a9f" +checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.39" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89cc6392a1f72bbeb820d71f32108f61fdaf18bc526e1d23954168a67759ef51" +checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" dependencies = [ "anstream", "anstyle", diff --git a/Cargo.toml b/Cargo.toml index 3d5d55c..dfe8018 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ repository = "https://github.com/ReagentX/Logria" version = "0.0.0" [dependencies] -clap = { version = "=4.5.39", features = ["cargo"] } +clap = { version = "=4.5.40", features = ["cargo"] } crossterm = "=0.29.0" dirs = "=6.0.0" is_executable = "=1.0.4" From 6e6b14d75df2e86a0df5dd5c2bdc6f5280649c52 Mon Sep 17 00:00:00 2001 From: Christopher Sardegna Date: Mon, 9 Jun 2025 20:01:05 -0700 Subject: [PATCH 15/16] Simplify defaults --- src/util/aggregators/date.rs | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/src/util/aggregators/date.rs b/src/util/aggregators/date.rs index 87d0f9b..6d1019e 100644 --- a/src/util/aggregators/date.rs +++ b/src/util/aggregators/date.rs @@ -92,23 +92,28 @@ impl Aggregator for Date { /// Resets the aggregator to its initial state, preserving `format` and `parser_type`. fn reset(&mut self) { self.count = 0; - self.earliest = match self.parser_type { - DateParserType::Date => DateTime::new(Dt::MAX, Tm::MIDNIGHT), - DateParserType::Time => DateTime::new(Dt::MIN, Tm::from_hms(23, 59, 59).unwrap()), - DateParserType::DateTime => DateTime::new(Dt::MAX, Tm::MIDNIGHT), - }; - self.latest = match self.parser_type { - DateParserType::Date => DateTime::new(Dt::MIN, Tm::MIDNIGHT), - DateParserType::Time => DateTime::new(Dt::MIN, Tm::MIDNIGHT), - DateParserType::DateTime => DateTime::new(Dt::MIN, Tm::MIDNIGHT), - }; + let (earliest, latest) = Self::default_datetimes(&self.parser_type); + self.earliest = earliest; + self.latest = latest; } } impl Date { /// Constructs a new `Date` aggregator with the given `format` and `parser_type`. pub fn new(format: &str, parser_type: DateParserType) -> Self { - let (earliest, latest) = match parser_type { + let (earliest, latest) = Self::default_datetimes(&parser_type); + + Self { + format: parse_owned::<2>(format).ok(), + earliest, + latest, + count: 0, + parser_type, + } + } + + fn default_datetimes(parser_type: &DateParserType) -> (DateTime, DateTime) { + match parser_type { DateParserType::Date => ( DateTime::new(Dt::MAX, Tm::MIDNIGHT), DateTime::new(Dt::MIN, Tm::MIDNIGHT), @@ -121,14 +126,6 @@ impl Date { DateTime::new(Dt::MAX, Tm::MIDNIGHT), DateTime::new(Dt::MIN, Tm::MIDNIGHT), ), - }; - - Self { - format: parse_owned::<2>(format).ok(), - earliest, - latest, - count: 0, - parser_type, } } From c317a173958c6814d72cb2c0d178527026ad4ec6 Mon Sep 17 00:00:00 2001 From: Christopher Sardegna Date: Mon, 9 Jun 2025 20:06:33 -0700 Subject: [PATCH 16/16] Use import --- src/util/aggregators/counter.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/util/aggregators/counter.rs b/src/util/aggregators/counter.rs index 9d643e5..105c3e0 100644 --- a/src/util/aggregators/counter.rs +++ b/src/util/aggregators/counter.rs @@ -1,4 +1,7 @@ -use std::{cmp::Reverse, collections::HashMap}; +use std::{ + cmp::Reverse, + collections::{BinaryHeap, HashMap}, +}; use crate::{ constants::cli::colors::RESET_COLOR, @@ -98,7 +101,7 @@ impl Counter { fn compute_top_messages(&self, n: usize) -> Vec { // A min-heap that only ever holds the top n entries. let total = self.total_count as f64; - let mut heap = std::collections::BinaryHeap::with_capacity(n + 1); + let mut heap = BinaryHeap::with_capacity(n + 1); for (item, &count) in &self.counts { // Reverse so that smallest count is at the top—and will get popped when > n