From 2c494016e624db19f36fcee42ac7c3c251622d4b Mon Sep 17 00:00:00 2001 From: Node0 <2231752+Node0@users.noreply.github.com> Date: Thu, 13 Mar 2025 03:40:01 -0700 Subject: [PATCH 1/4] Added inline tokenizer and decompression libraries to text stats analyzer --- single-page-apps/text_stats_analyzer.html | 1 + 1 file changed, 1 insertion(+) diff --git a/single-page-apps/text_stats_analyzer.html b/single-page-apps/text_stats_analyzer.html index 8465253..4a301d7 100644 --- a/single-page-apps/text_stats_analyzer.html +++ b/single-page-apps/text_stats_analyzer.html @@ -55,6 +55,7 @@
Analysis Results
const print = console.log; + From 0780a3f631a69cbc0e657d11b90be66197182148 Mon Sep 17 00:00:00 2001 From: Node0 <2231752+Node0@users.noreply.github.com> Date: Wed, 2 Apr 2025 22:11:12 -0700 Subject: [PATCH 4/4] Added nicer Source Code Pro fonts for output --- single-page-apps/text_stats_analyzer.html | 41 ++++++++++++++--------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/single-page-apps/text_stats_analyzer.html b/single-page-apps/text_stats_analyzer.html index c6de1d6..46a80ee 100644 --- a/single-page-apps/text_stats_analyzer.html +++ b/single-page-apps/text_stats_analyzer.html @@ -94,11 +94,11 @@
Analysis Results
// Basic stats const numCharacters = text.length; const numLines = text.split(/\r?\n/).length; - + // Word count using regex to split on whitespace const numWords = text.trim().split(/\s+/).filter(word => word.length > 0).length; - // Check if GPTTokenizer_o200k_base is loaded and has the encode method + // Check if GPTTokenizer_o200k_base is loaded if ( typeof GPTTokenizer_o200k_base === 'undefined' || typeof GPTTokenizer_o200k_base.encode !== 'function' @@ -107,20 +107,27 @@
Analysis Results
return; } - // Directly call the encode function on the global object (not a constructor) + // Directly call the encode function on the global object const tokens = GPTTokenizer_o200k_base.encode(text); const numTokens = tokens.length; - // Create table with invisible borders for perfect alignment using Source Code Pro + // Create table with Source Code Pro Medium and SemiBold const output = ` - - + + - - + + - - + + - - + +
Lines:${numLines}Lines:${numLines}
Words:${numWords}Words:${numWords}
Tokens:${numTokens}, *Per GPT Tokenizer (4o, o1, etc)Tokens:${numTokens}, *Per GPT Tokenizer (4o, o1, etc)
Characters:${numCharacters}Characters:${numCharacters}
`;