From 2c494016e624db19f36fcee42ac7c3c251622d4b Mon Sep 17 00:00:00 2001
From: Node0 <2231752+Node0@users.noreply.github.com>
Date: Thu, 13 Mar 2025 03:40:01 -0700
Subject: [PATCH 1/4] Added inline tokenizer and decompression libraries to
text stats analyzer
---
single-page-apps/text_stats_analyzer.html | 1 +
1 file changed, 1 insertion(+)
diff --git a/single-page-apps/text_stats_analyzer.html b/single-page-apps/text_stats_analyzer.html
index 8465253..4a301d7 100644
--- a/single-page-apps/text_stats_analyzer.html
+++ b/single-page-apps/text_stats_analyzer.html
@@ -55,6 +55,7 @@
Analysis Results
const print = console.log;
+
From 0780a3f631a69cbc0e657d11b90be66197182148 Mon Sep 17 00:00:00 2001
From: Node0 <2231752+Node0@users.noreply.github.com>
Date: Wed, 2 Apr 2025 22:11:12 -0700
Subject: [PATCH 4/4] Added nicer Source Code Pro fonts for output
---
single-page-apps/text_stats_analyzer.html | 41 ++++++++++++++---------
1 file changed, 26 insertions(+), 15 deletions(-)
diff --git a/single-page-apps/text_stats_analyzer.html b/single-page-apps/text_stats_analyzer.html
index c6de1d6..46a80ee 100644
--- a/single-page-apps/text_stats_analyzer.html
+++ b/single-page-apps/text_stats_analyzer.html
@@ -94,11 +94,11 @@ Analysis Results
// Basic stats
const numCharacters = text.length;
const numLines = text.split(/\r?\n/).length;
-
+
// Word count using regex to split on whitespace
const numWords = text.trim().split(/\s+/).filter(word => word.length > 0).length;
- // Check if GPTTokenizer_o200k_base is loaded and has the encode method
+ // Check that GPTTokenizer_o200k_base is loaded and exposes an encode function
if (
typeof GPTTokenizer_o200k_base === 'undefined' ||
typeof GPTTokenizer_o200k_base.encode !== 'function'
@@ -107,20 +107,27 @@ Analysis Results
return;
}
- // Directly call the encode function on the global object (not a constructor)
+ // Directly call the encode function on the global object
const tokens = GPTTokenizer_o200k_base.encode(text);
const numTokens = tokens.length;
- // Create table with invisible borders for perfect alignment using Source Code Pro
+ // Create table with Source Code Pro Medium and SemiBold
const output = `
- | Lines: |
- ${numLines} |
+ Lines: |
+ ${numLines} |
- | Words: |
- ${numWords} |
+ Words: |
+ ${numWords} |
- | Tokens: |
- ${numTokens}, *Per GPT Tokenizer (4o, o1, etc) |
+ Tokens: |
+ ${numTokens}, *Per GPT Tokenizer (4o, o1, etc) |
- | Characters: |
- ${numCharacters} |
+ Characters: |
+ ${numCharacters} |
`;