From 596d6affe16832ab5f0a0a39b486c2caefc1dbf7 Mon Sep 17 00:00:00 2001 From: Marc Foley Date: Thu, 20 Feb 2025 10:31:05 +0000 Subject: [PATCH 1/2] parse_wordlist: use csv reader --- src/diffenator2/shape.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/diffenator2/shape.py b/src/diffenator2/shape.py index d9e5f88..99bb503 100644 --- a/src/diffenator2/shape.py +++ b/src/diffenator2/shape.py @@ -13,6 +13,7 @@ import tqdm from diffenator2.segmenting import textSegments from collections import defaultdict +import csv # Hashing strategies for elements of a Harfbuzz buffer @@ -123,8 +124,10 @@ def parse_wordlist(fp): results = [] with open(fp, encoding="utf8") as doc: lines = doc.read().split("\n") - for line in lines: - items = line.split(",") + parsed = csv.reader(lines) + for items in parsed: + if len(items) == 0: + continue try: results.append( TemplateWord( From 5d2af321d97035387e1e22b3fe05bdfab977d986 Mon Sep 17 00:00:00 2001 From: Marc Foley Date: Thu, 20 Feb 2025 10:31:29 +0000 Subject: [PATCH 2/2] Common.txt: Include more fig strings --- src/diffenator2/data/wordlists/Common.txt | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/diffenator2/data/wordlists/Common.txt b/src/diffenator2/data/wordlists/Common.txt index b6ffa0c..3ae5165 100644 --- a/src/diffenator2/data/wordlists/Common.txt +++ b/src/diffenator2/data/wordlists/Common.txt @@ -8,4 +8,20 @@ 0a,,,ordn 0o,,,ordn 1/234,latn,dflt,frac -1/4,latn,dflt,frac \ No newline at end of file +1/4,latn,dflt,frac +0123456789 +0/0/1/2/3/4/5/6/7/8/9/0 +0#0#1#2#3#4#5#6#7#8#9#0 +"0,0,1,2,3,4,5,6,7,8,9,0" +0.0.1.2.3.4.5.6.7.8.9.0 +0:0:1:2:3:4:5:6:7:8:9:0 +0;0;1;2;3;4;5;6;7;8;9;0 +0-0-1-2-3-4-5-6-7-8-9-0 +0123456789,,,pnum +0/0/1/2/3/4/5/6/7/8/9/0,,,pnum +0#0#1#2#3#4#5#6#7#8#9#0,,,pnum +"0,0,1,2,3,4,5,6,7,8,9,0",,,pnum +0.0.1.2.3.4.5.6.7.8.9.0,,,pnum +0:0:1:2:3:4:5:6:7:8:9:0,,,pnum +0;0;1;2;3;4;5;6;7;8;9;0,,,pnum +0-0-1-2-3-4-5-6-7-8-9-0,,,pnum \ No newline at end of file