diff --git a/src/diffenator2/data/wordlists/Common.txt b/src/diffenator2/data/wordlists/Common.txt index b6ffa0c..3ae5165 100644 --- a/src/diffenator2/data/wordlists/Common.txt +++ b/src/diffenator2/data/wordlists/Common.txt @@ -8,4 +8,20 @@ 0a,,,ordn 0o,,,ordn 1/234,latn,dflt,frac -1/4,latn,dflt,frac \ No newline at end of file +1/4,latn,dflt,frac +0123456789 +0/0/1/2/3/4/5/6/7/8/9/0 +0#0#1#2#3#4#5#6#7#8#9#0 +"0,0,1,2,3,4,5,6,7,8,9,0" +0.0.1.2.3.4.5.6.7.8.9.0 +0:0:1:2:3:4:5:6:7:8:9:0 +0;0;1;2;3;4;5;6;7;8;9;0 +0-0-1-2-3-4-5-6-7-8-9-0 +0123456789,,,pnum +0/0/1/2/3/4/5/6/7/8/9/0,,,pnum +0#0#1#2#3#4#5#6#7#8#9#0,,,pnum +"0,0,1,2,3,4,5,6,7,8,9,0",,,pnum +0.0.1.2.3.4.5.6.7.8.9.0,,,pnum +0:0:1:2:3:4:5:6:7:8:9:0,,,pnum +0;0;1;2;3;4;5;6;7;8;9;0,,,pnum +0-0-1-2-3-4-5-6-7-8-9-0,,,pnum \ No newline at end of file diff --git a/src/diffenator2/shape.py b/src/diffenator2/shape.py index d9e5f88..99bb503 100644 --- a/src/diffenator2/shape.py +++ b/src/diffenator2/shape.py @@ -13,6 +13,7 @@ import tqdm from diffenator2.segmenting import textSegments from collections import defaultdict +import csv # Hashing strategies for elements of a Harfbuzz buffer @@ -123,8 +124,10 @@ def parse_wordlist(fp): results = [] with open(fp, encoding="utf8") as doc: lines = doc.read().split("\n") - for line in lines: - items = line.split(",") + parsed = csv.reader(lines) + for items in parsed: + if len(items) == 0: + continue try: results.append( TemplateWord(