From b6bd345af5e63f4123bbd00a8f72a0eefa5063bb Mon Sep 17 00:00:00 2001 From: Daniyal Abbasi Date: Wed, 29 Jan 2020 01:57:36 +0530 Subject: [PATCH 1/2] Added Python3 Support --- train.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/train.py b/train.py index 0caa6f49..74e5a584 100755 --- a/train.py +++ b/train.py @@ -5,7 +5,7 @@ import sys -TESSERACT_DIR='/storage/projects/alpr/libraries/tesseract-ocr' +TESSERACT_DIR='/usr/bin/tesseract-ocr' os.environ["TESSDATA_PREFIX"] = TESSERACT_DIR #os.system("export TESSDATA_PREFIX=" + TESSERACT_DIR) @@ -14,13 +14,13 @@ TESSERACT_TRAINDIR= TESSERACT_DIR + '/training' -country = raw_input("Two-Letter Country Code to Train: ").lower() +country = input("Two-Letter Country Code to Train: ").lower() LANGUAGE_NAME='l' + country box_files = glob.glob('./' + country + '/input/*.box') if not box_files: - print "Cannot find input files" + print( "Cannot find input files") sys.exit(1) os.system("rm ./tmp/*") @@ -28,7 +28,7 @@ font_properties_file = open('./tmp/font_properties','w') for box_file in box_files: - print "Processing: " + box_file + print( "Processing: " + box_file) file_without_dir = os.path.split(box_file)[1] file_without_ext = os.path.splitext(file_without_dir)[0] @@ -37,7 +37,7 @@ tif_file = input_dir + '/' + file_without_ext + ".tif" train_cmd = "%s -l eng %s %s nobatch box.train.stderr" % (TESSERACT_BIN, tif_file, file_without_ext) - print "Executing: " + train_cmd + print( "Executing: " + train_cmd ) os.system(train_cmd) os.system("mv ./" + file_without_ext + ".tr ./tmp/" + file_without_ext + ".tr") os.system("mv ./" + file_without_ext + ".txt ./tmp/" + file_without_ext + ".txt") @@ -52,12 +52,12 @@ # Shape clustering should currently only be used for the "indic" languages #train_cmd = TESSERACT_TRAINDIR + '/shapeclustering -F ./' + country + '/input/font_properties -U unicharset ./' + country + '/input/*.tr' -#print "Executing: " + train_cmd +#print( "Executing: " + train_cmd) #os.system(train_cmd) train_cmd = TESSERACT_TRAINDIR + '/mftraining -F ./tmp/font_properties -U unicharset -O ./tmp/' + LANGUAGE_NAME + '.unicharset ./tmp/*.tr' -print "Executing: " + train_cmd +print( "Executing: " + train_cmd) os.system(train_cmd) os.system("rm ./unicharset") os.system("mv ./tmp/" + LANGUAGE_NAME + ".unicharset ./") @@ -79,7 +79,7 @@ # If a config file is in the country's directory, use that. config_file = os.path.join('./', country, country + '.config') if os.path.isfile(config_file): - print "Applying config file: " + config_file + print( "Applying config file: " + config_file) trainedata_file = LANGUAGE_NAME + '.traineddata' os.system(TESSERACT_TRAINDIR + '/combine_tessdata -o ' + trainedata_file + ' ' + config_file ) From 83c7dbdcbc25c235ea9c754e09e4e82823b1ee66 Mon Sep 17 00:00:00 2001 From: Daniyal Abbasi Date: Wed, 29 Jan 2020 02:00:07 +0530 Subject: [PATCH 2/2] minor fix --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index 74e5a584..27506549 100755 --- a/train.py +++ b/train.py @@ -5,7 +5,7 @@ import sys -TESSERACT_DIR='/usr/bin/tesseract-ocr' +TESSERACT_DIR='/storage/projects/alpr/libraries/tesseract-ocr' os.environ["TESSDATA_PREFIX"] = TESSERACT_DIR #os.system("export TESSDATA_PREFIX=" + TESSERACT_DIR)