From f75feda2ce9dbade48042292491be9de5e811ed9 Mon Sep 17 00:00:00 2001 From: ProgramComputer <22284856+ProgramComputer@users.noreply.github.com> Date: Fri, 14 Apr 2023 22:09:25 -0500 Subject: [PATCH 1/6] Add to README and create file of unanalyzed words --- README.md | 10 ++++++++++ tests/coverage-test.py | 9 ++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 237b844..30c028a 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,16 @@ The analyser is being developed with lot of tests. To run tests : $ make test ``` +```bash +$ make coverage-test +``` +runs a coverage-tests and creates unanalyzed.lex file with unanalyzed words. +## Dataset +```bash +$ make dataset +``` +creates a .csv file with words from tests/coverage/*.txt files. + ## Citation Please cite the following publication in order to refer to the mlmorph: diff --git a/tests/coverage-test.py b/tests/coverage-test.py index ad8114d..af75761 100644 --- a/tests/coverage-test.py +++ b/tests/coverage-test.py @@ -29,8 +29,9 @@ def test_total_coverage(self): start = clock() print("%40s\t%8s\t%8s\t%s" % ('File name', 'Words', 'Analysed', 'Percentage')) - for filename in glob.glob(os.path.join(CURR_DIR, "coverage", "*.txt")): - with open(filename, 'r') as file: + with open("./tests/unanalyzed.lex", "w+") as unanFile: + for filename in glob.glob(os.path.join(CURR_DIR, "coverage", "*.txt")): + with open(filename, 'r') as file: tokens_count = 0 analysed_tokens_count = 0 for line in file: @@ -41,12 +42,14 @@ def test_total_coverage(self): analysis = self.analyser.analyse(word, False) if len(analysis) > 0: analysed_tokens_count += 1 + else: + unanFile.write(word+"\n") percentage = (analysed_tokens_count/tokens_count)*100 total_tokens_count += tokens_count total_analysed_tokens_count += analysed_tokens_count print("%40s\t%8d\t%8d\t%3.2f%%" % (os.path.basename( filename), tokens_count, analysed_tokens_count, percentage)) - file.close() + file.close; percentage = 
(total_analysed_tokens_count/total_tokens_count)*100 time_taken = clock() - start print('%40s\t%8d\t%8d\t%3.2f%%' % From 6169910c76bebd6a46c42a68a5d280ed06e886a4 Mon Sep 17 00:00:00 2001 From: ProgramComputer <22284856+ProgramComputer@users.noreply.github.com> Date: Fri, 14 Apr 2023 22:17:01 -0500 Subject: [PATCH 2/6] close() --- tests/coverage-test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/coverage-test.py b/tests/coverage-test.py index af75761..72399c7 100644 --- a/tests/coverage-test.py +++ b/tests/coverage-test.py @@ -49,7 +49,7 @@ def test_total_coverage(self): total_analysed_tokens_count += analysed_tokens_count print("%40s\t%8d\t%8d\t%3.2f%%" % (os.path.basename( filename), tokens_count, analysed_tokens_count, percentage)) - file.close; + file.close(); percentage = (total_analysed_tokens_count/total_tokens_count)*100 time_taken = clock() - start print('%40s\t%8d\t%8d\t%3.2f%%' % From 87ad1dc052677ba2fbfd8ff4fc0858fab984f453 Mon Sep 17 00:00:00 2001 From: ProgramComputer <22284856+ProgramComputer@users.noreply.github.com> Date: Fri, 14 Apr 2023 22:35:21 -0500 Subject: [PATCH 3/6] sort unique --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 58cabfd..ab0e515 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,8 @@ test: malayalam.a python coverage-analysis: malayalam.a python @python tests/coverage-test.py + sort -u tests/unanalyzed.lex --output=tests/unanalyzed.lex dataset: pip install tqdm - python scripts/create-dataset.py \ No newline at end of file + python scripts/create-dataset.py From ed4ef2d209394847343ef0551b10014957ba7a52 Mon Sep 17 00:00:00 2001 From: ProgramComputer <22284856+ProgramComputer@users.noreply.github.com> Date: Fri, 14 Apr 2023 23:30:56 -0500 Subject: [PATCH 4/6] Modify README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 30c028a..8b92113 100644 --- a/README.md +++ b/README.md @@ -129,7 
+129,7 @@ $ make test ```bash $ make coverage-test ``` -runs a coverage-tests and creates unanalyzed.lex file with unanalyzed words. +Runs the coverage tests and creates an unanalyzed.lex file in tests/ containing the unanalyzed words. ## Dataset ```bash $ make dataset From 109bfa6ea8295c36fc38964ea0b7189b759df8c9 Mon Sep 17 00:00:00 2001 From: ProgramComputer <22284856+ProgramComputer@users.noreply.github.com> Date: Fri, 14 Apr 2023 23:40:10 -0500 Subject: [PATCH 5/6] Modify Makefile --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index ab0e515..3576839 100644 --- a/Makefile +++ b/Makefile @@ -20,6 +20,7 @@ test: malayalam.a python coverage-analysis: malayalam.a python @python tests/coverage-test.py sort -u tests/unanalyzed.lex --output=tests/unanalyzed.lex + wc tests/unanalyzed.lex dataset: pip install tqdm From 5799503b5ecb29ba68bb78b67c97c17583a6ab24 Mon Sep 17 00:00:00 2001 From: ProgramComputer <22284856+ProgramComputer@users.noreply.github.com> Date: Fri, 14 Apr 2023 23:58:48 -0500 Subject: [PATCH 6/6] Modify Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3576839..84fc600 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ test: malayalam.a python coverage-analysis: malayalam.a python @python tests/coverage-test.py sort -u tests/unanalyzed.lex --output=tests/unanalyzed.lex - wc tests/unanalyzed.lex + wc tests/unanalyzed.lex dataset: pip install tqdm