From f75feda2ce9dbade48042292491be9de5e811ed9 Mon Sep 17 00:00:00 2001
From: ProgramComputer <22284856+ProgramComputer@users.noreply.github.com>
Date: Fri, 14 Apr 2023 22:09:25 -0500
Subject: [PATCH 1/6] Add to README and create file of unanalyzed words

---
 README.md              | 10 ++++++++++
 tests/coverage-test.py |  9 ++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 237b844..30c028a 100644
--- a/README.md
+++ b/README.md
@@ -126,6 +126,16 @@ The analyser is being developed with lot of tests. To run tests :
 $ make test
 ```
 
+```bash
+$ make coverage-test
+```
+runs a coverage-tests and creates unanalyzed.lex file with unanalyzed words.
+## Dataset
+```bash
+$ make dataset 
+```
+creates a .csv file containing the words from the `tests/coverage/*.txt` files.
+
 ## Citation
 
 Please cite the following publication in order to refer to the mlmorph:
diff --git a/tests/coverage-test.py b/tests/coverage-test.py
index ad8114d..af75761 100644
--- a/tests/coverage-test.py
+++ b/tests/coverage-test.py
@@ -29,8 +29,9 @@ def test_total_coverage(self):
         start = clock()
         print("%40s\t%8s\t%8s\t%s" %
               ('File name', 'Words', 'Analysed', 'Percentage'))
-        for filename in glob.glob(os.path.join(CURR_DIR, "coverage", "*.txt")):
-            with open(filename, 'r') as file:
+        with open("./tests/unanalyzed.lex", "w+") as unanFile:
+            for filename in glob.glob(os.path.join(CURR_DIR, "coverage", "*.txt")):
+              with open(filename, 'r') as file:
                 tokens_count = 0
                 analysed_tokens_count = 0
                 for line in file:
@@ -41,12 +42,14 @@ def test_total_coverage(self):
                         analysis = self.analyser.analyse(word, False)
                         if len(analysis) > 0:
                             analysed_tokens_count += 1
+                        else:
+                            unanFile.write(word+"\n")
                 percentage = (analysed_tokens_count/tokens_count)*100
                 total_tokens_count += tokens_count
                 total_analysed_tokens_count += analysed_tokens_count
                 print("%40s\t%8d\t%8d\t%3.2f%%" % (os.path.basename(
                     filename), tokens_count, analysed_tokens_count, percentage))
-                file.close()
+                file.close;
         percentage = (total_analysed_tokens_count/total_tokens_count)*100
         time_taken = clock() - start
         print('%40s\t%8d\t%8d\t%3.2f%%' %

From 6169910c76bebd6a46c42a68a5d280ed06e886a4 Mon Sep 17 00:00:00 2001
From: ProgramComputer <22284856+ProgramComputer@users.noreply.github.com>
Date: Fri, 14 Apr 2023 22:17:01 -0500
Subject: [PATCH 2/6] close()

---
 tests/coverage-test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/coverage-test.py b/tests/coverage-test.py
index af75761..72399c7 100644
--- a/tests/coverage-test.py
+++ b/tests/coverage-test.py
@@ -49,7 +49,7 @@ def test_total_coverage(self):
                 total_analysed_tokens_count += analysed_tokens_count
                 print("%40s\t%8d\t%8d\t%3.2f%%" % (os.path.basename(
                     filename), tokens_count, analysed_tokens_count, percentage))
-                file.close;
+                file.close();
         percentage = (total_analysed_tokens_count/total_tokens_count)*100
         time_taken = clock() - start
         print('%40s\t%8d\t%8d\t%3.2f%%' %

From 87ad1dc052677ba2fbfd8ff4fc0858fab984f453 Mon Sep 17 00:00:00 2001
From: ProgramComputer <22284856+ProgramComputer@users.noreply.github.com>
Date: Fri, 14 Apr 2023 22:35:21 -0500
Subject: [PATCH 3/6] sort unique

---
 Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 58cabfd..ab0e515 100644
--- a/Makefile
+++ b/Makefile
@@ -19,7 +19,8 @@ test: malayalam.a python
 
 coverage-analysis: malayalam.a python
 	@python tests/coverage-test.py
+	sort -u tests/unanalyzed.lex --output=tests/unanalyzed.lex
 
 dataset:
 	pip install tqdm
-	python scripts/create-dataset.py
\ No newline at end of file
+	python scripts/create-dataset.py

From ed4ef2d209394847343ef0551b10014957ba7a52 Mon Sep 17 00:00:00 2001
From: ProgramComputer <22284856+ProgramComputer@users.noreply.github.com>
Date: Fri, 14 Apr 2023 23:30:56 -0500
Subject: [PATCH 4/6] Modify README

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 30c028a..8b92113 100644
--- a/README.md
+++ b/README.md
@@ -129,7 +129,7 @@ $ make test
 ```bash
 $ make coverage-test
 ```
-runs a coverage-tests and creates unanalyzed.lex file with unanalyzed words.
+runs the coverage tests and creates the file `tests/unanalyzed.lex`, which lists the words the analyser could not analyse.
 ## Dataset
 ```bash
 $ make dataset 

From 109bfa6ea8295c36fc38964ea0b7189b759df8c9 Mon Sep 17 00:00:00 2001
From: ProgramComputer <22284856+ProgramComputer@users.noreply.github.com>
Date: Fri, 14 Apr 2023 23:40:10 -0500
Subject: [PATCH 5/6] Modify Makefile

---
 Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile b/Makefile
index ab0e515..3576839 100644
--- a/Makefile
+++ b/Makefile
@@ -20,6 +20,7 @@ test: malayalam.a python
 coverage-analysis: malayalam.a python
 	@python tests/coverage-test.py
 	sort -u tests/unanalyzed.lex --output=tests/unanalyzed.lex
+        wc tests/unanalyzed.lex
 
 dataset:
 	pip install tqdm

From 5799503b5ecb29ba68bb78b67c97c17583a6ab24 Mon Sep 17 00:00:00 2001
From: ProgramComputer <22284856+ProgramComputer@users.noreply.github.com>
Date: Fri, 14 Apr 2023 23:58:48 -0500
Subject: [PATCH 6/6] Modify Makefile

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 3576839..84fc600 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ test: malayalam.a python
 coverage-analysis: malayalam.a python
 	@python tests/coverage-test.py
 	sort -u tests/unanalyzed.lex --output=tests/unanalyzed.lex
-        wc tests/unanalyzed.lex
+	wc tests/unanalyzed.lex
 
 dataset:
 	pip install tqdm