From 57cc715efc012d7710da87ff051a74266c90135f Mon Sep 17 00:00:00 2001 From: Andrew Champion Date: Fri, 4 May 2018 14:54:37 -0700 Subject: [PATCH 1/3] Python 3.6 Compatibility Changed across project: * Convert ```print var``` calls to ```print(var)``` (compat) * Change ```import``` statements (compat) * Fix inconsistent line breaks (format) Changed in ```parser.py``` * Import ```os.path``` more granularly * Replace redundant path building Retained: * Python 2.7 compatibility (compat) * non-PEP8 use of camelCase (format) --- test.py | 2 +- textteaser/__init__.py | 3 +-- textteaser/main.py | 8 ++++---- textteaser/parser.py | 13 ++++++++++--- textteaser/summarizer.py | 2 +- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/test.py b/test.py index 090c028..6587244 100644 --- a/test.py +++ b/test.py @@ -12,4 +12,4 @@ sentences = tt.summarize(title, text) for sentence in sentences: - print sentence \ No newline at end of file + print(sentence) diff --git a/textteaser/__init__.py b/textteaser/__init__.py index 88e71d2..396567e 100644 --- a/textteaser/__init__.py +++ b/textteaser/__init__.py @@ -1,7 +1,6 @@ # !/usr/bin/python # -*- coding: utf-8 -*- - -from summarizer import Summarizer +from .summarizer import Summarizer class TextTeaser(object): diff --git a/textteaser/main.py b/textteaser/main.py index ad8c13f..5f6c2f4 100644 --- a/textteaser/main.py +++ b/textteaser/main.py @@ -1,4 +1,4 @@ -from summarizer import Summarizer +from .summarizer import Summarizer def getInput(): @@ -28,6 +28,6 @@ def getInput(): print 'Summary:' for r in result: - print r['sentence'] - # print r['totalScore'] - # print r['order'] + print(r['sentence']) + # print(r['totalScore']) + # print(r['order']) diff --git a/textteaser/parser.py b/textteaser/parser.py index 0c8f16d..2528f76 100644 --- a/textteaser/parser.py +++ b/textteaser/parser.py @@ -1,11 +1,16 @@ # !/usr/bin/python # -*- coding: utf-8 -*- import nltk.data -import os +import os.path as path class Parser: def
__init__(self): + self.basePath = '/'.join([ + path.dirname(path.abspath(__file__)), + 'trainer', + '' + ]) self.ideal = 20.0 self.stopWords = self.getStopWords() @@ -58,7 +63,8 @@ def getTitleScore(self, title, sentence): return len(matchedWords) / (len(title) * 1.0) def splitSentences(self, text): - tokenizer = nltk.data.load('file:' + os.path.dirname(os.path.abspath(__file__)).decode('utf-8') + '/trainer/english.pickle') + path = self.basePath + 'english.pickle' + tokenizer = nltk.data.load('file:' + path) return tokenizer.tokenize(text) @@ -72,7 +78,8 @@ def removeStopWords(self, words): return [word for word in words if word not in self.stopWords] def getStopWords(self): - with open(os.path.dirname(os.path.abspath(__file__)) + '/trainer/stopWords.txt') as file: + path = self.basePath + 'stopWords.txt' + with open(path) as file: words = file.readlines() return [word.replace('\n', '') for word in words] diff --git a/textteaser/summarizer.py b/textteaser/summarizer.py index 22adee5..65a4c0b 100644 --- a/textteaser/summarizer.py +++ b/textteaser/summarizer.py @@ -1,6 +1,6 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -from parser import Parser +from .parser import Parser class Summarizer: From 630d31ead585abff8320dbc25f1b67987f896b20 Mon Sep 17 00:00:00 2001 From: Andrew Champion Date: Fri, 4 May 2018 14:54:37 -0700 Subject: [PATCH 2/3] Python 3.6 Compatibility Changed across project: * Convert ```print var``` calls to ```print(var)``` (compat) * Change ```import``` statements (compat) * Fix inconsistent line breaks (format) Changed in ```parser.py``` * Import ```os.path``` more granularly * Replace redundant path building Retained: * Python 2.7 compatibility (compat) * non-PEP8 use of camelCase (format) --- test.py | 2 +- textteaser/__init__.py | 3 +-- textteaser/main.py | 8 ++++---- textteaser/parser.py | 13 ++++++++++--- textteaser/summarizer.py | 2 +- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/test.py b/test.py index 090c028..6587244 100644
--- a/test.py +++ b/test.py @@ -12,4 +12,4 @@ sentences = tt.summarize(title, text) for sentence in sentences: - print sentence \ No newline at end of file + print(sentence) diff --git a/textteaser/__init__.py b/textteaser/__init__.py index 88e71d2..396567e 100644 --- a/textteaser/__init__.py +++ b/textteaser/__init__.py @@ -1,7 +1,6 @@ # !/usr/bin/python # -*- coding: utf-8 -*- - -from summarizer import Summarizer +from .summarizer import Summarizer class TextTeaser(object): diff --git a/textteaser/main.py b/textteaser/main.py index ad8c13f..5f6c2f4 100644 --- a/textteaser/main.py +++ b/textteaser/main.py @@ -1,4 +1,4 @@ -from summarizer import Summarizer +from .summarizer import Summarizer def getInput(): @@ -28,6 +28,6 @@ def getInput(): print 'Summary:' for r in result: - print r['sentence'] - # print r['totalScore'] - # print r['order'] + print(r['sentence']) + # print(r['totalScore']) + # print(r['order']) diff --git a/textteaser/parser.py b/textteaser/parser.py index 0c8f16d..2528f76 100644 --- a/textteaser/parser.py +++ b/textteaser/parser.py @@ -1,11 +1,16 @@ # !/usr/bin/python # -*- coding: utf-8 -*- import nltk.data -import os +import os.path as path class Parser: def __init__(self): + self.basePath = '/'.join([ + path.dirname(path.abspath(__file__)), + 'trainer', + '' + ]) self.ideal = 20.0 self.stopWords = self.getStopWords() @@ -58,7 +63,8 @@ def getTitleScore(self, title, sentence): return len(matchedWords) / (len(title) * 1.0) def splitSentences(self, text): - tokenizer = nltk.data.load('file:' + os.path.dirname(os.path.abspath(__file__)).decode('utf-8') + '/trainer/english.pickle') + path = self.basePath + 'english.pickle' + tokenizer = nltk.data.load('file:' + path) return tokenizer.tokenize(text) @@ -72,7 +78,8 @@ def removeStopWords(self, words): return [word for word in words if word not in self.stopWords] def getStopWords(self): - with open(os.path.dirname(os.path.abspath(__file__)) + '/trainer/stopWords.txt') as file: + path = self.basePath + 
'stopWords.txt' + with open(path) as file: words = file.readlines() return [word.replace('\n', '') for word in words] diff --git a/textteaser/summarizer.py b/textteaser/summarizer.py index 22adee5..65a4c0b 100644 --- a/textteaser/summarizer.py +++ b/textteaser/summarizer.py @@ -1,6 +1,6 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -from parser import Parser +from .parser import Parser class Summarizer: From 03091d3e3661938c321f4229fc9b640b5fcced02 Mon Sep 17 00:00:00 2001 From: Andrew Champion Date: Sun, 6 May 2018 12:21:34 -0700 Subject: [PATCH 3/3] forgot to update main.py --- textteaser/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/textteaser/main.py b/textteaser/main.py index 5f6c2f4..b37238e 100644 --- a/textteaser/main.py +++ b/textteaser/main.py @@ -25,7 +25,7 @@ def getInput(): result = summarizer.sortScore(result) result = summarizer.sortSentences(result[:30]) -print 'Summary:' +print('Summary:') for r in result: print(r['sentence'])