diff --git a/test.py b/test.py index 090c028..6587244 100644 --- a/test.py +++ b/test.py @@ -12,4 +12,4 @@ sentences = tt.summarize(title, text) for sentence in sentences: - print sentence \ No newline at end of file + print(sentence) diff --git a/textteaser/__init__.py b/textteaser/__init__.py index 88e71d2..396567e 100644 --- a/textteaser/__init__.py +++ b/textteaser/__init__.py @@ -1,7 +1,6 @@ # !/usr/bin/python # -*- coding: utf-8 -*- - -from summarizer import Summarizer +from .summarizer import Summarizer class TextTeaser(object): diff --git a/textteaser/main.py b/textteaser/main.py index ad8c13f..b37238e 100644 --- a/textteaser/main.py +++ b/textteaser/main.py @@ -1,4 +1,4 @@ -from summarizer import Summarizer +from .summarizer import Summarizer def getInput(): @@ -25,9 +25,9 @@ def getInput(): result = summarizer.sortScore(result) result = summarizer.sortSentences(result[:30]) -print 'Summary:' +print('Summary:') for r in result: - print r['sentence'] - # print r['totalScore'] - # print r['order'] + print(r['sentence']) + # print(r['totalScore']) + # print(r['order']) diff --git a/textteaser/parser.py b/textteaser/parser.py index 0c8f16d..2528f76 100644 --- a/textteaser/parser.py +++ b/textteaser/parser.py @@ -1,11 +1,16 @@ # !/usr/bin/python # -*- coding: utf-8 -*- import nltk.data -import os +import os.path as path class Parser: def __init__(self): + self.basePath = '/'.join([ + path.dirname(path.abspath(__file__)), + 'trainer', + '' + ]) self.ideal = 20.0 self.stopWords = self.getStopWords() @@ -58,7 +63,8 @@ def getTitleScore(self, title, sentence): return len(matchedWords) / (len(title) * 1.0) def splitSentences(self, text): - tokenizer = nltk.data.load('file:' + os.path.dirname(os.path.abspath(__file__)).decode('utf-8') + '/trainer/english.pickle') + path = self.basePath + 'english.pickle' + tokenizer = nltk.data.load('file:' + path) return tokenizer.tokenize(text) @@ -72,7 +78,8 @@ def removeStopWords(self, words): return [word for word in words if word not in self.stopWords] def getStopWords(self): - with open(os.path.dirname(os.path.abspath(__file__)) + '/trainer/stopWords.txt') as file: + path = self.basePath + 'stopWords.txt' + with open(path) as file: words = file.readlines() return [word.replace('\n', '') for word in words] diff --git a/textteaser/summarizer.py b/textteaser/summarizer.py index 22adee5..65a4c0b 100644 --- a/textteaser/summarizer.py +++ b/textteaser/summarizer.py @@ -1,6 +1,6 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -from parser import Parser +from .parser import Parser class Summarizer: