2 changes: 1 addition & 1 deletion test.py
@@ -12,4 +12,4 @@
 sentences = tt.summarize(title, text)
 
 for sentence in sentences:
-    print sentence
+    print(sentence)
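The removed line is a Python 2 print statement; under Python 3, print only exists as a function, which is all this change does. If the script also needed to keep running on Python 2 (not something this PR sets out to do), the usual bridge is a __future__ import, roughly as in this sketch with placeholder data:

# Hypothetical two-interpreter variant, not part of this PR: the
# __future__ import makes print a function on Python 2 as well.
from __future__ import print_function

sentences = ['First sentence of the summary.', 'Second sentence.']  # placeholder data
for sentence in sentences:
    print(sentence)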
3 changes: 1 addition & 2 deletions textteaser/__init__.py
@@ -1,7 +1,6 @@
 # !/usr/bin/python
 # -*- coding: utf-8 -*-
-
-from summarizer import Summarizer
+from .summarizer import Summarizer
 
 
 class TextTeaser(object):
10 changes: 5 additions & 5 deletions textteaser/main.py
@@ -1,4 +1,4 @@
-from summarizer import Summarizer
+from .summarizer import Summarizer
 
 
 def getInput():
@@ -25,9 +25,9 @@ def getInput():
 result = summarizer.sortScore(result)
 result = summarizer.sortSentences(result[:30])
 
-print 'Summary:'
+print('Summary:')
 
 for r in result:
-    print r['sentence']
-    # print r['totalScore']
-    # print r['order']
+    print(r['sentence'])
+    # print(r['totalScore'])
+    # print(r['order'])
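Because main.py (like __init__.py above and summarizer.py below) now uses an explicit relative import, it has to run as part of the textteaser package rather than as a loose script, for example via python -m textteaser.main from the repository root. The same pipeline is also reachable through the package API exercised in test.py; a minimal usage sketch, with placeholder title and text standing in for the real article input and assuming TextTeaser takes no constructor arguments:

# Sketch using the package-level API (see test.py above); the title and
# text values are placeholders, not part of this PR.
from textteaser import TextTeaser

title = 'An example headline'
text = ('This is the first sentence of the article. '
        'This is a second sentence with a bit more detail. '
        'A third sentence closes the example.')

tt = TextTeaser()
for sentence in tt.summarize(title, text):
    print(sentence)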
13 changes: 10 additions & 3 deletions textteaser/parser.py
@@ -1,11 +1,16 @@
 # !/usr/bin/python
 # -*- coding: utf-8 -*-
 import nltk.data
-import os
+import os.path as path
 
 
 class Parser:
     def __init__(self):
+        self.basePath = '/'.join([
+            path.dirname(path.abspath(__file__)),
+            'trainer',
+            ''
+        ])
         self.ideal = 20.0
         self.stopWords = self.getStopWords()
 
@@ -58,7 +63,8 @@ def getTitleScore(self, title, sentence):
         return len(matchedWords) / (len(title) * 1.0)
 
     def splitSentences(self, text):
-        tokenizer = nltk.data.load('file:' + os.path.dirname(os.path.abspath(__file__)).decode('utf-8') + '/trainer/english.pickle')
+        path = self.basePath + 'english.pickle'
+        tokenizer = nltk.data.load('file:' + path)
 
         return tokenizer.tokenize(text)
 
@@ -72,7 +78,8 @@ def removeStopWords(self, words):
         return [word for word in words if word not in self.stopWords]
 
     def getStopWords(self):
-        with open(os.path.dirname(os.path.abspath(__file__)) + '/trainer/stopWords.txt') as file:
+        path = self.basePath + 'stopWords.txt'
+        with open(path) as file:
             words = file.readlines()
 
         return [word.replace('\n', '') for word in words]
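This refactor computes the trainer directory once in __init__ instead of rebuilding it (and calling .decode('utf-8'), which Python 3 strings no longer have) at every call site. A minimal sketch of how the joined path behaves, using a hypothetical module location in place of the real __file__:

import os.path as path

# Hypothetical module location; inside the package this comes from __file__.
module_file = '/home/user/textteaser/parser.py'

# The trailing '' makes '/'.join(...) end with a separator, so filenames
# can be appended directly, mirroring Parser.__init__ above.
basePath = '/'.join([
    path.dirname(path.abspath(module_file)),
    'trainer',
    ''
])

print(basePath + 'english.pickle')   # .../textteaser/trainer/english.pickle
print(basePath + 'stopWords.txt')    # .../textteaser/trainer/stopWords.txt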
2 changes: 1 addition & 1 deletion textteaser/summarizer.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
-from parser import Parser
+from .parser import Parser
 
 
 class Summarizer: