Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,12 @@ pylint = "*"
autopep8 = "*"

[packages]
httplib2 = "*"
dateutils = "*"
blessings = "*"
html2text = "*"
peewee = "*"
authenticator = "*"
oauth2client = "*"
jupyter = "*"
notebook = "*"
matplotlib = "*"
scikit-learn = "*"
mpld3 = "*"
google-api-python-client = "*"
emailanalysis = {editable = true,path = "."}

[requires]
python_version = "3.7"
20 changes: 8 additions & 12 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Email.py → emailanalysis/Email.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import re
from peewee import *

from Utils import logger
from SenderMetadata import SenderMetadata
from emailanalysis.utils import logger
from emailanalysis.SenderMetadata import SenderMetadata

db = SqliteDatabase('emails.db')

Expand Down
3 changes: 1 addition & 2 deletions SenderMetadata.py → emailanalysis/SenderMetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
from blessings import Terminal
from peewee import *

from Utils import logger
from Utils import get_answer
from emailanalysis.utils import logger, get_answer

t = Terminal()
db = SqliteDatabase('emails.db')
Expand Down
1 change: 1 addition & 0 deletions __init__.py → emailanalysis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
logger = logging.getLogger(__name__)
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.DEBUG)
4 changes: 2 additions & 2 deletions analyzer.py → emailanalysis/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
from sklearn.cross_validation import cross_val_score

from peewee import *
from Email import Email
from SenderMetadata import SenderMetadata
from emailanalysis.Email import Email
from emailanalysis.SenderMetadata import SenderMetadata

t = Terminal()
rootdir = os.path.realpath(os.path.dirname(__file__))
Expand Down
File renamed without changes.
File renamed without changes.
58 changes: 35 additions & 23 deletions downloader.py → emailanalysis/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,36 @@
import re
import email
import sys
import logging

from pprint import pprint
from collections import Counter
from dateutil.parser import parse
from datetime import datetime
from blessings import Terminal

from Utils import logger
from Utils import html_to_text
from Email import Email
from SenderMetadata import SenderMetadata
from emailanalysis.utils import logger, html_to_text
from emailanalysis.Email import Email
from emailanalysis.SenderMetadata import SenderMetadata

from peewee import *

import authenticator
from emailanalysis.authenticator import authenticate_gmail_service

gmail_service = authenticator.authenticate_gmail_service()
gmail_service = authenticate_gmail_service()
t = Terminal()

logfile_path = 'download.log'

# Delete old logfile
if os.path.exists(logfile_path):
os.remove(logfile_path)
print(f"Deleted logfile '{logfile_path}'")

# Set up the logger to also write to the new logfile
file_handler = logging.FileHandler(logfile_path)
file_handler.setLevel(logging.DEBUG)
logger.addHandler(file_handler)

def list_message_ids():
"""
Expand Down Expand Up @@ -90,16 +102,14 @@ def get_text(email_object):
content_type = msg.get_content_type()
payload = msg.get_payload()

# print "blah"
print(content_type)

if msg.is_multipart() and content_type == 'multipart/mixed' or content_type == 'multipart/related':
if msg.is_multipart() and (content_type == 'multipart/mixed' or content_type == 'multipart/related'):
text = ""
for part in payload:
text += get_text(part)
text += get_text(part) + '\n' # Combine the text of each part separated by '\n'
return text
elif msg.is_multipart() and content_type == 'multipart/alternative':
content_types = [x.get_content_type() for x in payload]
logger.debug(f"Detected {content_type} containing: {sorted(content_types)}")
if sorted(content_types) == ['text/html']:
html = payload[0]
return parse_singlepart_text_message(html)
Expand Down Expand Up @@ -137,26 +147,26 @@ def parse_message(gmail_message):

message_id = gmail_message.get('id')
if not message_id:
print(t.red("No message_id"))
logger.warn(t.red("No message_id"))
message_labels = gmail_message.get('labelIds')
if not message_labels:
print(t.red("No message_labels"))
logger.warn(t.red("No message_labels"))
message_to = email_object['To']
if not message_to:
print(t.red("No message_to"))
logger.warn(t.red("No message_to"))
message_from = email_object['From']
if not message_from:
print(t.red("No message_from"))
logger.warn(t.red("No message_from"))
message_subject = email_object['Subject']
if not message_subject:
print(t.red("No message_subject"))
logger.warn(t.red("No message_subject"))
message_date = parse(email_object['date'])
if not message_date:
print(t.red("No message_date"))
logger.warn(t.red("No message_date"))

text = get_text(email_object)
if not text:
print(t.red("No text"))
logger.warn(t.red("No text"))

return {
'message_id': message_id,
Expand Down Expand Up @@ -189,6 +199,8 @@ def download_email(message_id):


def download_all_to_database():
logger.info(f"Starting download at {datetime.now()}")

# Delete 'emails.db' sqlite database
if os.path.exists('emails.db'):
os.remove('emails.db')
Expand All @@ -206,10 +218,10 @@ def download_all_to_database():
try:
download_email(message_id)
except Exception as e:
print(t.red("Error downloading message: %s" % message_id))
print(t.red(str(e)))
logger.error(t.red("Error downloading message: %s" % message_id))
logger.error(t.red(str(e)))
raise
print("")
logger.info("")


if __name__ == '__main__':
Expand All @@ -219,6 +231,6 @@ def download_all_to_database():
# try:
# download_email(message_id)
# except Exception as e:
# print(t.red("FOUND ERROR ! %s" % message_id))
# print(t.red( "Unexpected error: %s" % e ))
# logger.error(t.red("FOUND ERROR ! %s" % message_id))
# logger.error(t.red( "Unexpected error: %s" % e ))
# raise
File renamed without changes.
1 change: 0 additions & 1 deletion Utils.py → emailanalysis/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import logging
logger = logging.getLogger(__name__)
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.DEBUG)


Expand Down
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from setuptools import setup

setup(name='emailanalysis',
version='0.1',
description='Analyze email d',
url='https://github.com/dmil/EmailAnalysis',
author='Dhrumil Mehta',
author_email='dhrumil.mehta@gmail.com',
packages=['.']
)
packages=['emailanalysis'],
install_requires=['httplib2','dateutils','blessings','html2text',
'peewee','oauth2client', 'google-api-python-client']
)
1 change: 1 addition & 0 deletions test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from emailanalysis import Email