diff --git a/Pipfile b/Pipfile index b1721fa..903a10e 100644 --- a/Pipfile +++ b/Pipfile @@ -8,19 +8,12 @@ pylint = "*" autopep8 = "*" [packages] -httplib2 = "*" -dateutils = "*" -blessings = "*" -html2text = "*" -peewee = "*" -authenticator = "*" -oauth2client = "*" jupyter = "*" notebook = "*" matplotlib = "*" scikit-learn = "*" mpld3 = "*" -google-api-python-client = "*" +emailanalysis = {editable = true,path = "."} [requires] python_version = "3.7" diff --git a/Pipfile.lock b/Pipfile.lock index 4bcb4da..c38fbfe 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "09eb02067814ce634746073b43d6a8389ee7a323d94713a5daafc4748aac1143" + "sha256": "4fbb7cb056811a24fb65f9c1791e8915bb7121caafc31d573ff59a4f26b030d1" }, "pipfile-spec": 6, "requires": { @@ -50,7 +50,6 @@ "sha256:30b7a84a6983fd9f4b7f91df835ae853e901d301a33a38958f69d9da3c0eba33", "sha256:9157175b6e104ee861df9c577d4f00953c37ead3f5ae145746ce4e470100aa3f" ], - "index": "pypi", "version": "==1.1.3" }, "backcall": { @@ -73,7 +72,6 @@ "sha256:b1fdd7e7a675295630f9ae71527a8ebc10bfefa236b3d6aa4932ee4462c17ba3", "sha256:caad5211e7ba5afe04367cdd4cfc68fa886e2e08f6f35e76b7387d2109ccea6e" ], - "index": "pypi", "version": "==1.7" }, "cachetools": { @@ -151,7 +149,6 @@ "hashes": [ "sha256:c94a8e77d743abac79ed91f99f5ef594a972a527e05145cbb7aba59beced8a71" ], - "index": "pypi", "version": "==0.6.6" }, "decorator": { @@ -168,6 +165,10 @@ ], "version": "==0.6.0" }, + "emailanalysis": { + "editable": true, + "path": "." + }, "entrypoints": { "hashes": [ "sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19", @@ -180,7 +181,6 @@ "sha256:06907006ed5ce831018f03af3852d739c0b2489cdacfda6971bcc2075c762858", "sha256:937eabdc3940977f712fa648a096a5142766b6d0a0f58bc603e2ac0687397ef0" ], - "index": "pypi", "version": "==1.7.8" }, "google-auth": { @@ -202,7 +202,6 @@ "sha256:490db40fe5b2cd79c461cf56be4d39eb8ca68191ae41ba3ba79f6cb05b7dd662", "sha256:627514fb30e7566b37be6900df26c2c78a030cc9e6211bda604d8181233bcdd4" ], - "index": "pypi", "version": "==2018.1.9" }, "httplib2": { @@ -210,7 +209,6 @@ "sha256:23914b5487dfe8ef09db6656d6d63afb0cf3054ad9ebc50868ddc8e166b5f8e8", "sha256:a18121c7c72a56689efbf1aef990139ad940fee1e64c6f2458831736cd593600" ], - "index": "pypi", "version": "==0.12.3" }, "ipykernel": { @@ -455,7 +453,6 @@ "sha256:b8a81cc5d60e2d364f0b1b98f958dbd472887acaf1a5b05e21c28c31a2d6d3ac", "sha256:d486741e451287f69568a4d26d70d9acd73a2bbfa275746c535b4209891cccc6" ], - "index": "pypi", "version": "==4.1.3" }, "pandocfilters": { @@ -475,7 +472,6 @@ "hashes": [ "sha256:f0249be468e3b119a8ad83f686e7fe161303197e0534e3cdff8fa5a5417c01a5" ], - "index": "pypi", "version": "==3.9.5" }, "pexpect": { @@ -537,10 +533,10 @@ }, "pygments": { "hashes": [ - "sha256:5ffada19f6203563680669ee7f53b64dabbeb100eb51b61996085e99c03b284a", - "sha256:e8218dd399a61674745138520d0d4cf2621d7e032439341bc3f647bff125818d" + "sha256:31cba6ffb739f099a85e243eff8cb717089fdd3c7300767d9fc34cb8e1b065f5", + "sha256:5ad302949b3c98dd73f8d9fcdc7e9cb592f120e32a18e23efd7f3dc51194472b" ], - "version": "==2.3.1" + "version": "==2.4.0" }, "pyparsing": { "hashes": [ diff --git a/Email.py b/emailanalysis/Email.py similarity index 95% rename from Email.py rename to emailanalysis/Email.py index 4d7f6c9..1722879 100644 --- a/Email.py +++ b/emailanalysis/Email.py @@ -1,8 +1,8 @@ import re from peewee import * -from Utils import logger -from SenderMetadata import SenderMetadata +from emailanalysis.utils import logger +from emailanalysis.SenderMetadata import SenderMetadata db = SqliteDatabase('emails.db') diff --git a/SenderMetadata.py b/emailanalysis/SenderMetadata.py similarity index 97% rename from SenderMetadata.py rename to emailanalysis/SenderMetadata.py index 6c48259..1e0269e 100644 --- a/SenderMetadata.py +++ b/emailanalysis/SenderMetadata.py @@ -2,8 +2,7 @@ from blessings import Terminal from peewee import * -from Utils import logger -from Utils import get_answer +from emailanalysis.utils import logger, get_answer t = Terminal() db = SqliteDatabase('emails.db') diff --git a/__init__.py b/emailanalysis/__init__.py similarity index 88% rename from __init__.py rename to emailanalysis/__init__.py index 34ac835..9519fcb 100644 --- a/__init__.py +++ b/emailanalysis/__init__.py @@ -1,3 +1,4 @@ +import logging logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.DEBUG) diff --git a/analyzer.py b/emailanalysis/analyzer.py similarity index 95% rename from analyzer.py rename to emailanalysis/analyzer.py index ae94ebf..0aed84e 100644 --- a/analyzer.py +++ b/emailanalysis/analyzer.py @@ -24,8 +24,8 @@ from sklearn.cross_validation import cross_val_score from peewee import * -from Email import Email -from SenderMetadata import SenderMetadata +from emailanalysis.Email import Email +from emailanalysis.SenderMetadata import SenderMetadata t = Terminal() rootdir = os.path.realpath(os.path.dirname(__file__)) diff --git a/authenticator.py b/emailanalysis/authenticator.py similarity index 100% rename from authenticator.py rename to emailanalysis/authenticator.py diff --git a/click_info.py b/emailanalysis/click_info.py similarity index 100% rename from click_info.py rename to emailanalysis/click_info.py diff --git a/downloader.py b/emailanalysis/downloader.py similarity index 79% rename from downloader.py rename to emailanalysis/downloader.py index ca9ce40..77855ee 100644 --- a/downloader.py +++ b/emailanalysis/downloader.py @@ -11,24 +11,36 @@ import re import email import sys +import logging from pprint import pprint from collections import Counter from dateutil.parser import parse +from datetime import datetime from blessings import Terminal -from Utils import logger -from Utils import html_to_text -from Email import Email -from SenderMetadata import SenderMetadata +from emailanalysis.utils import logger, html_to_text +from emailanalysis.Email import Email +from emailanalysis.SenderMetadata import SenderMetadata from peewee import * -import authenticator +from emailanalysis.authenticator import authenticate_gmail_service -gmail_service = authenticator.authenticate_gmail_service() +gmail_service = authenticate_gmail_service() t = Terminal() +logfile_path = 'download.log' + +# Delete old logfile +if os.path.exists(logfile_path): + os.remove(logfile_path) + print(f"Deleted logfile '{logfile_path}'") + +# Setup logger to new file +file_handler = logging.FileHandler(logfile_path) +file_handler.setLevel(logging.DEBUG) +logger.addHandler(file_handler) def list_message_ids(): """ @@ -90,16 +102,14 @@ def get_text(email_object): content_type = msg.get_content_type() payload = msg.get_payload() - # print "blah" - print(content_type) - - if msg.is_multipart() and content_type == 'multipart/mixed' or content_type == 'multipart/related': + if msg.is_multipart() and (content_type == 'multipart/mixed' or content_type == 'multipart/related'): text = "" for part in payload: - text += get_text(part) + text += get_text(part) + '\n' # Combine the text of each part separated by '\n' return text elif msg.is_multipart() and content_type == 'multipart/alternative': content_types = [x.get_content_type() for x in payload] + logger.debug(f"Detected {content_type} containing: {sorted(content_types)}") if sorted(content_types) == ['text/html']: html = payload[0] return parse_singlepart_text_message(html) @@ -137,26 +147,26 @@ def parse_message(gmail_message): message_id = gmail_message.get('id') if not message_id: - print(t.red("No message_id")) + logger.warn(t.red("No message_id")) message_labels = gmail_message.get('labelIds') if not message_labels: - print(t.red("No message_labels")) + logger.warn(t.red("No message_labels")) message_to = email_object['To'] if not message_to: - print(t.red("No message_to")) + logger.warn(t.red("No message_to")) message_from = email_object['From'] if not message_from: - print(t.red("No message_from")) + logger.warn(t.red("No message_from")) message_subject = email_object['Subject'] if not message_subject: - print(t.red("No message_subject")) + logger.warn(t.red("No message_subject")) message_date = parse(email_object['date']) if not message_date: - print(t.red("No message_date")) + logger.warn(t.red("No message_date")) text = get_text(email_object) if not text: - print(t.red("No text")) + logger.warn(t.red("No text")) return { 'message_id': message_id, @@ -189,6 +199,8 @@ def download_email(message_id): def download_all_to_database(): + logger.info(f"Starting download at {datetime.now()}") + # Delete 'emails.db' sqlite database if os.path.exists('emails.db'): os.remove('emails.db') @@ -206,10 +218,10 @@ def download_all_to_database(): try: download_email(message_id) except Exception as e: - print(t.red("Error downloading message: %s" % message_id)) - print(t.red(str(e))) + logger.error(t.red("Error downloading message: %s" % message_id)) + logger.error(t.red(str(e))) raise - print("") + logger.info("") if __name__ == '__main__': @@ -219,6 +231,6 @@ def download_all_to_database(): # try: # download_email(message_id) # except Exception, e: - # print(t.red("FOUND ERROR ! %s" % message_id)) - # print(t.red( "Unexpected error: %s" % e )) + # logger.error(t.red("FOUND ERROR ! %s" % message_id)) + # logger.error(t.red( "Unexpected error: %s" % e )) # raise diff --git a/examples.py b/emailanalysis/examples.py similarity index 100% rename from examples.py rename to emailanalysis/examples.py diff --git a/Utils.py b/emailanalysis/utils.py similarity index 93% rename from Utils.py rename to emailanalysis/utils.py index 5113b23..80d5598 100644 --- a/Utils.py +++ b/emailanalysis/utils.py @@ -3,7 +3,6 @@ import logging logger = logging.getLogger(__name__) -logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.DEBUG) diff --git a/setup.py b/setup.py index 516f35a..a12d7bc 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,11 @@ from setuptools import setup setup(name='emailanalysis', - version='0.1', description='Analyze email d', url='https://github.com/dmil/EmailAnalysis', author='Dhrumil Mehta', author_email='dhrumil.mehta@gmail.com', - packages=['.'] - ) \ No newline at end of file + packages=['emailanalysis'], + install_requires=['httplib2','dateutils','blessings','html2text', + 'peewee','oauth2client', 'google-api-python-client'] + ) diff --git a/test.py b/test.py new file mode 100644 index 0000000..d26c801 --- /dev/null +++ b/test.py @@ -0,0 +1 @@ +from emailanalysis import Email \ No newline at end of file