From 232861a19e6d6054458078b3c5736e4a04312e60 Mon Sep 17 00:00:00 2001 From: Brian Glod Date: Wed, 21 Feb 2018 13:33:59 -0500 Subject: [PATCH 1/6] Cleanup whitespace - Tabs to spaces - Remove all trailing whitespace --- examples/paycheckProcess.py | 205 +++++++++++++------------- paycheckrecords/__init__.py | 1 - paycheckrecords/paycheckrecords.py | 226 ++++++++++++++--------------- paycheckrecords/paystub.py | 26 ++-- 4 files changed, 228 insertions(+), 230 deletions(-) diff --git a/examples/paycheckProcess.py b/examples/paycheckProcess.py index e8e1aa2..088221e 100755 --- a/examples/paycheckProcess.py +++ b/examples/paycheckProcess.py @@ -1,4 +1,3 @@ - from datetime import date, timedelta from dateutil.relativedelta import relativedelta from bs4 import BeautifulSoup @@ -10,109 +9,109 @@ from paycheckrecords import * def checkRowForAll(row): - for col in row.findAll('td'): - if "Federal Income Tax" in str(col): - return True - if "Social Security" in str(col): - return True - if "Medicare" in str(col): - return True - if "NY Income Tax" in str(col): - return True - if "Cell Phone" in str(col): - return True - if "Deductions" in str(col): - return True - if "Taxes" in str(col): - return True - - return False - + for col in row.findAll('td'): + if "Federal Income Tax" in str(col): + return True + if "Social Security" in str(col): + return True + if "Medicare" in str(col): + return True + if "NY Income Tax" in str(col): + return True + if "Cell Phone" in str(col): + return True + if "Deductions" in str(col): + return True + if "Taxes" in str(col): + return True + + return False + def blackOut(html): - soup = BeautifulSoup(html) - - #blackout net pay - tmp = soup.findAll('u') - for tag in tmp: - if "Net Pay" in str(tag.parent): - tag["style"] = "background-color:black; -webkit-print-color-adjust: exact;" - tableList = ["paystub_pay_tbl", "paystub_ee_taxes_tbl", "paystub_summary_tbl"] - - #black out all - for curTable in tableList: - tmpTable = soup.find("table", 
{"id": curTable}) - allrows = tmpTable.findAll('tr') - for row in allrows: - if checkRowForAll(row): - for col in row.findAll('td'): - if '.' in str(col): - col["style"] = "background-color:black; -webkit-print-color-adjust: exact;" - - - - #black out netthispay - elem = soup.find(text=re.compile('.*Net This Check:.*')) - elem = elem.findNext('td') - elem["style"] = "background-color:black; -webkit-print-color-adjust: exact;" - - #black out account - elem = soup.find(text=re.compile('.*Acct#.*')) - - nelem = elem.findNext('td') - nelem["style"] = "background-color:black; -webkit-print-color-adjust: exact;" - - contents = elem.string - contentsList = contents.split("#") - newcontent = contentsList[0] + "#" - contentsList = contentsList[1].split(":") - newcontent = newcontent + contentsList[0] + ":" + contentsList[1] - elem.replaceWith(newcontent) - - return str(soup.prettify(formatter=None)) + soup = BeautifulSoup(html) + + #blackout net pay + tmp = soup.findAll('u') + for tag in tmp: + if "Net Pay" in str(tag.parent): + tag["style"] = "background-color:black; -webkit-print-color-adjust: exact;" + tableList = ["paystub_pay_tbl", "paystub_ee_taxes_tbl", "paystub_summary_tbl"] + + #black out all + for curTable in tableList: + tmpTable = soup.find("table", {"id": curTable}) + allrows = tmpTable.findAll('tr') + for row in allrows: + if checkRowForAll(row): + for col in row.findAll('td'): + if '.' 
in str(col): + col["style"] = "background-color:black; -webkit-print-color-adjust: exact;" + + + + #black out netthispay + elem = soup.find(text=re.compile('.*Net This Check:.*')) + elem = elem.findNext('td') + elem["style"] = "background-color:black; -webkit-print-color-adjust: exact;" + + #black out account + elem = soup.find(text=re.compile('.*Acct#.*')) + + nelem = elem.findNext('td') + nelem["style"] = "background-color:black; -webkit-print-color-adjust: exact;" + + contents = elem.string + contentsList = contents.split("#") + newcontent = contentsList[0] + "#" + contentsList = contentsList[1].split(":") + newcontent = newcontent + contentsList[0] + ":" + contentsList[1] + elem.replaceWith(newcontent) + + return str(soup.prettify(formatter=None)) def main(): - - _day = int(input("Day:")) - username = raw_input("Username:") - password = getpass("Password:") - - paycheckinst = paycheckrecords(username, password) - try: - - now = date.today() - - if now.day > _day: - startdate = now.replace(day=_day+1) - enddate = startdate + timedelta(days=32) - enddate = enddate.replace(day = _day) - - else: - - - enddate = now.replace(day=_day) - tmpdate = now.replace(day=1) - timedelta(days=1) - startdate = tmpdate.replace(day=_day+1) - - - - ret = paycheckinst.getPayStubsInRange(startdate, enddate) - gross = 0.0 - for stub in ret: - print "Date: ", stub.PayDate - print "Total Pay: ", stub.TotalPay - print "Net Pay: ", stub.NetPay - print "" - gross = gross + stub.TotalPay - filename = "paystub " + stub.PayDate.strftime("%m-%d-%Y") - out = open(filename + ".html", "w") - out.write(stub.HTML) - out.close() - - out = open(filename + "(blacked out).html", "w") - out.write(blackOut(stub.HTML)) - out.close() - print "Gross: " + str(gross) - finally: - paycheckinst.close() - + + _day = int(input("Day:")) + username = raw_input("Username:") + password = getpass("Password:") + + paycheckinst = paycheckrecords(username, password) + try: + + now = date.today() + + if now.day > _day: + 
startdate = now.replace(day=_day+1) + enddate = startdate + timedelta(days=32) + enddate = enddate.replace(day = _day) + + else: + + + enddate = now.replace(day=_day) + tmpdate = now.replace(day=1) - timedelta(days=1) + startdate = tmpdate.replace(day=_day+1) + + + + ret = paycheckinst.getPayStubsInRange(startdate, enddate) + gross = 0.0 + for stub in ret: + print "Date: ", stub.PayDate + print "Total Pay: ", stub.TotalPay + print "Net Pay: ", stub.NetPay + print "" + gross = gross + stub.TotalPay + filename = "paystub " + stub.PayDate.strftime("%m-%d-%Y") + out = open(filename + ".html", "w") + out.write(stub.HTML) + out.close() + + out = open(filename + "(blacked out).html", "w") + out.write(blackOut(stub.HTML)) + out.close() + print "Gross: " + str(gross) + finally: + paycheckinst.close() + main() diff --git a/paycheckrecords/__init__.py b/paycheckrecords/__init__.py index 7884ee8..287f67e 100644 --- a/paycheckrecords/__init__.py +++ b/paycheckrecords/__init__.py @@ -1,3 +1,2 @@ import paystub from paycheckrecords import * - diff --git a/paycheckrecords/paycheckrecords.py b/paycheckrecords/paycheckrecords.py index 4722ec6..18495c9 100755 --- a/paycheckrecords/paycheckrecords.py +++ b/paycheckrecords/paycheckrecords.py @@ -8,116 +8,116 @@ class paycheckrecords: - _br = mechanize.Browser() - _browserSem = threading.Semaphore() - _thread = None - _stop = False - _timer = None - _threadSleep = threading.Event() - - def __init__(self, username, password): - self._br.set_handle_robots(False) - self._br.open("https://www.paycheckrecords.com") - self._br.select_form(name="Login_Form") - - self._br.form["userStrId"] = username - self._br.form["password"] = password - - self._br.submit() - - self._thread = threading.Thread(target=self.preventTimeOut) - self._thread.start() - - def preventTimeOut(self): - while not self._stop: - self._browserSem.acquire() -# print "aquired lock" - url = self._br.geturl() - #print "url = ", url - self._br.open(url) -# print "refreshed" - 
self._browserSem.release() -# print "reload page from thread" - self._threadSleep.wait(30) -# print "awake" - self._threadSleep.clear() - - - - def getLatestPayStub(self): - self._browserSem.acquire() - originalurl = self._br.geturl() - paystubResponse = self._br.open("https://www.paycheckrecords.com/in/paychecks.jsp") - - ret = self._getPaystubsFromTable(paystubResponse.read(), range(1, 2)) - - self._br.open(originalurl) - self._browserSem.release() - return ret[0] - - def getPayStubsInRange(self, startDate, endDate, sequence = 0): - self._browserSem.acquire() - originalurl = self._br.geturl() - paystubResponse = self._br.open("https://www.paycheckrecords.com/in/paychecks.jsp") - self._br.select_form(name="dateSelect") - self._br.form["startDate"] = startDate.strftime("%m/%d/%Y") - self._br.form["endDate"] = endDate.strftime("%m/%d/%Y") - paystubResponse = self._br.submit() - ret = self._getPaystubsFromTable(paystubResponse.read(),sequence) - - self._br.open(originalurl) - self._browserSem.release() - return ret - - - - def _getPaystubsFromTable(self, html, sequence, GetHtml = True): - soup = BeautifulSoup(html) - PayStubTable = soup.find("table", { "class" : "report" }) - payrows = PayStubTable.findAll('tr') - headerCols = payrows[0].findAll('td') - ret = [] - i = 0 - DateIndex = -1 - NetIndex = -1 - TotalIndex = -1 - - for col in headerCols: - colName = col.string - if colName == u'Pay Date' and DateIndex == -1: - DateIndex = i - elif colName == u'Total Pay' and TotalIndex == -1: - TotalIndex = i - elif colName == u'Net Pay' and NetIndex == -1: - NetIndex = i - i = i + 1 - if sequence == 0: - sequence = range(1, len(payrows)) - for index in sequence: - paystubHtml = None - rowCols = payrows[index].findAll('td') - rowDate = rowCols[DateIndex].a.string.strip() - rowTotalPay = float(rowCols[TotalIndex].string.strip().strip("$")) - rowNetPay = float(rowCols[NetIndex].string.strip().strip("$")) - tmpDateTime = datetime.strptime(rowDate, '%m/%d/%Y') - if GetHtml: - 
paystubResponse = self._br.open(rowCols[DateIndex].a['href']) - paystubHtml = paystubResponse.read() - self._br.back() - tmpPayStub = paystub(tmpDateTime, rowTotalPay, rowNetPay, paystubHtml) - ret.append(tmpPayStub) - - return ret - - - - def close(self): - #print "Closing Instance" - self._stop = True - #print "_stop set" - self._threadSleep.set() - #print "_threadSleep set" - self._thread.join() - #print "thread joined" - self._br.close() - #print "Closing Done" \ No newline at end of file + _br = mechanize.Browser() + _browserSem = threading.Semaphore() + _thread = None + _stop = False + _timer = None + _threadSleep = threading.Event() + + def __init__(self, username, password): + self._br.set_handle_robots(False) + self._br.open("https://www.paycheckrecords.com") + self._br.select_form(name="Login_Form") + + self._br.form["userStrId"] = username + self._br.form["password"] = password + + self._br.submit() + + self._thread = threading.Thread(target=self.preventTimeOut) + self._thread.start() + + def preventTimeOut(self): + while not self._stop: + self._browserSem.acquire() +# print "aquired lock" + url = self._br.geturl() + #print "url = ", url + self._br.open(url) +# print "refreshed" + self._browserSem.release() +# print "reload page from thread" + self._threadSleep.wait(30) +# print "awake" + self._threadSleep.clear() + + + + def getLatestPayStub(self): + self._browserSem.acquire() + originalurl = self._br.geturl() + paystubResponse = self._br.open("https://www.paycheckrecords.com/in/paychecks.jsp") + + ret = self._getPaystubsFromTable(paystubResponse.read(), range(1, 2)) + + self._br.open(originalurl) + self._browserSem.release() + return ret[0] + + def getPayStubsInRange(self, startDate, endDate, sequence = 0): + self._browserSem.acquire() + originalurl = self._br.geturl() + paystubResponse = self._br.open("https://www.paycheckrecords.com/in/paychecks.jsp") + self._br.select_form(name="dateSelect") + self._br.form["startDate"] = 
startDate.strftime("%m/%d/%Y") + self._br.form["endDate"] = endDate.strftime("%m/%d/%Y") + paystubResponse = self._br.submit() + ret = self._getPaystubsFromTable(paystubResponse.read(),sequence) + + self._br.open(originalurl) + self._browserSem.release() + return ret + + + + def _getPaystubsFromTable(self, html, sequence, GetHtml = True): + soup = BeautifulSoup(html) + PayStubTable = soup.find("table", { "class" : "report" }) + payrows = PayStubTable.findAll('tr') + headerCols = payrows[0].findAll('td') + ret = [] + i = 0 + DateIndex = -1 + NetIndex = -1 + TotalIndex = -1 + + for col in headerCols: + colName = col.string + if colName == u'Pay Date' and DateIndex == -1: + DateIndex = i + elif colName == u'Total Pay' and TotalIndex == -1: + TotalIndex = i + elif colName == u'Net Pay' and NetIndex == -1: + NetIndex = i + i = i + 1 + if sequence == 0: + sequence = range(1, len(payrows)) + for index in sequence: + paystubHtml = None + rowCols = payrows[index].findAll('td') + rowDate = rowCols[DateIndex].a.string.strip() + rowTotalPay = float(rowCols[TotalIndex].string.strip().strip("$")) + rowNetPay = float(rowCols[NetIndex].string.strip().strip("$")) + tmpDateTime = datetime.strptime(rowDate, '%m/%d/%Y') + if GetHtml: + paystubResponse = self._br.open(rowCols[DateIndex].a['href']) + paystubHtml = paystubResponse.read() + self._br.back() + tmpPayStub = paystub(tmpDateTime, rowTotalPay, rowNetPay, paystubHtml) + ret.append(tmpPayStub) + + return ret + + + + def close(self): + #print "Closing Instance" + self._stop = True + #print "_stop set" + self._threadSleep.set() + #print "_threadSleep set" + self._thread.join() + #print "thread joined" + self._br.close() + #print "Closing Done" diff --git a/paycheckrecords/paystub.py b/paycheckrecords/paystub.py index de5ef7f..84f6112 100755 --- a/paycheckrecords/paystub.py +++ b/paycheckrecords/paystub.py @@ -1,15 +1,15 @@ import datetime class paystub: - def __init__(self, payDate, TotalPay, NetPay, html = None): - if 
type(payDate) is not datetime and type(payDate) is not datetime.datetime: - raise ValueError("payDate is not a datetime object") - - if type(TotalPay) is not float: - raise ValueError("TotalPay needs to be a float") - if type(NetPay) is not float: - raise ValueError("NetPay needs to be a float") - - self.PayDate = payDate - self.TotalPay = TotalPay - self.NetPay = NetPay - self.HTML = html + def __init__(self, payDate, TotalPay, NetPay, html = None): + if type(payDate) is not datetime and type(payDate) is not datetime.datetime: + raise ValueError("payDate is not a datetime object") + + if type(TotalPay) is not float: + raise ValueError("TotalPay needs to be a float") + if type(NetPay) is not float: + raise ValueError("NetPay needs to be a float") + + self.PayDate = payDate + self.TotalPay = TotalPay + self.NetPay = NetPay + self.HTML = html From 249c35b6b6a593d6f984189466daff816589766f Mon Sep 17 00:00:00 2001 From: Brian Glod Date: Wed, 21 Feb 2018 13:39:28 -0500 Subject: [PATCH 2/6] Remove comma(s) from dollar amounts --- paycheckrecords/paycheckrecords.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paycheckrecords/paycheckrecords.py b/paycheckrecords/paycheckrecords.py index 18495c9..0f4fdb0 100755 --- a/paycheckrecords/paycheckrecords.py +++ b/paycheckrecords/paycheckrecords.py @@ -97,8 +97,8 @@ def _getPaystubsFromTable(self, html, sequence, GetHtml = True): paystubHtml = None rowCols = payrows[index].findAll('td') rowDate = rowCols[DateIndex].a.string.strip() - rowTotalPay = float(rowCols[TotalIndex].string.strip().strip("$")) - rowNetPay = float(rowCols[NetIndex].string.strip().strip("$")) + rowTotalPay = float(rowCols[TotalIndex].string.strip().strip("$").translate(dict.fromkeys(map(ord,','),None))) + rowNetPay = float(rowCols[NetIndex].string.strip().strip("$").translate(dict.fromkeys(map(ord,','),None))) tmpDateTime = datetime.strptime(rowDate, '%m/%d/%Y') if GetHtml: paystubResponse = self._br.open(rowCols[DateIndex].a['href']) 
From af10ef14ab591a7d9f52eb8375ddd48fad596d3d Mon Sep 17 00:00:00 2001 From: Brian Glod Date: Mon, 26 Feb 2018 19:31:45 -0500 Subject: [PATCH 3/6] Update .gitignore - Removed irrelevant ignores - Added *.html --- .gitignore | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/.gitignore b/.gitignore index ded6067..9d4be04 100644 --- a/.gitignore +++ b/.gitignore @@ -1,36 +1,3 @@ *.py[cod] - -# C extensions -*.so - -# Packages -*.egg -*.egg-info -dist -build -eggs -parts -bin -var -sdist -develop-eggs -.installed.cfg -lib -lib64 __pycache__ - -# Installer logs -pip-log.txt - -# Unit test / coverage reports -.coverage -.tox -nosetests.xml - -# Translations -*.mo - -# Mr Developer -.mr.developer.cfg -.project -.pydevproject +*.html From d5c7034d8f8499a029b57518d2ab7332d4ab438e Mon Sep 17 00:00:00 2001 From: Brian Glod Date: Mon, 26 Feb 2018 19:36:36 -0500 Subject: [PATCH 4/6] Add detailed summary and example cleanup - Example now has meaningful prompts - Simple summary is more visually pleasing - Added ability to print out a sum total of each line item on all pay stubs in the given period. Useful for W-2 and YTD verification. 
--- examples/paycheckProcess.py | 166 +++++++++++++++++++++++------ paycheckrecords/paycheckrecords.py | 36 ++++++- paycheckrecords/paystub.py | 3 +- 3 files changed, 169 insertions(+), 36 deletions(-) diff --git a/examples/paycheckProcess.py b/examples/paycheckProcess.py index 088221e..1ef21fb 100755 --- a/examples/paycheckProcess.py +++ b/examples/paycheckProcess.py @@ -1,9 +1,11 @@ +#!/usr/bin/env python2 + from datetime import date, timedelta from dateutil.relativedelta import relativedelta from bs4 import BeautifulSoup import re from getpass import getpass - +import os import sys sys.path.append("../") from paycheckrecords import * @@ -28,7 +30,7 @@ def checkRowForAll(row): return False def blackOut(html): - soup = BeautifulSoup(html) + soup = BeautifulSoup(html, "lxml") #blackout net pay tmp = soup.findAll('u') @@ -69,48 +71,148 @@ def blackOut(html): return str(soup.prettify(formatter=None)) +def printSimpleSummary( stubs ): + gross = 0.0 + totalnet = 0.0 + + print "" + print "QUICK SUMMARY:" + print "" + + print "----------------------------------------------" + print '{: <20} {: >12} {: >12}'.format( "Date", + "Total Pay", + "Net Pay" ) + print "----------------------------------------------" + for stub in stubs: + print '{: <20} {: >12} {: >12}'.format( stub.PayDate.strftime("%Y-%m-%d"), + stub.TotalPay, + stub.NetPay ) + gross = gross + stub.TotalPay + totalnet = totalnet + stub.NetPay + + print "----------------------------------------------" + print '{: <20} {: >12} {: >12}'.format( "", + str(gross), + str(totalnet) ) + print "" + +def printDetailedSummary( stubs ): + summary = {} + for stub in stubs: + for f in stub.StubDetails: + if f['name'] in summary: + summary[f['name']]['hours'] += f['hours'] + summary[f['name']]['rate'] += f['rate'] + summary[f['name']]['current'] += f['current'] + else: + summary[f['name']] = { 'hours' : f['hours'], + 'rate' : f['rate'], + 'current' : f['current'] } + + print "" + print "DETAILED TOTALS:" + print "" + + print 
"-----------------------------------------------------------" + print '{: <20} {: >12} {: >12} {: >12}'.format( "Field", + "Total Hours", + "Total Rate", + "Total" ) + print "-----------------------------------------------------------" + for s in summary: + print '{: <20} {: >12.2f} {: >12.2f} {: >12.2f}'.format( s, + summary[s]['hours'], + summary[s]['rate'], + summary[s]['current'] ) + print "" + + +def savePayStubs( stubs, redact=False ): + for stub in stubs: + filename = "paystub-" + stub.PayDate.strftime("%Y-%m-%d") + + if os.path.isfile(filename + ".html"): + i = 1 + while os.path.isfile(filename + "_" + str(i) + ".html"): + i += 1 + if i == 100: + print "There seem to be a lot of duplicate files? Aborting." + return -1 + filename += '_' + str(i) + + out = open(filename + ".html", "w") + out.write(stub.HTML) + out.close() + + if redact: + out = open(filename + "_redacted.html", "w") + out.write(blackOut(stub.HTML)) + out.close() + + def main(): - _day = int(input("Day:")) - username = raw_input("Username:") - password = getpass("Password:") + print "" + print "Print a summary of all pay stubs between the given dates." + print "Optionally save off the pay stubs and redacted pay stubs." + print "" - paycheckinst = paycheckrecords(username, password) try: + startdate = datetime.strptime(raw_input("Start date (MM/DD/YYYY): "), '%m/%d/%Y') + enddate = datetime.strptime(raw_input("End date (MM/DD/YYYY): "), '%m/%d/%Y') + except ValueError: + raise ValueError("Invalid date format.") + + + savestubs = raw_input("Save pay stubs? [Y/n] ") + if( savestubs.lower() == 'y' ): + savestubs = True + elif( savestubs.lower() == 'n' ): + savestubs = False + else: + print "Invalid response. Aborting." + return -1 + + if savestubs: + saveredacted = raw_input("Save redacted pay stubs? [Y/n] ") + if( saveredacted.lower() == 'y' ): + # Deleting the sensitive information is an exercise for the reader ... + print " WARNING: redacted pay stubs are intended to be printed. 
Although" + print " it is blacked out, the sensitive information is still" + print " present in the document." + saveredacted = raw_input(" Do you acknowledge and accept the above warning? [Y/n] ") + if( saveredacted.lower() == 'y' ): + saveredacted = True + elif( saveredacted.lower() == 'n' ): + saveredacted = False + else: + print "Invalid response. Aborting." + return -1 + elif( saveredacted.lower() == 'n' ): + saveredacted = False + else: + print "Invalid response. Aborting." + return -1 - now = date.today() - - if now.day > _day: - startdate = now.replace(day=_day+1) - enddate = startdate + timedelta(days=32) - enddate = enddate.replace(day = _day) + print "PaycheckRecords.com Credentials:" - else: + username = raw_input(" Username: ") + password = getpass(" Password: ") + print "" - enddate = now.replace(day=_day) - tmpdate = now.replace(day=1) - timedelta(days=1) - startdate = tmpdate.replace(day=_day+1) + paycheckinst = paycheckrecords(username, password) + try: + stubs = paycheckinst.getPayStubsInRange(startdate, enddate) + printSimpleSummary( stubs ) + printDetailedSummary( stubs ) - ret = paycheckinst.getPayStubsInRange(startdate, enddate) - gross = 0.0 - for stub in ret: - print "Date: ", stub.PayDate - print "Total Pay: ", stub.TotalPay - print "Net Pay: ", stub.NetPay - print "" - gross = gross + stub.TotalPay - filename = "paystub " + stub.PayDate.strftime("%m-%d-%Y") - out = open(filename + ".html", "w") - out.write(stub.HTML) - out.close() + if savestubs: + savePayStubs( stubs, saveredacted ) - out = open(filename + "(blacked out).html", "w") - out.write(blackOut(stub.HTML)) - out.close() - print "Gross: " + str(gross) finally: paycheckinst.close() diff --git a/paycheckrecords/paycheckrecords.py b/paycheckrecords/paycheckrecords.py index 0f4fdb0..e4e134a 100755 --- a/paycheckrecords/paycheckrecords.py +++ b/paycheckrecords/paycheckrecords.py @@ -69,10 +69,39 @@ def getPayStubsInRange(self, startDate, endDate, sequence = 0): 
self._browserSem.release() return ret - + def _getPayStubDetails(self, html): + soup = BeautifulSoup(html, "lxml") + details = soup.find_all("table", { "class" : [ "detailsWages", "detailsPart" ] }) + rv = [] + + # Paystub details seem to contain 4 elements, each consisting of one or more rows: + # [0] Pay (e.g. salary, bonus, ... ) + # [1] Deductions (e.g. 401k, healthcare, ... ) + # [2] Taxes (e.g. federal, state, SS, medicare, ... ) + # [3] Summary + for d in range( 0, len(details) ): + for r in details[d].find_all('tr')[1:]: + tds = r.find_all('td') + if( d == 0 ): # Pay field has extra elements: hours and rate + rv.append( { 'name' : tds[0].text.strip(), + 'hours' : float(tds[1].text.strip() or 0.0), + 'rate' : float(tds[2].text.strip() or 0.0), + 'current' : float(tds[3].text.strip()), + 'ytd' : float(tds[4].text.strip()) } ) + else: + rv.append( { 'name' : tds[0].text.strip(), + 'current' : float(tds[1].text.strip()), + 'ytd' : float(tds[2].text.strip()), + # Make post-processing easier + 'hours' : float(0.0), + 'rate' : float(0.0) } ) + + # List of dictionaries containing name/hours/rate/current/ytd + # information for each line-item of a paystub + return rv def _getPaystubsFromTable(self, html, sequence, GetHtml = True): - soup = BeautifulSoup(html) + soup = BeautifulSoup(html, "lxml") PayStubTable = soup.find("table", { "class" : "report" }) payrows = PayStubTable.findAll('tr') headerCols = payrows[0].findAll('td') @@ -103,8 +132,9 @@ def _getPaystubsFromTable(self, html, sequence, GetHtml = True): if GetHtml: paystubResponse = self._br.open(rowCols[DateIndex].a['href']) paystubHtml = paystubResponse.read() + stubDetails = self._getPayStubDetails(paystubHtml) self._br.back() - tmpPayStub = paystub(tmpDateTime, rowTotalPay, rowNetPay, paystubHtml) + tmpPayStub = paystub(tmpDateTime, rowTotalPay, rowNetPay, stubDetails, paystubHtml) ret.append(tmpPayStub) return ret diff --git a/paycheckrecords/paystub.py b/paycheckrecords/paystub.py index 84f6112..e1586b3 
100755 --- a/paycheckrecords/paystub.py +++ b/paycheckrecords/paystub.py @@ -1,6 +1,6 @@ import datetime class paystub: - def __init__(self, payDate, TotalPay, NetPay, html = None): + def __init__(self, payDate, TotalPay, NetPay, stubDetails = None, html = None): if type(payDate) is not datetime and type(payDate) is not datetime.datetime: raise ValueError("payDate is not a datetime object") @@ -12,4 +12,5 @@ def __init__(self, payDate, TotalPay, NetPay, html = None): self.PayDate = payDate self.TotalPay = TotalPay self.NetPay = NetPay + self.StubDetails = stubDetails self.HTML = html From a972ca345dbff43171549206999cc014ed5aa799 Mon Sep 17 00:00:00 2001 From: Brian Glod Date: Tue, 27 Feb 2018 14:00:24 -0500 Subject: [PATCH 5/6] Better handling of invalid input --- examples/paycheckProcess.py | 73 ++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/examples/paycheckProcess.py b/examples/paycheckProcess.py index 1ef21fb..bfc694b 100755 --- a/examples/paycheckProcess.py +++ b/examples/paycheckProcess.py @@ -150,6 +150,23 @@ def savePayStubs( stubs, redact=False ): out.write(blackOut(stub.HTML)) out.close() +def yesno( x ): + while True: + resp = raw_input(x) + if( resp.lower() == 'y' ): + return True + elif( resp.lower() == 'n' ): + return False + else: + print " Invalid response." + +def get_date( x, fmt='%m/%d/%Y' ): + while True: + try: + resp = raw_input(x) or datetime.today().strftime(fmt) + return datetime.strptime(resp, fmt) + except ValueError: + print " Invalid date or date format provided." def main(): @@ -158,47 +175,35 @@ def main(): print "Optionally save off the pay stubs and redacted pay stubs." print "" - try: - startdate = datetime.strptime(raw_input("Start date (MM/DD/YYYY): "), '%m/%d/%Y') - enddate = datetime.strptime(raw_input("End date (MM/DD/YYYY): "), '%m/%d/%Y') - except ValueError: - raise ValueError("Invalid date format.") - - - savestubs = raw_input("Save pay stubs? 
[Y/n] ") - if( savestubs.lower() == 'y' ): - savestubs = True - elif( savestubs.lower() == 'n' ): - savestubs = False - else: - print "Invalid response. Aborting." - return -1 - - if savestubs: - saveredacted = raw_input("Save redacted pay stubs? [Y/n] ") - if( saveredacted.lower() == 'y' ): + while True: + startdate = get_date("Start date (MM/DD/YYYY): ", '%m/%d/%Y') + enddate = get_date("End date (MM/DD/YYYY): ", '%m/%d/%Y') + if( startdate <= enddate ): + break + else: + print " Invalid date range. Start date must be before or equal to end date." + + savestubs = yesno("Save pay stubs? [Y/n] ") + if( savestubs ): + saveredacted = yesno("Save redacted pay stubs? [Y/n] ") + if( saveredacted ): # Deleting the sensitive information is an exercise for the reader ... print " WARNING: redacted pay stubs are intended to be printed. Although" print " it is blacked out, the sensitive information is still" print " present in the document." - saveredacted = raw_input(" Do you acknowledge and accept the above warning? [Y/n] ") - if( saveredacted.lower() == 'y' ): - saveredacted = True - elif( saveredacted.lower() == 'n' ): - saveredacted = False - else: - print "Invalid response. Aborting." - return -1 - elif( saveredacted.lower() == 'n' ): - saveredacted = False - else: - print "Invalid response. Aborting." - return -1 + saveredacted = yesno(" Do you acknowledge and accept the above warning? [Y/n] ") print "PaycheckRecords.com Credentials:" - username = raw_input(" Username: ") - password = getpass(" Password: ") + while True: + username = raw_input(" Username: ") + if( username != "" ): + break + + while True: + password = getpass(" Password: ") + if( password != "" ): + break print "" From 5f0b69cb147a26995bc334185e5fdab91b4fbbaf Mon Sep 17 00:00:00 2001 From: Yuriy Vidineev Date: Mon, 14 Jan 2019 23:57:19 -0800 Subject: [PATCH 6/6] python3. 
mechanize to mechanicalsoup --- examples/paycheckProcess.py | 79 +++++++++++++++--------------- paycheckrecords/__init__.py | 4 +- paycheckrecords/paycheckrecords.py | 52 ++++++++++---------- 3 files changed, 68 insertions(+), 67 deletions(-) diff --git a/examples/paycheckProcess.py b/examples/paycheckProcess.py index bfc694b..d83e7ea 100755 --- a/examples/paycheckProcess.py +++ b/examples/paycheckProcess.py @@ -75,27 +75,27 @@ def printSimpleSummary( stubs ): gross = 0.0 totalnet = 0.0 - print "" - print "QUICK SUMMARY:" - print "" + print("") + print("QUICK SUMMARY:") + print("") - print "----------------------------------------------" - print '{: <20} {: >12} {: >12}'.format( "Date", + print("----------------------------------------------") + print(('{: <20} {: >12} {: >12}'.format( "Date", "Total Pay", - "Net Pay" ) - print "----------------------------------------------" + "Net Pay" ))) + print("----------------------------------------------") for stub in stubs: - print '{: <20} {: >12} {: >12}'.format( stub.PayDate.strftime("%Y-%m-%d"), + print(('{: <20} {: >12} {: >12}'.format( stub.PayDate.strftime("%Y-%m-%d"), stub.TotalPay, - stub.NetPay ) + stub.NetPay ))) gross = gross + stub.TotalPay totalnet = totalnet + stub.NetPay - print "----------------------------------------------" - print '{: <20} {: >12} {: >12}'.format( "", + print("----------------------------------------------") + print(('{: <20} {: >12} {: >12}'.format( "", str(gross), - str(totalnet) ) - print "" + str(totalnet) ))) + print("") def printDetailedSummary( stubs ): summary = {} @@ -110,22 +110,22 @@ def printDetailedSummary( stubs ): 'rate' : f['rate'], 'current' : f['current'] } - print "" - print "DETAILED TOTALS:" - print "" + print("") + print("DETAILED TOTALS:") + print("") - print "-----------------------------------------------------------" - print '{: <20} {: >12} {: >12} {: >12}'.format( "Field", + print("-----------------------------------------------------------") + print(('{: 
<20} {: >12} {: >12} {: >12}'.format( "Field", "Total Hours", "Total Rate", - "Total" ) - print "-----------------------------------------------------------" + "Total" ))) + print("-----------------------------------------------------------") for s in summary: - print '{: <20} {: >12.2f} {: >12.2f} {: >12.2f}'.format( s, + print(('{: <20} {: >12.2f} {: >12.2f} {: >12.2f}'.format( s, summary[s]['hours'], summary[s]['rate'], - summary[s]['current'] ) - print "" + summary[s]['current'] ))) + print("") def savePayStubs( stubs, redact=False ): @@ -137,7 +137,7 @@ def savePayStubs( stubs, redact=False ): while os.path.isfile(filename + "_" + str(i) + ".html"): i += 1 if i == 100: - print "There seem to be a lot of duplicate files? Aborting." + print("There seem to be a lot of duplicate files? Aborting.") return -1 filename += '_' + str(i) @@ -152,28 +152,29 @@ def savePayStubs( stubs, redact=False ): def yesno( x ): while True: - resp = raw_input(x) + resp = input(x) if( resp.lower() == 'y' ): return True elif( resp.lower() == 'n' ): return False else: - print " Invalid response." + print(" Invalid response.") def get_date( x, fmt='%m/%d/%Y' ): while True: try: - resp = raw_input(x) or datetime.today().strftime(fmt) + #resp = eval(input(x)) or datetime.today().strftime(fmt) + resp = input(x) or datetime.today().strftime(fmt) return datetime.strptime(resp, fmt) except ValueError: - print " Invalid date or date format provided." + print(" Invalid date or date format provided.") def main(): - print "" - print "Print a summary of all pay stubs between the given dates." - print "Optionally save off the pay stubs and redacted pay stubs." 
- print "" + print("") + print("Print a summary of all pay stubs between the given dates.") + print("Optionally save off the pay stubs and redacted pay stubs.") + print("") while True: startdate = get_date("Start date (MM/DD/YYYY): ", '%m/%d/%Y') @@ -181,22 +182,22 @@ def main(): if( startdate <= enddate ): break else: - print " Invalid date range. Start date must be before or equal to end date." + print(" Invalid date range. Start date must be before or equal to end date.") savestubs = yesno("Save pay stubs? [Y/n] ") if( savestubs ): saveredacted = yesno("Save redacted pay stubs? [Y/n] ") if( saveredacted ): # Deleting the sensitive information is an exercise for the reader ... - print " WARNING: redacted pay stubs are intended to be printed. Although" - print " it is blacked out, the sensitive information is still" - print " present in the document." + print(" WARNING: redacted pay stubs are intended to be printed. Although") + print(" it is blacked out, the sensitive information is still") + print(" present in the document.") saveredacted = yesno(" Do you acknowledge and accept the above warning? [Y/n] ") - print "PaycheckRecords.com Credentials:" + print("PaycheckRecords.com Credentials:") while True: - username = raw_input(" Username: ") + username = input(" Username: ") if( username != "" ): break @@ -205,7 +206,7 @@ def main(): if( password != "" ): break - print "" + print("") paycheckinst = paycheckrecords(username, password) diff --git a/paycheckrecords/__init__.py b/paycheckrecords/__init__.py index 287f67e..b847350 100644 --- a/paycheckrecords/__init__.py +++ b/paycheckrecords/__init__.py @@ -1,2 +1,2 @@ -import paystub -from paycheckrecords import * +from . 
import paystub +from .paycheckrecords import * diff --git a/paycheckrecords/paycheckrecords.py b/paycheckrecords/paycheckrecords.py index e4e134a..2aabc3b 100755 --- a/paycheckrecords/paycheckrecords.py +++ b/paycheckrecords/paycheckrecords.py @@ -1,14 +1,14 @@ from getpass import getpass import threading -import mechanize +import mechanicalsoup from bs4 import BeautifulSoup -from paystub import paystub +from .paystub import paystub from datetime import datetime from datetime import timedelta class paycheckrecords: - _br = mechanize.Browser() + _br = mechanicalsoup.StatefulBrowser() _browserSem = threading.Semaphore() _thread = None _stop = False @@ -16,14 +16,14 @@ class paycheckrecords: _threadSleep = threading.Event() def __init__(self, username, password): - self._br.set_handle_robots(False) + #self._br.set_handle_robots(False) self._br.open("https://www.paycheckrecords.com") - self._br.select_form(name="Login_Form") + self._br.select_form() - self._br.form["userStrId"] = username - self._br.form["password"] = password + self._br["userStrId"] = username + self._br["password"] = password - self._br.submit() + self._br.submit_selected() self._thread = threading.Thread(target=self.preventTimeOut) self._thread.start() @@ -32,7 +32,7 @@ def preventTimeOut(self): while not self._stop: self._browserSem.acquire() # print "aquired lock" - url = self._br.geturl() + url = self._br.get_url() #print "url = ", url self._br.open(url) # print "refreshed" @@ -46,10 +46,10 @@ def preventTimeOut(self): def getLatestPayStub(self): self._browserSem.acquire() - originalurl = self._br.geturl() + originalurl = self._br.get_url() paystubResponse = self._br.open("https://www.paycheckrecords.com/in/paychecks.jsp") - ret = self._getPaystubsFromTable(paystubResponse.read(), range(1, 2)) + ret = self._getPaystubsFromTable(paystubResponse.read(), list(range(1, 2))) self._br.open(originalurl) self._browserSem.release() @@ -57,13 +57,13 @@ def getLatestPayStub(self): def 
getPayStubsInRange(self, startDate, endDate, sequence = 0): self._browserSem.acquire() - originalurl = self._br.geturl() + originalurl = self._br.get_url() paystubResponse = self._br.open("https://www.paycheckrecords.com/in/paychecks.jsp") - self._br.select_form(name="dateSelect") - self._br.form["startDate"] = startDate.strftime("%m/%d/%Y") - self._br.form["endDate"] = endDate.strftime("%m/%d/%Y") - paystubResponse = self._br.submit() - ret = self._getPaystubsFromTable(paystubResponse.read(),sequence) + self._br.select_form("#dateSelect") + self._br["startDate"] = startDate.strftime("%m/%d/%Y") + self._br["endDate"] = endDate.strftime("%m/%d/%Y") + paystubResponse = self._br.submit_selected() + ret = self._getPaystubsFromTable(paystubResponse.text,sequence) self._br.open(originalurl) self._browserSem.release() @@ -113,27 +113,27 @@ def _getPaystubsFromTable(self, html, sequence, GetHtml = True): for col in headerCols: colName = col.string - if colName == u'Pay Date' and DateIndex == -1: + if colName == 'Pay Date' and DateIndex == -1: DateIndex = i - elif colName == u'Total Pay' and TotalIndex == -1: + elif colName == 'Total Pay' and TotalIndex == -1: TotalIndex = i - elif colName == u'Net Pay' and NetIndex == -1: + elif colName == 'Net Pay' and NetIndex == -1: NetIndex = i i = i + 1 if sequence == 0: - sequence = range(1, len(payrows)) + sequence = list(range(1, len(payrows))) for index in sequence: paystubHtml = None rowCols = payrows[index].findAll('td') rowDate = rowCols[DateIndex].a.string.strip() - rowTotalPay = float(rowCols[TotalIndex].string.strip().strip("$").translate(dict.fromkeys(map(ord,','),None))) - rowNetPay = float(rowCols[NetIndex].string.strip().strip("$").translate(dict.fromkeys(map(ord,','),None))) + rowTotalPay = float(rowCols[TotalIndex].string.strip().strip("$").translate(dict.fromkeys(list(map(ord,',')),None))) + rowNetPay = float(rowCols[NetIndex].string.strip().strip("$").translate(dict.fromkeys(list(map(ord,',')),None))) tmpDateTime = 
datetime.strptime(rowDate, '%m/%d/%Y') if GetHtml: - paystubResponse = self._br.open(rowCols[DateIndex].a['href']) - paystubHtml = paystubResponse.read() + paystubResponse = self._br.open_relative(rowCols[DateIndex].a['href']) + paystubHtml = paystubResponse.text stubDetails = self._getPayStubDetails(paystubHtml) - self._br.back() + #self._br.back() tmpPayStub = paystub(tmpDateTime, rowTotalPay, rowNetPay, stubDetails, paystubHtml) ret.append(tmpPayStub)