From 232861a19e6d6054458078b3c5736e4a04312e60 Mon Sep 17 00:00:00 2001
From: Brian Glod <brian.glod@nuand.com>
Date: Wed, 21 Feb 2018 13:33:59 -0500
Subject: [PATCH 1/6] Cleanup whitespace

- Tabs to spaces
- Remove all trailing whitespace
---
 examples/paycheckProcess.py        | 205 +++++++++++++-------------
 paycheckrecords/__init__.py        |   1 -
 paycheckrecords/paycheckrecords.py | 226 ++++++++++++++---------------
 paycheckrecords/paystub.py         |  26 ++--
 4 files changed, 228 insertions(+), 230 deletions(-)
diff --git a/examples/paycheckProcess.py b/examples/paycheckProcess.py
index e8e1aa2..088221e 100755
--- a/examples/paycheckProcess.py
+++ b/examples/paycheckProcess.py
@@ -1,4 +1,3 @@
-
 from datetime import date, timedelta
 from dateutil.relativedelta import relativedelta
 from bs4 import BeautifulSoup
@@ -10,109 +9,109 @@
 from paycheckrecords import *
 
 def checkRowForAll(row):
-	for col in row.findAll('td'):
-		if "Federal Income Tax" in str(col):
-			return True
-		if "Social Security" in str(col):
-			return True
-		if "Medicare" in str(col):
-			return True
-		if "NY Income Tax" in str(col):
-			return True
-		if "Cell Phone" in str(col):
-			return True
-		if "Deductions" in str(col):
-			return True
-		if "Taxes" in str(col):
-			return True
-		
-	return False
-	
+    for col in row.findAll('td'):
+        if "Federal Income Tax" in str(col):
+            return True
+        if "Social Security" in str(col):
+            return True
+        if "Medicare" in str(col):
+            return True
+        if "NY Income Tax" in str(col):
+            return True
+        if "Cell Phone" in str(col):
+            return True
+        if "Deductions" in str(col):
+            return True
+        if "Taxes" in str(col):
+            return True
+
+    return False
+
 def blackOut(html):
-	soup = BeautifulSoup(html)
-	
-	#blackout net pay
-	tmp = soup.findAll('u')
-	for tag in tmp:
-		if "Net Pay" in str(tag.parent):
-			tag["style"] = "background-color:black; -webkit-print-color-adjust: exact;"
-	tableList = ["paystub_pay_tbl", "paystub_ee_taxes_tbl", "paystub_summary_tbl"]
-	
-	#black out all
-	for curTable in tableList:
-		tmpTable = soup.find("table", {"id": curTable})
-		allrows = tmpTable.findAll('tr')
-		for row in allrows:
-			if checkRowForAll(row):
-				for col in row.findAll('td'):
-					if '.' in str(col):
-						col["style"] = "background-color:black;  -webkit-print-color-adjust: exact;"
-	
-	
-	
-	#black out netthispay
-	elem = soup.find(text=re.compile('.*Net This Check:.*'))
-	elem = elem.findNext('td')
-	elem["style"] = "background-color:black;  -webkit-print-color-adjust: exact;"
-	
-	#black out account
-	elem = soup.find(text=re.compile('.*Acct#.*'))
-	
-	nelem = elem.findNext('td')
-	nelem["style"] = "background-color:black;  -webkit-print-color-adjust: exact;"
-	
-	contents = elem.string
-	contentsList = contents.split("#")
-	newcontent = contentsList[0] + "#<span style = \"background-color:black;  -webkit-print-color-adjust: exact;\">"
-	contentsList = contentsList[1].split(":")
-	newcontent = newcontent + contentsList[0] + "</span>:" + contentsList[1]
-	elem.replaceWith(newcontent)
-	
-	return str(soup.prettify(formatter=None))
+    soup = BeautifulSoup(html)
+
+    #blackout net pay
+    tmp = soup.findAll('u')
+    for tag in tmp:
+        if "Net Pay" in str(tag.parent):
+            tag["style"] = "background-color:black; -webkit-print-color-adjust: exact;"
+    tableList = ["paystub_pay_tbl", "paystub_ee_taxes_tbl", "paystub_summary_tbl"]
+
+    #black out all
+    for curTable in tableList:
+        tmpTable = soup.find("table", {"id": curTable})
+        allrows = tmpTable.findAll('tr')
+        for row in allrows:
+            if checkRowForAll(row):
+                for col in row.findAll('td'):
+                    if '.' in str(col):
+                        col["style"] = "background-color:black;  -webkit-print-color-adjust: exact;"
+
+
+
+    #black out netthispay
+    elem = soup.find(text=re.compile('.*Net This Check:.*'))
+    elem = elem.findNext('td')
+    elem["style"] = "background-color:black;  -webkit-print-color-adjust: exact;"
+
+    #black out account
+    elem = soup.find(text=re.compile('.*Acct#.*'))
+
+    nelem = elem.findNext('td')
+    nelem["style"] = "background-color:black;  -webkit-print-color-adjust: exact;"
+
+    contents = elem.string
+    contentsList = contents.split("#")
+    newcontent = contentsList[0] + "#<span style = \"background-color:black;  -webkit-print-color-adjust: exact;\">"
+    contentsList = contentsList[1].split(":")
+    newcontent = newcontent + contentsList[0] + "</span>:" + contentsList[1]
+    elem.replaceWith(newcontent)
+
+    return str(soup.prettify(formatter=None))
 
 def main():
-	
-	_day = int(input("Day:"))
-	username = raw_input("Username:")
-	password = getpass("Password:")
-	
-	paycheckinst = paycheckrecords(username, password)
-	try:
-		
-		now = date.today()
-		
-		if now.day > _day:
-			startdate = now.replace(day=_day+1)
-			enddate = startdate + timedelta(days=32)
-			enddate = enddate.replace(day = _day)
-
-		else:
-
-			
-			enddate = now.replace(day=_day)
-			tmpdate = now.replace(day=1) - timedelta(days=1)
-			startdate = tmpdate.replace(day=_day+1)
-			
-			
-		
-		ret = paycheckinst.getPayStubsInRange(startdate, enddate)
-		gross = 0.0
-		for stub in ret:
-			print "Date: ", stub.PayDate
-			print "Total Pay: ", stub.TotalPay
-			print "Net Pay: ", stub.NetPay
-			print ""
-			gross = gross + stub.TotalPay
-			filename = "paystub " + stub.PayDate.strftime("%m-%d-%Y")
-			out = open(filename + ".html", "w")
-			out.write(stub.HTML)
-			out.close()
-			
-			out = open(filename + "(blacked out).html", "w")
-			out.write(blackOut(stub.HTML))
-			out.close()
-		print "Gross: " + str(gross)
-	finally:
-		paycheckinst.close()
-	
+
+    _day = int(input("Day:"))
+    username = raw_input("Username:")
+    password = getpass("Password:")
+
+    paycheckinst = paycheckrecords(username, password)
+    try:
+
+        now = date.today()
+
+        if now.day > _day:
+            startdate = now.replace(day=_day+1)
+            enddate = startdate + timedelta(days=32)
+            enddate = enddate.replace(day = _day)
+
+        else:
+
+
+            enddate = now.replace(day=_day)
+            tmpdate = now.replace(day=1) - timedelta(days=1)
+            startdate = tmpdate.replace(day=_day+1)
+
+
+
+        ret = paycheckinst.getPayStubsInRange(startdate, enddate)
+        gross = 0.0
+        for stub in ret:
+            print "Date: ", stub.PayDate
+            print "Total Pay: ", stub.TotalPay
+            print "Net Pay: ", stub.NetPay
+            print ""
+            gross = gross + stub.TotalPay
+            filename = "paystub " + stub.PayDate.strftime("%m-%d-%Y")
+            out = open(filename + ".html", "w")
+            out.write(stub.HTML)
+            out.close()
+
+            out = open(filename + "(blacked out).html", "w")
+            out.write(blackOut(stub.HTML))
+            out.close()
+        print "Gross: " + str(gross)
+    finally:
+        paycheckinst.close()
+
 main()
diff --git a/paycheckrecords/__init__.py b/paycheckrecords/__init__.py
index 7884ee8..287f67e 100644
--- a/paycheckrecords/__init__.py
+++ b/paycheckrecords/__init__.py
@@ -1,3 +1,2 @@
 import paystub
 from paycheckrecords import *
-
diff --git a/paycheckrecords/paycheckrecords.py b/paycheckrecords/paycheckrecords.py
index 4722ec6..18495c9 100755
--- a/paycheckrecords/paycheckrecords.py
+++ b/paycheckrecords/paycheckrecords.py
@@ -8,116 +8,116 @@
 
 
 class paycheckrecords:
-	_br = mechanize.Browser()
-	_browserSem = threading.Semaphore()
-	_thread = None
-	_stop = False
-	_timer = None
-	_threadSleep = threading.Event()
-	
-	def __init__(self, username, password):
-		self._br.set_handle_robots(False)
-		self._br.open("https://www.paycheckrecords.com")
-		self._br.select_form(name="Login_Form")
-		
-		self._br.form["userStrId"] = username
-		self._br.form["password"] = password
-		
-		self._br.submit()
-		
-		self._thread = threading.Thread(target=self.preventTimeOut)
-		self._thread.start()
-		
-	def preventTimeOut(self):
-		while not self._stop:
-			self._browserSem.acquire()
-#			print "aquired lock"
-			url = self._br.geturl()
-			#print "url = ", url
-			self._br.open(url)
-#			print "refreshed"
-			self._browserSem.release()
-#			print "reload page from thread"
-			self._threadSleep.wait(30)
-#			print "awake"
-			self._threadSleep.clear()
-	
-
-	
-	def getLatestPayStub(self):
-		self._browserSem.acquire()
-		originalurl = self._br.geturl()
-		paystubResponse = self._br.open("https://www.paycheckrecords.com/in/paychecks.jsp")
-		
-		ret = self._getPaystubsFromTable(paystubResponse.read(), range(1, 2))
-		
-		self._br.open(originalurl)
-		self._browserSem.release()
-		return ret[0]
-	
-	def getPayStubsInRange(self, startDate, endDate, sequence = 0):
-		self._browserSem.acquire()
-		originalurl = self._br.geturl()
-		paystubResponse = self._br.open("https://www.paycheckrecords.com/in/paychecks.jsp")
-		self._br.select_form(name="dateSelect")
-		self._br.form["startDate"] = startDate.strftime("%m/%d/%Y")
-		self._br.form["endDate"] = endDate.strftime("%m/%d/%Y")		
-		paystubResponse = self._br.submit()
-		ret = self._getPaystubsFromTable(paystubResponse.read(),sequence)
-		
-		self._br.open(originalurl)
-		self._browserSem.release()
-		return ret
-	
-		
-	
-	def _getPaystubsFromTable(self, html, sequence, GetHtml = True):
-		soup = BeautifulSoup(html)
-		PayStubTable = soup.find("table", { "class" : "report" })
-		payrows = PayStubTable.findAll('tr')
-		headerCols = payrows[0].findAll('td')
-		ret = []
-		i = 0
-		DateIndex = -1
-		NetIndex = -1
-		TotalIndex = -1
-		
-		for col in headerCols:
-			colName = col.string
-			if colName == u'Pay Date' and DateIndex == -1:
-				DateIndex = i
-			elif colName == u'Total Pay' and TotalIndex == -1:
-				TotalIndex = i
-			elif colName == u'Net Pay' and NetIndex == -1:
-				NetIndex = i
-			i = i + 1
-		if sequence == 0:
-			sequence = range(1, len(payrows))
-		for index in sequence:
-			paystubHtml = None
-			rowCols = payrows[index].findAll('td')
-			rowDate = rowCols[DateIndex].a.string.strip()
-			rowTotalPay = float(rowCols[TotalIndex].string.strip().strip("$"))
-			rowNetPay = float(rowCols[NetIndex].string.strip().strip("$"))
-			tmpDateTime = datetime.strptime(rowDate, '%m/%d/%Y')
-			if GetHtml:
-				paystubResponse = self._br.open(rowCols[DateIndex].a['href'])
-				paystubHtml = paystubResponse.read()
-				self._br.back()
-			tmpPayStub = paystub(tmpDateTime, rowTotalPay, rowNetPay, paystubHtml)
-			ret.append(tmpPayStub)
-		
-		return ret
-
-		
-	
-	def close(self):
-		#print "Closing Instance"
-		self._stop = True
-		#print "_stop set"
-		self._threadSleep.set()
-		#print "_threadSleep set"
-		self._thread.join()
-		#print "thread joined"
-		self._br.close()
-		#print "Closing Done"
\ No newline at end of file
+    _br = mechanize.Browser()
+    _browserSem = threading.Semaphore()
+    _thread = None
+    _stop = False
+    _timer = None
+    _threadSleep = threading.Event()
+
+    def __init__(self, username, password):
+        self._br.set_handle_robots(False)
+        self._br.open("https://www.paycheckrecords.com")
+        self._br.select_form(name="Login_Form")
+
+        self._br.form["userStrId"] = username
+        self._br.form["password"] = password
+
+        self._br.submit()
+
+        self._thread = threading.Thread(target=self.preventTimeOut)
+        self._thread.start()
+
+    def preventTimeOut(self):
+        while not self._stop:
+            self._browserSem.acquire()
+#            print "aquired lock"
+            url = self._br.geturl()
+            #print "url = ", url
+            self._br.open(url)
+#            print "refreshed"
+            self._browserSem.release()
+#            print "reload page from thread"
+            self._threadSleep.wait(30)
+#            print "awake"
+            self._threadSleep.clear()
+
+
+
+    def getLatestPayStub(self):
+        self._browserSem.acquire()
+        originalurl = self._br.geturl()
+        paystubResponse = self._br.open("https://www.paycheckrecords.com/in/paychecks.jsp")
+
+        ret = self._getPaystubsFromTable(paystubResponse.read(), range(1, 2))
+
+        self._br.open(originalurl)
+        self._browserSem.release()
+        return ret[0]
+
+    def getPayStubsInRange(self, startDate, endDate, sequence = 0):
+        self._browserSem.acquire()
+        originalurl = self._br.geturl()
+        paystubResponse = self._br.open("https://www.paycheckrecords.com/in/paychecks.jsp")
+        self._br.select_form(name="dateSelect")
+        self._br.form["startDate"] = startDate.strftime("%m/%d/%Y")
+        self._br.form["endDate"] = endDate.strftime("%m/%d/%Y")
+        paystubResponse = self._br.submit()
+        ret = self._getPaystubsFromTable(paystubResponse.read(),sequence)
+
+        self._br.open(originalurl)
+        self._browserSem.release()
+        return ret
+
+
+
+    def _getPaystubsFromTable(self, html, sequence, GetHtml = True):
+        soup = BeautifulSoup(html)
+        PayStubTable = soup.find("table", { "class" : "report" })
+        payrows = PayStubTable.findAll('tr')
+        headerCols = payrows[0].findAll('td')
+        ret = []
+        i = 0
+        DateIndex = -1
+        NetIndex = -1
+        TotalIndex = -1
+
+        for col in headerCols:
+            colName = col.string
+            if colName == u'Pay Date' and DateIndex == -1:
+                DateIndex = i
+            elif colName == u'Total Pay' and TotalIndex == -1:
+                TotalIndex = i
+            elif colName == u'Net Pay' and NetIndex == -1:
+                NetIndex = i
+            i = i + 1
+        if sequence == 0:
+            sequence = range(1, len(payrows))
+        for index in sequence:
+            paystubHtml = None
+            rowCols = payrows[index].findAll('td')
+            rowDate = rowCols[DateIndex].a.string.strip()
+            rowTotalPay = float(rowCols[TotalIndex].string.strip().strip("$"))
+            rowNetPay = float(rowCols[NetIndex].string.strip().strip("$"))
+            tmpDateTime = datetime.strptime(rowDate, '%m/%d/%Y')
+            if GetHtml:
+                paystubResponse = self._br.open(rowCols[DateIndex].a['href'])
+                paystubHtml = paystubResponse.read()
+                self._br.back()
+            tmpPayStub = paystub(tmpDateTime, rowTotalPay, rowNetPay, paystubHtml)
+            ret.append(tmpPayStub)
+
+        return ret
+
+
+
+    def close(self):
+        #print "Closing Instance"
+        self._stop = True
+        #print "_stop set"
+        self._threadSleep.set()
+        #print "_threadSleep set"
+        self._thread.join()
+        #print "thread joined"
+        self._br.close()
+        #print "Closing Done"
diff --git a/paycheckrecords/paystub.py b/paycheckrecords/paystub.py
index de5ef7f..84f6112 100755
--- a/paycheckrecords/paystub.py
+++ b/paycheckrecords/paystub.py
@@ -1,15 +1,15 @@
 import datetime
 class paystub:
-	def __init__(self, payDate, TotalPay, NetPay, html = None):
-		if type(payDate) is not datetime and type(payDate) is not datetime.datetime:
-			raise ValueError("payDate is not a datetime object")
-		
-		if type(TotalPay) is not float:
-			raise ValueError("TotalPay needs to be a float")
-		if type(NetPay) is not float:
-			raise ValueError("NetPay needs to be a float")
-		
-		self.PayDate = payDate
-		self.TotalPay = TotalPay
-		self.NetPay = NetPay
-		self.HTML = html
+    def __init__(self, payDate, TotalPay, NetPay, html = None):
+        if type(payDate) is not datetime and type(payDate) is not datetime.datetime:
+            raise ValueError("payDate is not a datetime object")
+
+        if type(TotalPay) is not float:
+            raise ValueError("TotalPay needs to be a float")
+        if type(NetPay) is not float:
+            raise ValueError("NetPay needs to be a float")
+
+        self.PayDate = payDate
+        self.TotalPay = TotalPay
+        self.NetPay = NetPay
+        self.HTML = html

From 249c35b6b6a593d6f984189466daff816589766f Mon Sep 17 00:00:00 2001
From: Brian Glod <brian.glod@nuand.com>
Date: Wed, 21 Feb 2018 13:39:28 -0500
Subject: [PATCH 2/6] Remove comma(s) from dollar amounts

---
 paycheckrecords/paycheckrecords.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paycheckrecords/paycheckrecords.py b/paycheckrecords/paycheckrecords.py
index 18495c9..0f4fdb0 100755
--- a/paycheckrecords/paycheckrecords.py
+++ b/paycheckrecords/paycheckrecords.py
@@ -97,8 +97,8 @@ def _getPaystubsFromTable(self, html, sequence, GetHtml = True):
             paystubHtml = None
             rowCols = payrows[index].findAll('td')
             rowDate = rowCols[DateIndex].a.string.strip()
-            rowTotalPay = float(rowCols[TotalIndex].string.strip().strip("$"))
-            rowNetPay = float(rowCols[NetIndex].string.strip().strip("$"))
+            rowTotalPay = float(rowCols[TotalIndex].string.strip().strip("$").translate(dict.fromkeys(map(ord,','),None)))
+            rowNetPay = float(rowCols[NetIndex].string.strip().strip("$").translate(dict.fromkeys(map(ord,','),None)))
             tmpDateTime = datetime.strptime(rowDate, '%m/%d/%Y')
             if GetHtml:
                 paystubResponse = self._br.open(rowCols[DateIndex].a['href'])

From af10ef14ab591a7d9f52eb8375ddd48fad596d3d Mon Sep 17 00:00:00 2001
From: Brian Glod <brian.glod@nuand.com>
Date: Mon, 26 Feb 2018 19:31:45 -0500
Subject: [PATCH 3/6] Update .gitignore

- Removed irrelevant ignores
- Added *.html
---
 .gitignore | 35 +----------------------------------
 1 file changed, 1 insertion(+), 34 deletions(-)

diff --git a/.gitignore b/.gitignore
index ded6067..9d4be04 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,36 +1,3 @@
 *.py[cod]
-
-# C extensions
-*.so
-
-# Packages
-*.egg
-*.egg-info
-dist
-build
-eggs
-parts
-bin
-var
-sdist
-develop-eggs
-.installed.cfg
-lib
-lib64
 __pycache__
-
-# Installer logs
-pip-log.txt
-
-# Unit test / coverage reports
-.coverage
-.tox
-nosetests.xml
-
-# Translations
-*.mo
-
-# Mr Developer
-.mr.developer.cfg
-.project
-.pydevproject
+*.html

From d5c7034d8f8499a029b57518d2ab7332d4ab438e Mon Sep 17 00:00:00 2001
From: Brian Glod <brian.glod@nuand.com>
Date: Mon, 26 Feb 2018 19:36:36 -0500
Subject: [PATCH 4/6] Add detailed summary and example cleanup

- Example now has meaningful prompts
- Simple summary is more visually pleasing
- Added ability to print out a sum total of each line item on all pay
  stubs in the given period. Useful for W-2 and YTD verification.
---
 examples/paycheckProcess.py        | 166 +++++++++++++++++++++++------
 paycheckrecords/paycheckrecords.py |  36 ++++++-
 paycheckrecords/paystub.py         |   3 +-
 3 files changed, 169 insertions(+), 36 deletions(-)

diff --git a/examples/paycheckProcess.py b/examples/paycheckProcess.py
index 088221e..1ef21fb 100755
--- a/examples/paycheckProcess.py
+++ b/examples/paycheckProcess.py
@@ -1,9 +1,11 @@
+#!/usr/bin/env python2
+
 from datetime import date, timedelta
 from dateutil.relativedelta import relativedelta
 from bs4 import BeautifulSoup
 import re
 from getpass import getpass
-
+import os
 import sys
 sys.path.append("../")
 from paycheckrecords import *
@@ -28,7 +30,7 @@ def checkRowForAll(row):
     return False
 
 def blackOut(html):
-    soup = BeautifulSoup(html)
+    soup = BeautifulSoup(html, "lxml")
 
     #blackout net pay
     tmp = soup.findAll('u')
@@ -69,48 +71,148 @@ def blackOut(html):
 
     return str(soup.prettify(formatter=None))
 
+def printSimpleSummary( stubs ):
+    gross    = 0.0
+    totalnet = 0.0
+
+    print ""
+    print "QUICK SUMMARY:"
+    print ""
+
+    print "----------------------------------------------"
+    print '{: <20} {: >12} {: >12}'.format( "Date",
+                                            "Total Pay",
+                                            "Net Pay" )
+    print "----------------------------------------------"
+    for stub in stubs:
+        print '{: <20} {: >12} {: >12}'.format( stub.PayDate.strftime("%Y-%m-%d"),
+                                                stub.TotalPay,
+                                                stub.NetPay )
+        gross    = gross    + stub.TotalPay
+        totalnet = totalnet + stub.NetPay
+
+    print "----------------------------------------------"
+    print '{: <20} {: >12} {: >12}'.format( "",
+                                            str(gross),
+                                            str(totalnet) )
+    print ""
+
+def printDetailedSummary( stubs ):
+    summary = {}
+    for stub in stubs:
+        for f in stub.StubDetails:
+            if f['name'] in summary:
+                summary[f['name']]['hours']   += f['hours']
+                summary[f['name']]['rate']    += f['rate']
+                summary[f['name']]['current'] += f['current']
+            else:
+                summary[f['name']] = { 'hours'   : f['hours'],
+                                       'rate'    : f['rate'],
+                                       'current' : f['current'] }
+
+    print ""
+    print "DETAILED TOTALS:"
+    print ""
+
+    print "-----------------------------------------------------------"
+    print '{: <20} {: >12} {: >12} {: >12}'.format( "Field",
+                                                    "Total Hours",
+                                                    "Total Rate",
+                                                    "Total" )
+    print "-----------------------------------------------------------"
+    for s in summary:
+        print '{: <20} {: >12.2f} {: >12.2f} {: >12.2f}'.format( s,
+                                                                 summary[s]['hours'],
+                                                                 summary[s]['rate'],
+                                                                 summary[s]['current'] )
+    print ""
+
+
+def savePayStubs( stubs, redact=False ):
+    for stub in stubs:
+        filename = "paystub-" + stub.PayDate.strftime("%Y-%m-%d")
+
+        if os.path.isfile(filename + ".html"):
+            i = 1
+            while os.path.isfile(filename + "_" + str(i) + ".html"):
+                i += 1
+                if i == 100:
+                    print "There seem to be a lot of duplicate files? Aborting."
+                    return -1
+            filename += '_' + str(i)
+
+        out = open(filename + ".html", "w")
+        out.write(stub.HTML)
+        out.close()
+
+        if redact:
+            out = open(filename + "_redacted.html", "w")
+            out.write(blackOut(stub.HTML))
+            out.close()
+
+
 def main():
 
-    _day = int(input("Day:"))
-    username = raw_input("Username:")
-    password = getpass("Password:")
+    print ""
+    print "Print a summary of all pay stubs between the given dates."
+    print "Optionally save off the pay stubs and redacted pay stubs."
+    print ""
 
-    paycheckinst = paycheckrecords(username, password)
     try:
+        startdate = datetime.strptime(raw_input("Start date (MM/DD/YYYY): "), '%m/%d/%Y')
+        enddate   = datetime.strptime(raw_input("End   date (MM/DD/YYYY): "), '%m/%d/%Y')
+    except ValueError:
+        raise ValueError("Invalid date format.")
+
+
+    savestubs = raw_input("Save pay stubs? [Y/n] ")
+    if( savestubs.lower() == 'y' ):
+        savestubs = True
+    elif( savestubs.lower() == 'n' ):
+        savestubs = False
+    else:
+        print "Invalid response. Aborting."
+        return -1
+
+    if savestubs:
+        saveredacted = raw_input("Save redacted pay stubs? [Y/n] ")
+        if( saveredacted.lower() == 'y' ):
+            # Deleting the sensitive information is an exercise for the reader ...
+            print "  WARNING: redacted pay stubs are intended to be printed. Although"
+            print "           it is blacked out, the sensitive information is still"
+            print "           present in the document."
+            saveredacted = raw_input("  Do you acknowledge and accept the above warning? [Y/n] ")
+            if( saveredacted.lower() == 'y' ):
+                saveredacted = True
+            elif( saveredacted.lower() == 'n' ):
+                saveredacted = False
+            else:
+                print "Invalid response. Aborting."
+                return -1
+        elif( saveredacted.lower() == 'n' ):
+            saveredacted = False
+        else:
+            print "Invalid response. Aborting."
+            return -1
 
-        now = date.today()
-
-        if now.day > _day:
-            startdate = now.replace(day=_day+1)
-            enddate = startdate + timedelta(days=32)
-            enddate = enddate.replace(day = _day)
+    print "PaycheckRecords.com Credentials:"
 
-        else:
+    username = raw_input("  Username: ")
+    password = getpass("  Password: ")
 
+    print ""
 
-            enddate = now.replace(day=_day)
-            tmpdate = now.replace(day=1) - timedelta(days=1)
-            startdate = tmpdate.replace(day=_day+1)
+    paycheckinst = paycheckrecords(username, password)
 
+    try:
+        stubs = paycheckinst.getPayStubsInRange(startdate, enddate)
 
+        printSimpleSummary( stubs )
+        printDetailedSummary( stubs )
 
-        ret = paycheckinst.getPayStubsInRange(startdate, enddate)
-        gross = 0.0
-        for stub in ret:
-            print "Date: ", stub.PayDate
-            print "Total Pay: ", stub.TotalPay
-            print "Net Pay: ", stub.NetPay
-            print ""
-            gross = gross + stub.TotalPay
-            filename = "paystub " + stub.PayDate.strftime("%m-%d-%Y")
-            out = open(filename + ".html", "w")
-            out.write(stub.HTML)
-            out.close()
+        if savestubs:
+            savePayStubs( stubs, saveredacted )
 
-            out = open(filename + "(blacked out).html", "w")
-            out.write(blackOut(stub.HTML))
-            out.close()
-        print "Gross: " + str(gross)
     finally:
         paycheckinst.close()
 
diff --git a/paycheckrecords/paycheckrecords.py b/paycheckrecords/paycheckrecords.py
index 0f4fdb0..e4e134a 100755
--- a/paycheckrecords/paycheckrecords.py
+++ b/paycheckrecords/paycheckrecords.py
@@ -69,10 +69,39 @@ def getPayStubsInRange(self, startDate, endDate, sequence = 0):
         self._browserSem.release()
         return ret
 
-
+    def _getPayStubDetails(self, html):
+        soup    = BeautifulSoup(html, "lxml")
+        details = soup.find_all("table", { "class" : [ "detailsWages", "detailsPart" ] })
+        rv      = []
+
+        # Paystub details seem to contain 4 elements, each consisting of one or more rows:
+        #  [0] Pay        (e.g. salary, bonus, ... )
+        #  [1] Deductions (e.g. 401k, healthcare, ... )
+        #  [2] Taxes      (e.g. federal, state, SS, medicare, ... )
+        #  [3] Summary
+        for d in range( 0, len(details) ):
+            for r in details[d].find_all('tr')[1:]:
+                tds = r.find_all('td')
+                if( d == 0 ): # Pay field has extra elements: hours and rate
+                    rv.append( { 'name'    : tds[0].text.strip(),
+                                 'hours'   : float(tds[1].text.strip() or 0.0),
+                                 'rate'    : float(tds[2].text.strip() or 0.0),
+                                 'current' : float(tds[3].text.strip()),
+                                 'ytd'     : float(tds[4].text.strip()) } )
+                else:
+                    rv.append( { 'name'    : tds[0].text.strip(),
+                                 'current' : float(tds[1].text.strip()),
+                                 'ytd'     : float(tds[2].text.strip()),
+                                 # Make post-processing easier
+                                 'hours'   : float(0.0),
+                                 'rate'    : float(0.0) } )
+
+        # List of dictionaries containing name/hours/rate/current/ytd
+        # information for each line-item of a paystub
+        return rv
 
     def _getPaystubsFromTable(self, html, sequence, GetHtml = True):
-        soup = BeautifulSoup(html)
+        soup = BeautifulSoup(html, "lxml")
         PayStubTable = soup.find("table", { "class" : "report" })
         payrows = PayStubTable.findAll('tr')
         headerCols = payrows[0].findAll('td')
@@ -103,8 +132,9 @@ def _getPaystubsFromTable(self, html, sequence, GetHtml = True):
             if GetHtml:
                 paystubResponse = self._br.open(rowCols[DateIndex].a['href'])
                 paystubHtml = paystubResponse.read()
+                stubDetails = self._getPayStubDetails(paystubHtml)
                 self._br.back()
-            tmpPayStub = paystub(tmpDateTime, rowTotalPay, rowNetPay, paystubHtml)
+            tmpPayStub = paystub(tmpDateTime, rowTotalPay, rowNetPay, stubDetails, paystubHtml)
             ret.append(tmpPayStub)
 
         return ret
diff --git a/paycheckrecords/paystub.py b/paycheckrecords/paystub.py
index 84f6112..e1586b3 100755
--- a/paycheckrecords/paystub.py
+++ b/paycheckrecords/paystub.py
@@ -1,6 +1,6 @@
 import datetime
 class paystub:
-    def __init__(self, payDate, TotalPay, NetPay, html = None):
+    def __init__(self, payDate, TotalPay, NetPay, stubDetails = None, html = None):
         if type(payDate) is not datetime and type(payDate) is not datetime.datetime:
             raise ValueError("payDate is not a datetime object")
 
@@ -12,4 +12,5 @@ def __init__(self, payDate, TotalPay, NetPay, html = None):
         self.PayDate = payDate
         self.TotalPay = TotalPay
         self.NetPay = NetPay
+        self.StubDetails = stubDetails
         self.HTML = html

From a972ca345dbff43171549206999cc014ed5aa799 Mon Sep 17 00:00:00 2001
From: Brian Glod <brian.glod@nuand.com>
Date: Tue, 27 Feb 2018 14:00:24 -0500
Subject: [PATCH 5/6] Better handling of invalid input

---
 examples/paycheckProcess.py | 73 ++++++++++++++++++++-----------------
 1 file changed, 39 insertions(+), 34 deletions(-)

diff --git a/examples/paycheckProcess.py b/examples/paycheckProcess.py
index 1ef21fb..bfc694b 100755
--- a/examples/paycheckProcess.py
+++ b/examples/paycheckProcess.py
@@ -150,6 +150,23 @@ def savePayStubs( stubs, redact=False ):
             out.write(blackOut(stub.HTML))
             out.close()
 
+def yesno( x ):
+    while True:
+        resp = raw_input(x)
+        if( resp.lower() == 'y' ):
+            return True
+        elif( resp.lower() == 'n' ):
+            return False
+        else:
+            print "  Invalid response."
+
+def get_date( x, fmt='%m/%d/%Y' ):
+    while True:
+        try:
+            resp = raw_input(x) or datetime.today().strftime(fmt)
+            return datetime.strptime(resp, fmt)
+        except ValueError:
+            print "  Invalid date or date format provided."
 
 def main():
 
@@ -158,47 +175,35 @@ def main():
     print "Optionally save off the pay stubs and redacted pay stubs."
     print ""
 
-    try:
-        startdate = datetime.strptime(raw_input("Start date (MM/DD/YYYY): "), '%m/%d/%Y')
-        enddate   = datetime.strptime(raw_input("End   date (MM/DD/YYYY): "), '%m/%d/%Y')
-    except ValueError:
-        raise ValueError("Invalid date format.")
-
-
-    savestubs = raw_input("Save pay stubs? [Y/n] ")
-    if( savestubs.lower() == 'y' ):
-        savestubs = True
-    elif( savestubs.lower() == 'n' ):
-        savestubs = False
-    else:
-        print "Invalid response. Aborting."
-        return -1
-
-    if savestubs:
-        saveredacted = raw_input("Save redacted pay stubs? [Y/n] ")
-        if( saveredacted.lower() == 'y' ):
+    while True:
+        startdate = get_date("Start date (MM/DD/YYYY): ", '%m/%d/%Y')
+        enddate   = get_date("End date (MM/DD/YYYY): ", '%m/%d/%Y')
+        if( startdate <= enddate ):
+            break
+        else:
+            print "  Invalid date range. Start date must be before or equal to end date."
+
+    savestubs = yesno("Save pay stubs? [Y/n] ")
+    if( savestubs ):
+        saveredacted = yesno("Save redacted pay stubs? [Y/n] ")
+        if( saveredacted ):
             # Deleting the sensitive information is an exercise for the reader ...
             print "  WARNING: redacted pay stubs are intended to be printed. Although"
             print "           it is blacked out, the sensitive information is still"
             print "           present in the document."
-            saveredacted = raw_input("  Do you acknowledge and accept the above warning? [Y/n] ")
-            if( saveredacted.lower() == 'y' ):
-                saveredacted = True
-            elif( saveredacted.lower() == 'n' ):
-                saveredacted = False
-            else:
-                print "Invalid response. Aborting."
-                return -1
-        elif( saveredacted.lower() == 'n' ):
-            saveredacted = False
-        else:
-            print "Invalid response. Aborting."
-            return -1
+            saveredacted = yesno("  Do you acknowledge and accept the above warning? [Y/n] ")
 
     print "PaycheckRecords.com Credentials:"
 
-    username = raw_input("  Username: ")
-    password = getpass("  Password: ")
+    while True:
+        username = raw_input("  Username: ")
+        if( username != "" ):
+            break
+
+    while True:
+        password = getpass("  Password: ")
+        if( password != "" ):
+            break
 
     print ""
 

From 5f0b69cb147a26995bc334185e5fdab91b4fbbaf Mon Sep 17 00:00:00 2001
From: Yuriy Vidineev <yuriy.vidineev@revjet.com>
Date: Mon, 14 Jan 2019 23:57:19 -0800
Subject: [PATCH 6/6] Port to Python 3; replace mechanize with mechanicalsoup

---
 examples/paycheckProcess.py        | 79 +++++++++++++++---------------
 paycheckrecords/__init__.py        |  4 +-
 paycheckrecords/paycheckrecords.py | 52 ++++++++++----------
 3 files changed, 68 insertions(+), 67 deletions(-)

diff --git a/examples/paycheckProcess.py b/examples/paycheckProcess.py
index bfc694b..d83e7ea 100755
--- a/examples/paycheckProcess.py
+++ b/examples/paycheckProcess.py
@@ -75,27 +75,27 @@ def printSimpleSummary( stubs ):
     gross    = 0.0
     totalnet = 0.0
 
-    print ""
-    print "QUICK SUMMARY:"
-    print ""
+    print("")
+    print("QUICK SUMMARY:")
+    print("")
 
-    print "----------------------------------------------"
-    print '{: <20} {: >12} {: >12}'.format( "Date",
+    print("----------------------------------------------")
+    print(('{: <20} {: >12} {: >12}'.format( "Date",
                                             "Total Pay",
-                                            "Net Pay" )
-    print "----------------------------------------------"
+                                            "Net Pay" )))
+    print("----------------------------------------------")
     for stub in stubs:
-        print '{: <20} {: >12} {: >12}'.format( stub.PayDate.strftime("%Y-%m-%d"),
+        print(('{: <20} {: >12} {: >12}'.format( stub.PayDate.strftime("%Y-%m-%d"),
                                                 stub.TotalPay,
-                                                stub.NetPay )
+                                                stub.NetPay )))
         gross    = gross    + stub.TotalPay
         totalnet = totalnet + stub.NetPay
 
-    print "----------------------------------------------"
-    print '{: <20} {: >12} {: >12}'.format( "",
+    print("----------------------------------------------")
+    print(('{: <20} {: >12} {: >12}'.format( "",
                                             str(gross),
-                                            str(totalnet) )
-    print ""
+                                            str(totalnet) )))
+    print("")
 
 def printDetailedSummary( stubs ):
     summary = {}
@@ -110,22 +110,22 @@ def printDetailedSummary( stubs ):
                                        'rate'    : f['rate'],
                                        'current' : f['current'] }
 
-    print ""
-    print "DETAILED TOTALS:"
-    print ""
+    print("")
+    print("DETAILED TOTALS:")
+    print("")
 
-    print "-----------------------------------------------------------"
-    print '{: <20} {: >12} {: >12} {: >12}'.format( "Field",
+    print("-----------------------------------------------------------")
+    print(('{: <20} {: >12} {: >12} {: >12}'.format( "Field",
                                                     "Total Hours",
                                                     "Total Rate",
-                                                    "Total" )
-    print "-----------------------------------------------------------"
+                                                    "Total" )))
+    print("-----------------------------------------------------------")
     for s in summary:
-        print '{: <20} {: >12.2f} {: >12.2f} {: >12.2f}'.format( s,
+        print(('{: <20} {: >12.2f} {: >12.2f} {: >12.2f}'.format( s,
                                                                  summary[s]['hours'],
                                                                  summary[s]['rate'],
-                                                                 summary[s]['current'] )
-    print ""
+                                                                 summary[s]['current'] )))
+    print("")
 
 
 def savePayStubs( stubs, redact=False ):
@@ -137,7 +137,7 @@ def savePayStubs( stubs, redact=False ):
             while os.path.isfile(filename + "_" + str(i) + ".html"):
                 i += 1
                 if i == 100:
-                    print "There seem to be a lot of duplicate files? Aborting."
+                    print("There seem to be a lot of duplicate files? Aborting.")
                     return -1
             filename += '_' + str(i)
 
@@ -152,28 +152,29 @@ def savePayStubs( stubs, redact=False ):
 
 def yesno( x ):
     while True:
-        resp = raw_input(x)
+        resp = input(x)
         if( resp.lower() == 'y' ):
             return True
         elif( resp.lower() == 'n' ):
             return False
         else:
-            print "  Invalid response."
+            print("  Invalid response.")
 
 def get_date( x, fmt='%m/%d/%Y' ):
     while True:
         try:
-            resp = raw_input(x) or datetime.today().strftime(fmt)
+            #resp = eval(input(x)) or datetime.today().strftime(fmt)
+            resp = input(x) or datetime.today().strftime(fmt)
             return datetime.strptime(resp, fmt)
         except ValueError:
-            print "  Invalid date or date format provided."
+            print("  Invalid date or date format provided.")
 
 def main():
 
-    print ""
-    print "Print a summary of all pay stubs between the given dates."
-    print "Optionally save off the pay stubs and redacted pay stubs."
-    print ""
+    print("")
+    print("Print a summary of all pay stubs between the given dates.")
+    print("Optionally save off the pay stubs and redacted pay stubs.")
+    print("")
 
     while True:
         startdate = get_date("Start date (MM/DD/YYYY): ", '%m/%d/%Y')
@@ -181,22 +182,22 @@ def main():
         if( startdate <= enddate ):
             break
         else:
-            print "  Invalid date range. Start date must be before or equal to end date."
+            print("  Invalid date range. Start date must be before or equal to end date.")
 
     savestubs = yesno("Save pay stubs? [Y/n] ")
     if( savestubs ):
         saveredacted = yesno("Save redacted pay stubs? [Y/n] ")
         if( saveredacted ):
             # Deleting the sensitive information is an exercise for the reader ...
-            print "  WARNING: redacted pay stubs are intended to be printed. Although"
-            print "           it is blacked out, the sensitive information is still"
-            print "           present in the document."
+            print("  WARNING: redacted pay stubs are intended to be printed. Although")
+            print("           it is blacked out, the sensitive information is still")
+            print("           present in the document.")
             saveredacted = yesno("  Do you acknowledge and accept the above warning? [Y/n] ")
 
-    print "PaycheckRecords.com Credentials:"
+    print("PaycheckRecords.com Credentials:")
 
     while True:
-        username = raw_input("  Username: ")
+        username = input("  Username: ")
         if( username != "" ):
             break
 
@@ -205,7 +206,7 @@ def main():
         if( password != "" ):
             break
 
-    print ""
+    print("")
 
     paycheckinst = paycheckrecords(username, password)
 
diff --git a/paycheckrecords/__init__.py b/paycheckrecords/__init__.py
index 287f67e..b847350 100644
--- a/paycheckrecords/__init__.py
+++ b/paycheckrecords/__init__.py
@@ -1,2 +1,2 @@
-import paystub
-from paycheckrecords import *
+from . import paystub
+from .paycheckrecords import *
diff --git a/paycheckrecords/paycheckrecords.py b/paycheckrecords/paycheckrecords.py
index e4e134a..2aabc3b 100755
--- a/paycheckrecords/paycheckrecords.py
+++ b/paycheckrecords/paycheckrecords.py
@@ -1,14 +1,14 @@
 from getpass import getpass
 import threading
-import mechanize
+import mechanicalsoup
 from bs4 import BeautifulSoup
-from paystub import paystub
+from .paystub import paystub
 from datetime import datetime
 from datetime import timedelta
 
 
 class paycheckrecords:
-    _br = mechanize.Browser()
+    _br = mechanicalsoup.StatefulBrowser()
     _browserSem = threading.Semaphore()
     _thread = None
     _stop = False
@@ -16,14 +16,14 @@ class paycheckrecords:
     _threadSleep = threading.Event()
 
     def __init__(self, username, password):
-        self._br.set_handle_robots(False)
+        #self._br.set_handle_robots(False)
         self._br.open("https://www.paycheckrecords.com")
-        self._br.select_form(name="Login_Form")
+        self._br.select_form()
 
-        self._br.form["userStrId"] = username
-        self._br.form["password"] = password
+        self._br["userStrId"] = username
+        self._br["password"] = password
 
-        self._br.submit()
+        self._br.submit_selected()
 
         self._thread = threading.Thread(target=self.preventTimeOut)
         self._thread.start()
@@ -32,7 +32,7 @@ def preventTimeOut(self):
         while not self._stop:
             self._browserSem.acquire()
 #            print "aquired lock"
-            url = self._br.geturl()
+            url = self._br.get_url()
             #print "url = ", url
             self._br.open(url)
 #            print "refreshed"
@@ -46,10 +46,10 @@ def preventTimeOut(self):
 
     def getLatestPayStub(self):
         self._browserSem.acquire()
-        originalurl = self._br.geturl()
+        originalurl = self._br.get_url()
         paystubResponse = self._br.open("https://www.paycheckrecords.com/in/paychecks.jsp")
 
-        ret = self._getPaystubsFromTable(paystubResponse.read(), range(1, 2))
+        ret = self._getPaystubsFromTable(paystubResponse.text, list(range(1, 2)))  # requests.Response has .text, not .read()
 
         self._br.open(originalurl)
         self._browserSem.release()
@@ -57,13 +57,13 @@ def getLatestPayStub(self):
 
     def getPayStubsInRange(self, startDate, endDate, sequence = 0):
         self._browserSem.acquire()
-        originalurl = self._br.geturl()
+        originalurl = self._br.get_url()
         paystubResponse = self._br.open("https://www.paycheckrecords.com/in/paychecks.jsp")
-        self._br.select_form(name="dateSelect")
-        self._br.form["startDate"] = startDate.strftime("%m/%d/%Y")
-        self._br.form["endDate"] = endDate.strftime("%m/%d/%Y")
-        paystubResponse = self._br.submit()
-        ret = self._getPaystubsFromTable(paystubResponse.read(),sequence)
+        self._br.select_form("#dateSelect")
+        self._br["startDate"] = startDate.strftime("%m/%d/%Y")
+        self._br["endDate"] = endDate.strftime("%m/%d/%Y")
+        paystubResponse = self._br.submit_selected()
+        ret = self._getPaystubsFromTable(paystubResponse.text,sequence)
 
         self._br.open(originalurl)
         self._browserSem.release()
@@ -113,27 +113,27 @@ def _getPaystubsFromTable(self, html, sequence, GetHtml = True):
 
         for col in headerCols:
             colName = col.string
-            if colName == u'Pay Date' and DateIndex == -1:
+            if colName == 'Pay Date' and DateIndex == -1:
                 DateIndex = i
-            elif colName == u'Total Pay' and TotalIndex == -1:
+            elif colName == 'Total Pay' and TotalIndex == -1:
                 TotalIndex = i
-            elif colName == u'Net Pay' and NetIndex == -1:
+            elif colName == 'Net Pay' and NetIndex == -1:
                 NetIndex = i
             i = i + 1
         if sequence == 0:
-            sequence = range(1, len(payrows))
+            sequence = list(range(1, len(payrows)))
         for index in sequence:
             paystubHtml = None
             rowCols = payrows[index].findAll('td')
             rowDate = rowCols[DateIndex].a.string.strip()
-            rowTotalPay = float(rowCols[TotalIndex].string.strip().strip("$").translate(dict.fromkeys(map(ord,','),None)))
-            rowNetPay = float(rowCols[NetIndex].string.strip().strip("$").translate(dict.fromkeys(map(ord,','),None)))
+            rowTotalPay = float(rowCols[TotalIndex].string.strip().strip("$").translate(dict.fromkeys(list(map(ord,',')),None)))
+            rowNetPay = float(rowCols[NetIndex].string.strip().strip("$").translate(dict.fromkeys(list(map(ord,',')),None)))
             tmpDateTime = datetime.strptime(rowDate, '%m/%d/%Y')
             if GetHtml:
-                paystubResponse = self._br.open(rowCols[DateIndex].a['href'])
-                paystubHtml = paystubResponse.read()
+                paystubResponse = self._br.open_relative(rowCols[DateIndex].a['href'])
+                paystubHtml = paystubResponse.text
                 stubDetails = self._getPayStubDetails(paystubHtml)
-                self._br.back()
+                #self._br.back()
             tmpPayStub = paystub(tmpDateTime, rowTotalPay, rowNetPay, stubDetails, paystubHtml)
             ret.append(tmpPayStub)