From ebd18100dffe0ce5ba5f25a96d130e6f5451714c Mon Sep 17 00:00:00 2001 From: Yan Wen <60089662+uniquePaul@users.noreply.github.com> Date: Wed, 14 Jul 2021 20:58:50 -0700 Subject: [PATCH 1/5] refactor: PortalFetch crawler_download.py can accept multiple quarterValues now --- PortalFetch/crawler_download.py | 56 +++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/PortalFetch/crawler_download.py b/PortalFetch/crawler_download.py index 26193cd..d04ba96 100644 --- a/PortalFetch/crawler_download.py +++ b/PortalFetch/crawler_download.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- """Fetch course information from De Anza myportal. It requires file 'user.ini' to load the user's own user name and password. @@ -206,34 +207,42 @@ def waitUtilPageLoaded(driver, count): raise ElementNotVisibleException("Could not load the full page!") -def generateQuarterAndFilename(quarterValue): +def generateQuarterAndFilename(quarterValueStr): """Return quarter and filename. Args: quarterValue:the quarter_value in crawler.config Returns: - quarter str and filename str + quarter str list and filename str list """ - year = quarterValue[0:4] - quarterSwitcher = { - "1": "Summer", - "2": "Fall", - "3": "Winter", - "4": "Spring", - } - schoolSwitcher = { - "1": "Foothill", - "2": "De Anza", - } - school = schoolSwitcher.get(quarterValue[5], "") - quarter = quarterSwitcher.get(quarterValue[4], "") - if quarter == "Summer": - year = str(int(year)-1) - quarterOutput = year + " " + quarter + " " + school - if school == "De Anza": - school = "De_Anza" - fileNameOutput = year + "_" + quarter + "_" + school + "_courseData.json" + n = 6 + quarterValueList = [quarterValueStr[i:i+n] for i in range(0, len(quarterValueStr), n)] + fileNameOutput = [] + quarterOutput = [] + for quarterValue in quarterValueList: + year = quarterValue[0:4] + quarterSwitcher = { + "1": "Summer", + "2": "Fall", + "3": "Winter", + "4": "Spring", + } + schoolSwitcher = { + "1": "Foothill", + "2": "De Anza", + } + school = schoolSwitcher.get(quarterValue[5], "") + quarter = quarterSwitcher.get(quarterValue[4], "") + if quarter == "Summer": + year = str(int(year)-1) + + quarterOutput.append(year + " " + quarter + " " + school) + if school == "De Anza": + school = "De_Anza" + + fileNameOutput.append(year + "_" + quarter + "_" + school + "_courseData.json") + return quarterOutput, fileNameOutput @@ -278,9 +287,10 @@ def main(): # Save searched courses html = saveResult(driver) # get quarter and filename based on quarter_value in crawler.config - quarter, filename = generateQuarterAndFilename(value) + quarter_list, filename_list = generateQuarterAndFilename(value) + for i in range(0, len(filename_list)): + DataProcess().data_process(html, filename_list[i], quarter_list[i]) - DataProcess().data_process(html, filename, quarter) logging.info("Download Finished!") except Exception as e: logger.error(repr(e)) From f2444ee78febe6fc00460e9e27d8f36fc81704a3 Mon Sep 17 00:00:00 2001 From: Yan Wen <60089662+uniquePaul@users.noreply.github.com> Date: Sun, 18 Jul 2021 13:22:59 -0700 Subject: [PATCH 2/5] refactor: fixed variable names and adding comments --- PortalFetch/crawler_download.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/PortalFetch/crawler_download.py b/PortalFetch/crawler_download.py index d04ba96..07b5aa3 100644 --- a/PortalFetch/crawler_download.py +++ b/PortalFetch/crawler_download.py @@ -216,8 +216,8 @@ def generateQuarterAndFilename(quarterValueStr): quarter str list and filename str list """ - n = 6 - quarterValueList = [quarterValueStr[i:i+n] for i in range(0, len(quarterValueStr), n)] + singleQuarterValueLength = 6 + quarterValueList = [quarterValueStr[i:i+n] for i in range(0, len(quarterValueStr), singleQuarterValueLength)] fileNameOutput = [] quarterOutput = [] for quarterValue in quarterValueList: @@ -232,8 +232,13 @@ def generateQuarterAndFilename(quarterValueStr): "1": "Foothill", "2": "De Anza", } + + # the number in index 5 indicates foothill: 1 or De Anza: 2 school = schoolSwitcher.get(quarterValue[5], "") + + # the number in index 4 indicates quarter starts from 1(summer) to 4(Spring) quarter = quarterSwitcher.get(quarterValue[4], "") + if quarter == "Summer": year = str(int(year)-1) @@ -288,7 +293,7 @@ def main(): html = saveResult(driver) # get quarter and filename based on quarter_value in crawler.config quarter_list, filename_list = generateQuarterAndFilename(value) - for i in range(0, len(filename_list)): + for i in range(len(filename_list)): DataProcess().data_process(html, filename_list[i], quarter_list[i]) logging.info("Download Finished!") From 37c8cb8b1c8f81a09bfd7666941af2d4824ce00e Mon Sep 17 00:00:00 2001 From: uniquePaul Date: Tue, 27 Jul 2021 08:33:23 -0700 Subject: [PATCH 3/5] refactor: able to download series of quartervalue example input: 202111_202022 --- PortalFetch/crawler_download.py | 142 ++++++++++++++------------------ 1 file changed, 63 insertions(+), 79 deletions(-) diff --git a/PortalFetch/crawler_download.py b/PortalFetch/crawler_download.py index 07b5aa3..b24dbdc 100644 --- a/PortalFetch/crawler_download.py +++ b/PortalFetch/crawler_download.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """Fetch course information from De Anza myportal. It requires file 'user.ini' to load the user's own user name and password. @@ -111,6 +110,7 @@ def openSearchPage(driver): driver.switch_to.window(name) # Waiting for elements in the page to appear, indicating that the page has finished loading waitUtilPageLoaded(driver, 30) + def findAppsMenu(driver): @@ -207,47 +207,34 @@ def waitUtilPageLoaded(driver, count): raise ElementNotVisibleException("Could not load the full page!") -def generateQuarterAndFilename(quarterValueStr): +def generateQuarterAndFilename(quarterValue): """Return quarter and filename. Args: quarterValue:the quarter_value in crawler.config Returns: - quarter str list and filename str list + quarter str and filename str """ - singleQuarterValueLength = 6 - quarterValueList = [quarterValueStr[i:i+n] for i in range(0, len(quarterValueStr), singleQuarterValueLength)] - fileNameOutput = [] - quarterOutput = [] - for quarterValue in quarterValueList: - year = quarterValue[0:4] - quarterSwitcher = { - "1": "Summer", - "2": "Fall", - "3": "Winter", - "4": "Spring", - } - schoolSwitcher = { - "1": "Foothill", - "2": "De Anza", - } - - # the number in index 5 indicates foothill: 1 or De Anza: 2 - school = schoolSwitcher.get(quarterValue[5], "") - - # the number in index 4 indicates quarter starts from 1(summer) to 4(Spring) - quarter = quarterSwitcher.get(quarterValue[4], "") - - if quarter == "Summer": - year = str(int(year)-1) - - quarterOutput.append(year + " " + quarter + " " + school) - if school == "De Anza": - school = "De_Anza" - - fileNameOutput.append(year + "_" + quarter + "_" + school + "_courseData.json") - + year = quarterValue[0:4] + quarterSwitcher = { + "1": "Summer", + "2": "Fall", + "3": "Winter", + "4": "Spring", + } + schoolSwitcher = { + "1": "Foothill", + "2": "De Anza", + } + school = schoolSwitcher.get(quarterValue[5], "") + quarter = quarterSwitcher.get(quarterValue[4], "") + if quarter == "Summer": + year = str(int(year)-1) + quarterOutput = year + " " + quarter + " " + school + if school == "De Anza": + school = "De_Anza" + fileNameOutput = year + "_" + quarter + "_" + school + "_courseData.json" return quarterOutput, fileNameOutput @@ -257,50 +244,47 @@ def main(): Login in De Anza myportal using username and password. click Apps-Lookup Classes-Select by term -submit-Advanced Search-in Subject, select all-Section search-Download all the course infromation-Save in an excel """ - driver = webdriver.Chrome(ChromeDriverManager().install()) - login_myportal(driver) - - # Wait for the 'list-group-item' can be found and clicked - web_driver_counter = 400 - list_group_item = None - while web_driver_counter: + quartervalue = parser.get('config', 'quarter_value') + quartervalueList = quartervalue.split('_') + for value in quartervalueList: + driver = webdriver.Chrome(ChromeDriverManager().install()) + login_myportal(driver) + # Wait for the 'list-group-item' can be found and clicked + web_driver_counter = 400 + list_group_item = None + while web_driver_counter: + try: + list_group_item = driver.find_element_by_class_name("list-group-item") + except: + pass + web_driver_counter -= 1 + if not list_group_item: + logger.error("Could not find list-group item!") + raise NoSuchElementException("Could not find list-group item!") try: - list_group_item = driver.find_element_by_class_name("list-group-item") - except: - pass - web_driver_counter -= 1 - if not list_group_item: - logger.error("Could not find list-group item!") - raise NoSuchElementException("Could not find list-group item!") - - try: - # Course search page from homepage after login - openSearchPage(driver) - selectelement = driver.find_element_by_tag_name("select") - # Select specified course - quarter_downlist = Select(selectelement) - value = parser.get('config', 'quarter_value') - quarter_downlist.select_by_value(value) - # click 'Submit' button - locateButton(driver, "submit") - # click 'Advance Search' button - locateButton(driver, "advance") - # Wait while the page is loading - waitUtilPageLoaded(driver, 30) - # Go to the advanced options page and start filling in various search terms - fillAdvanceSearch(driver) - # Save searched courses - html = saveResult(driver) - # get quarter and filename based on quarter_value in crawler.config - quarter_list, filename_list = generateQuarterAndFilename(value) - for i in range(len(filename_list)): - DataProcess().data_process(html, filename_list[i], quarter_list[i]) - - logging.info("Download Finished!") - except Exception as e: - logger.error(repr(e)) - sys.exit(-1) - - + # Course search page from homepage after login + openSearchPage(driver) + selectelement = driver.find_element_by_tag_name("select") + # Select specified course + quarter_downlist = Select(selectelement) + quarter_downlist.select_by_value(value) + # click 'Submit' button + locateButton(driver, "submit") + # click 'Advance Search' button + locateButton(driver, "advance") + # Wait while the page is loading + waitUtilPageLoaded(driver, 30) + # Go to the advanced options page and start filling in various search terms + fillAdvanceSearch(driver) + # Save searched courses + html = saveResult(driver) + # get quarter and filename based on quarter_value in crawler.config + quarter, filename = generateQuarterAndFilename(value) + DataProcess().data_process(html, filename, quarter) + logging.info("Download Finished!") + except Exception as e: + logger.error(repr(e)) + sys.exit(-1) + if __name__ == "__main__": main() From 38d3396f6b52fd84916048f2d624b9582d859c83 Mon Sep 17 00:00:00 2001 From: uniquePaul Date: Tue, 27 Jul 2021 08:34:25 -0700 Subject: [PATCH 4/5] refactor: able to download series of quartervalues example input: 202111_202022 --- PortalFetch/crawler_download.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PortalFetch/crawler_download.py b/PortalFetch/crawler_download.py index b24dbdc..8a8ef58 100644 --- a/PortalFetch/crawler_download.py +++ b/PortalFetch/crawler_download.py @@ -110,7 +110,6 @@ def openSearchPage(driver): driver.switch_to.window(name) # Waiting for elements in the page to appear, indicating that the page has finished loading waitUtilPageLoaded(driver, 30) - def findAppsMenu(driver): @@ -285,6 +284,7 @@ def main(): except Exception as e: logger.error(repr(e)) sys.exit(-1) - + + if __name__ == "__main__": main() From 3e6f0de8a64fa90ac755138b94f15f5c503105dc Mon Sep 17 00:00:00 2001 From: uniquePaul Date: Tue, 27 Jul 2021 16:05:15 -0700 Subject: [PATCH 5/5] refactor: able to download based on series quartervalue example input: 202122_202121 --- PortalFetch/crawler_download.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/PortalFetch/crawler_download.py b/PortalFetch/crawler_download.py index 8a8ef58..7c01fbb 100644 --- a/PortalFetch/crawler_download.py +++ b/PortalFetch/crawler_download.py @@ -30,7 +30,6 @@ options.add_argument('--ignore-certificate-errors') options.add_argument('--ignore-ssl-errors') - def locateButton(driver, button): """Search a specific button and click it if found. @@ -59,7 +58,6 @@ def locateButton(driver, button): return raise NoSuchElementException(button + " element is not found!") - def login_myportal(driver): """Open myportal website and login. @@ -84,7 +82,6 @@ def login_myportal(driver): except: raise KeyError("Login failed, please check input username/password!") - def openSearchPage(driver): """Click 'Apps'->'Look Up Classes' and open search page. @@ -111,7 +108,6 @@ def openSearchPage(driver): # Waiting for elements in the page to appear, indicating that the page has finished loading waitUtilPageLoaded(driver, 30) - def findAppsMenu(driver): """Find Apps menu. @@ -133,7 +129,6 @@ def findAppsMenu(driver): if not appMenu: raise NoSuchElementException("Apps menu is not found!") - def lookUpClasses(driver): """Find app list. @@ -154,7 +149,6 @@ def lookUpClasses(driver): return classes raise NoSuchElementException("No Look Up Classes feature found in the app list!") - def fillAdvanceSearch(driver): """Go to the advanced options page and select all options in Subject list. @@ -173,7 +167,6 @@ def fillAdvanceSearch(driver): subjectListSelect.select_by_index(i) locateButton(driver, "section") - def saveResult(driver): """Save the results of courses to a html. @@ -187,7 +180,6 @@ def saveResult(driver): html = driver.page_source return html - def waitUtilPageLoaded(driver, count): """Wait until page loaded. @@ -205,7 +197,6 @@ def waitUtilPageLoaded(driver, count): return raise ElementNotVisibleException("Could not load the full page!") - def generateQuarterAndFilename(quarterValue): """Return quarter and filename. @@ -236,7 +227,6 @@ def generateQuarterAndFilename(quarterValue): fileNameOutput = year + "_" + quarter + "_" + school + "_courseData.json" return quarterOutput, fileNameOutput - def main(): """Download course information from De Anza myportal. @@ -285,6 +275,5 @@ def main(): logger.error(repr(e)) sys.exit(-1) - if __name__ == "__main__": main()