diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..2f3651f
Binary files /dev/null and b/.DS_Store differ
diff --git a/.github/workflows/linting.yaml b/.github/workflows/linting.yaml
new file mode 100644
index 0000000..947de61
--- /dev/null
+++ b/.github/workflows/linting.yaml
@@ -0,0 +1,33 @@
+name: Linting Linux
+
+on:
+  pull_request:
+
+jobs:
+  linting:
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        python-version:
+          - "3.11"
+
+    name: linting
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install flake8
+        run: |
+          python -m pip install flake8
+
+      - name: Check format without making corrections
+        run: |
+          flake8
diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
new file mode 100644
index 0000000..ed47eb8
--- /dev/null
+++ b/.github/workflows/unit-test.yaml
@@ -0,0 +1,36 @@
+name: Unit Test Linux
+
+on: push
+
+jobs:
+  unit-test:
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        python-version:
+          - "3.11"
+
+    name: unit-test
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install -r requirements.txt
+
+      - name: Check that installed packages conform to requirements.txt
+        run: |
+          pip freeze -r requirements.txt
+
+      - name: Run tests
+        run: |
+          python -m unittest discover -v
diff --git a/.gitignore b/.gitignore
index 68bc17f..dffe030 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+# Act bin file for local testing
+bin/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -158,3 +161,6 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+playwright/.auth
+screen-shots
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..254e642
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,11 @@
+test_all:
+	python -m unittest discover -v
+
+test_replit_scrapper:
+	python -m unittest ./tests/test_replit_scrapper.py
+
+test_github_archiver:
+	python -m unittest ./tests/test_github_archiver.py
+
+lint:
+	flake8
\ No newline at end of file
diff --git a/README.md b/README.md
index ced3d09..d3474c1 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,6 @@
-# Helper app
+# Helper app
+
+[![Unit Test Linux](https://github.com/Python-Dojo/Dojo-Helper-App/actions/workflows/unit-test.yaml/badge.svg?branch=main)](https://github.com/Python-Dojo/Dojo-Helper-App/actions/workflows/unit-test.yaml)
+
 This is an app to help the hosts of the fortnightly Dojo do the common tasks quickly and effectively. This will include making the replit repos, placing invite links in the discord channel, copying the written code to the github archive and sending a link in the discord channel.
 Updates and tickets can be found on the projects page for bounty hunters to complete.
diff --git a/funcs/__init__.py b/funcs/__init__.py
new file mode 100644
index 0000000..5a86c79
--- /dev/null
+++ b/funcs/__init__.py
@@ -0,0 +1 @@
+# Placeholder file that marks "funcs" as a package so the .py files in this directory are importable.
diff --git a/funcs/func.py b/funcs/func.py
new file mode 100644
index 0000000..8d531d0
--- /dev/null
+++ b/funcs/func.py
@@ -0,0 +1,2 @@
+def sum(x: float, y: float) -> float:
+    return x + y
diff --git a/funcs/github_archiver.py b/funcs/github_archiver.py
new file mode 100644
index 0000000..3e4c4dc
--- /dev/null
+++ b/funcs/github_archiver.py
@@ -0,0 +1,121 @@
+from abc import ABC, abstractmethod
+import os
+from github import Github, Auth, InputGitTreeElement
+import base64
+
+
+class GithubArchiverInterface(ABC):
+    @abstractmethod
+    def identify_target_files(self):
+        """
+        Read the list of target files to be pushed to GitHub, excluding Replit's system files.
+        Raise an error if the target folder does not exist or is empty.
+        """
+        pass
+
+    @abstractmethod
+    def commit_to_github(self):
+        """
+        Commit target files to GitHub.
+        """
+        pass
+
+
+class GithubArchiver(GithubArchiverInterface):
+
+    def __init__(self, project_name, github_access_token, commit_message="Auto-archive") -> None:
+        self._project_name = project_name
+        self._file_paths = dict()
+        self._file_list = list()
+        self._commit_sha = ""
+        self.__github_access_token = github_access_token
+        self._commit_message = commit_message
+
+    def get_project_name(self) -> str:
+        return self._project_name
+
+    def identify_target_files(self) -> None:
+        print("GithubArchiver: Begin to parse target files...")
+        download_folder_path = "./screen-shots"
+        extracted_folder_path = os.path.join(download_folder_path, self.get_project_name())
+        assert os.path.isdir(extracted_folder_path) is True, "Target folder does not exist"
+        assert len(os.listdir(extracted_folder_path)) != 0, "Target folder is empty"
+
+        replit_junk = [
+            '.cache',
+            '.upm',
+            '.replit',
+            'poetry.lock',
+            'pyproject.toml',
+            'replit_zip_error_log.txt',
+            'replit.nix',
+        ]
+
+        # Walk through the directory and its subdirectories
+        for root, dirs, files in os.walk(extracted_folder_path):
+            for file in files:
+                file_full_path = os.path.join(root, file)
+                file_relative_path = file_full_path.replace(extracted_folder_path, self.get_project_name())
+                if not any(excluded in file_relative_path for excluded in replit_junk):
+                    self._file_paths[file_relative_path] = file_full_path
+                    self._file_list.append(file_relative_path)
+
+        print("GithubArchiver: Target files are parsed")
+
+    def get_target_files(self) -> list:
+        return self._file_list
+
+    def commit_to_github(self) -> None:
+        print("GithubArchiver: Begin to upload files to Github...")
+        assert len(self._file_list) != 0, "Target files are not identified"
+        auth = Auth.Token(self.__github_access_token)
+        g = Github(auth=auth)
+        repo = g.get_user().get_repo('The-Archive')
+        main_branch = repo.get_branch("main")
+        main_tree = repo.get_git_tree(sha=main_branch.commit.sha)
+
+        tree = list()
+        for file_relative_path, file_full_path in self._file_paths.items():
+
+            with open(file_full_path, "rb") as file:
+                file_content = file.read()
+
+            file_content_based64 = base64.b64encode(file_content)
+
+            blob = repo.create_git_blob(
+                content=file_content_based64.decode('utf-8'),
+                encoding="base64"
+            )
+
+            tree.append(
+                InputGitTreeElement(
+                    path=file_relative_path,
+                    mode="100644",
+                    type="blob",
+                    sha=blob.sha,
+                )
+            )
+
+        new_tree = repo.create_git_tree(
+            tree=tree,
+            base_tree=main_tree
+        )
+
+        commit = repo.create_git_commit(
+            message=self._commit_message,
+            tree=repo.get_git_tree(sha=new_tree.sha),
+            parents=[repo.get_git_commit(main_branch.commit.sha)],
+        )
+
+        archive_ref = repo.get_git_ref(ref='heads/main')
+        print(f"GithubArchiver: Archive_ref is {archive_ref}")
{archive_ref}") + self._commit_sha = commit.sha + + # Commit to Github + archive_ref.edit(sha=commit.sha) + print("GithubArchiver: Upload complete") + + g.close() + + def get_commit_sha(self) -> str: + return self._commit_sha diff --git a/funcs/replit_scrapper.py b/funcs/replit_scrapper.py new file mode 100644 index 0000000..00c5dd2 --- /dev/null +++ b/funcs/replit_scrapper.py @@ -0,0 +1,125 @@ +from playwright.sync_api import sync_playwright +from playwright_stealth import stealth_sync + + +class ReplitScrapper(): + user_agent = ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/116.0.0.0 " + "Safari/537.36 " + "Edg/116.0.1938.81" + ) + + def __init__(self, login_name, login_password): + self.__login_name = login_name + self.__login_password = login_password + self._replit_url = None + self._downloaded_filename = None + + def set_replit_url(self, replit_url) -> None: + if replit_url is None: + raise ValueError + self._replit_url = replit_url + + def get_replit_url(self) -> str: + if self._replit_url is None: + raise ValueError("Missing replit_url") + return self._replit_url + + def _set_downloaded_filename(self, filename) -> None: + if filename is None: + raise ValueError("ReplitScrapper._set_downloaded_filename() argument is None") + self._downloaded_filename = filename + + def get_downloaded_filename(self) -> str: + if self._downloaded_filename is None: + raise ValueError("Missing downloaded_filename") + return self._downloaded_filename + + def _visit_replit_repo(self, page) -> None: + response = page.goto(self.get_replit_url(), wait_until="domcontentloaded") + if response.status != 200: + if response.status == 404: + print(f"response.status = {response.status}") + raise ValueError("Invalid replit_url") + else: + print(f"response.status = {response.status}") + raise ValueError("ReplitScrapper._visit_replit_repo() something other than 404 happened") + + def _login_replit(self, page) -> None: + # Login + page.goto('https://replit.com/login', wait_until="domcontentloaded") + page.screenshot(path="./screen-shots/replit.png") + url_init = "https://identitytoolkit.googleapis.com/v1/accounts" + with page.expect_response(lambda response: url_init in response.url) as response_info: + page.locator( + "xpath=/html/body/div[1]/div/div[2]/div/main/div[2]/div/form/div[1]/input" + ).fill(self.__login_name) + page.locator( + "xpath=/html/body/div[1]/div/div[2]/div/main/div[2]/div/form/div[2]/div/input" + ).fill(self.__login_password) + page.locator( + "xpath=/html/body/div[1]/div/div[2]/div/main/div[2]/div/form/div[3]/button" + ).click() + response = response_info.value + if response.status != 200: + print(response) + if response.status == 400: + print(f"response.status = {response.status}") + raise ValueError("Invalid login credentials") + else: + print(f"response.status = {response.status}") + raise ValueError("ReplitScrapper._login_replit() something other than 401 happened") + page.wait_for_url("https://replit.com/~") + page.screenshot(path="./screen-shots/replit_after_login.png") + + def _download_as_zip(self, page) -> None: + # Wait for page load + page.locator( + "xpath=/html/body/div[1]/div[1]/div[1]/div[2]/div/div[1]/div/div[3]/div/div[1]/button/div/span" + ).wait_for() + while page.locator( + "xpath=/html/body/div[1]/div[1]/div[1]/div[2]/header/div[2]/button" + ).text_content() != "Run": + print(page.locator( + "xpath=/html/body/div[1]/div[1]/div[1]/div[2]/header/div[2]/button" + ).text_content()) + page.wait_for_timeout(2000) + 
page.screenshot(path="./screen-shots/target_page.png") + + # Begin downloading + page.locator( + "xpath=/html/body/div[1]/div[1]/div[1]/div[2]/div/div[1]/div/div[2]/div[1]/div[1]/div/button[3]" + ).click() + with page.expect_download() as download_info: + page.locator( + "xpath=/html/body/div[@class='css-1o92kwk']//div[@id='item-4']//div[@class='css-1l2rn59']" + ).click() + download = download_info.value + self._set_downloaded_filename(download.suggested_filename) + download.save_as(f"./screen-shots/{download.suggested_filename}") + + def run(self): + print("ReplitScrapper: Begin downloading repo files...") + with sync_playwright() as p: + # Context setup + browser = p.chromium.launch(slow_mo=50) + # browser = p.chromium.launch(headless=False + # , slow_mo=50 + # ) + context = browser.new_context(user_agent=ReplitScrapper.user_agent) + page = context.new_page() + stealth_sync(page) + + # Login replit + self._login_replit(page) + + # Download repo files as zip + self._visit_replit_repo(page) + self._download_as_zip(page) + + # Clean-up + context.close() + browser.close() + print("ReplitScrapper: Download complete") diff --git a/main.py b/main.py new file mode 100644 index 0000000..c56f249 --- /dev/null +++ b/main.py @@ -0,0 +1,31 @@ +from funcs.replit_scrapper import ReplitScrapper +from funcs.github_archiver import GithubArchiver +import os +import zipfile +from dotenv import load_dotenv +load_dotenv() + +WDIR = os.path.abspath(os.path.dirname(__name__)) + +if __name__ == "__main__": + test_url = "https://replit.com/@pythondojoarchi/SlipperyGargantuanDebuggers" + project_name = "SlipperyGargantuanDebuggers" + + # Download repo files as zip + scrapper = ReplitScrapper(login_name=os.environ['EMAIL'], login_password=os.environ['PASSWORD']) + scrapper.set_replit_url(test_url) + scrapper.run() + + # Unzip downloaded zip file + download_folder_path = os.path.join(WDIR, "screen-shots") + full_file_path = os.path.join(download_folder_path, project_name+".zip") + extracted_folder_path = os.path.join(download_folder_path, project_name) + zipfile.ZipFile(full_file_path).extractall(extracted_folder_path) + + # Commit target files to Github + archiver = GithubArchiver( + project_name=project_name, + github_access_token=os.environ['GITHUB_ACCESS_TOKEN'] + ) + archiver.identify_target_files() + archiver.commit_to_github() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d05daec --- /dev/null +++ b/requirements.txt @@ -0,0 +1,59 @@ +appnope==0.1.3 +asttokens==2.4.1 +certifi==2023.11.17 +cffi==1.16.0 +charset-normalizer==3.3.2 +comm==0.2.0 +cryptography==41.0.7 +debugpy==1.8.0 +decorator==5.1.1 +Deprecated==1.2.14 +executing==2.0.1 +flake8==6.1.0 +greenlet==3.0.1 +idna==3.6 +iniconfig==2.0.0 +ipykernel==6.27.1 +ipython==8.19.0 +jedi==0.19.1 +jupyter_client==8.6.0 +jupyter_core==5.5.1 +matplotlib-inline==0.1.6 +mccabe==0.7.0 +nest-asyncio==1.5.8 +packaging==23.2 +parso==0.8.3 +pexpect==4.9.0 +platformdirs==4.1.0 +playwright==1.40.0 +playwright-stealth==1.0.6 +pluggy==1.3.0 +prompt-toolkit==3.0.43 +psutil==5.9.7 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pycodestyle==2.11.0 +pycparser==2.21 +pyee==11.0.1 +pyflakes==3.1.0 +PyGithub==2.1.1 +Pygments==2.17.2 +PyJWT==2.8.0 +PyNaCl==1.5.0 +pytest==7.4.3 +pytest-base-url==2.0.0 +pytest-playwright==0.4.3 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-slugify==8.0.1 +pyzmq==25.1.2 +requests==2.31.0 +six==1.16.0 +stack-data==0.6.3 +text-unidecode==1.3 +tornado==6.4 +traitlets==5.14.0 +typing_extensions==4.9.0 +urllib3==2.1.0 
+wcwidth==0.2.12
+wrapt==1.16.0
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..66da313
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,10 @@
+[flake8]
+max-line-length = 120
+exclude =
+    .git,
+    __pycache__,
+    docs/source/conf.py,
+    old,
+    build,
+    dist,
+    venv
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..5a86c79
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+# Placeholder file that marks "tests" as a package so the .py files in this directory are importable.
diff --git a/tests/test_func.py b/tests/test_func.py
new file mode 100644
index 0000000..8618217
--- /dev/null
+++ b/tests/test_func.py
@@ -0,0 +1,12 @@
+import unittest
+from funcs import func
+
+
+class Test(unittest.TestCase):
+
+    def test_sum(self):
+        self.assertEqual(func.sum(2, 3), 5)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_github_archiver.py b/tests/test_github_archiver.py
new file mode 100644
index 0000000..75682dc
--- /dev/null
+++ b/tests/test_github_archiver.py
@@ -0,0 +1,78 @@
+# from github import Github, Auth
+import unittest
+from funcs.github_archiver import GithubArchiver
+import os
+from dotenv import load_dotenv
+load_dotenv()
+
+
+class Test(unittest.TestCase):
+
+    def test_archiver_raise_error_if_target_folder_does_not_exist(self):
+        archiver = GithubArchiver(
+            project_name="directory_not_exist",
+            github_access_token=os.environ['GITHUB_ACCESS_TOKEN']
+        )
+        with self.assertRaises(AssertionError) as ctx_manager:
+            archiver.identify_target_files()
+        self.assertEqual(str(ctx_manager.exception), "Target folder does not exist")
+
+    def test_archiver_raise_error_if_target_folder_is_empty(self):
+        archiver = GithubArchiver(
+            project_name="empty_folder",
+            github_access_token=os.environ['GITHUB_ACCESS_TOKEN']
+        )
+        with self.assertRaises(AssertionError) as ctx_manager:
+            archiver.identify_target_files()
+        self.assertEqual(str(ctx_manager.exception), "Target folder is empty")
+
+    # def test_archiver_return_list_of_target_files(self):
+    #     target_list = [
+    #         "SlipperyGargantuanDebuggers/test-README.md",
+    #         "SlipperyGargantuanDebuggers/road.jpg",
+    #         "SlipperyGargantuanDebuggers/test.py",
+    #         "SlipperyGargantuanDebuggers/main.py",
+    #         "SlipperyGargantuanDebuggers/Group-1/test-1.txt",
+    #         "SlipperyGargantuanDebuggers/Group-2/test-2.txt",
+    #     ]

+    #     archiver = GithubArchiver(
+    #         project_name="SlipperyGargantuanDebuggers",
+    #         github_access_token=os.environ['GITHUB_ACCESS_TOKEN']
+    #     )
+    #     archiver.identify_target_files()
+    #     # https://stackoverflow.com/questions/12813633/how-to-assert-two-list-contain-the-same-elements-in-python
+    #     self.assertCountEqual(archiver.get_target_files(), target_list)
+
+    # def test_archiver_raise_error_if_target_files_not_set(self):
+    #     archiver = GithubArchiver(
+    #         project_name="SlipperyGargantuanDebuggers",
+    #         github_access_token=os.environ['GITHUB_ACCESS_TOKEN']
+    #     )
+    #     with self.assertRaises(AssertionError) as ctx_manager:
+    #         archiver.commit_to_github()
+
+    #     self.assertEqual(str(ctx_manager.exception), "Target files are not identified")
+
+    # def test_archiver_upload_target_files_to_github(self):
+    #     archiver = GithubArchiver(
+    #         project_name="SlipperyGargantuanDebuggers",
+    #         github_access_token=os.environ['GITHUB_ACCESS_TOKEN']
+    #     )
+    #     archiver.identify_target_files()
+    #     archiver.commit_to_github()
+
+    #     auth = Auth.Token(os.environ['GITHUB_ACCESS_TOKEN'])
+    #     g = Github(auth=auth)
+    #     repo = g.get_user().get_repo('The-Archive')
+    #     commit = repo.get_commit(archiver.get_commit_sha())
+    #     target_list = list()
+    #     for file in commit.files:
+    #         target_list.append(file.filename)
+    #     g.close()
+
+    #     self.assertCountEqual(archiver.get_target_files(), target_list)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_replit_scrapper.py b/tests/test_replit_scrapper.py
new file mode 100644
index 0000000..34b902c
--- /dev/null
+++ b/tests/test_replit_scrapper.py
@@ -0,0 +1,46 @@
+import unittest
+from funcs.replit_scrapper import ReplitScrapper
+# import os
+from dotenv import load_dotenv
+load_dotenv()
+
+
+class Test(unittest.TestCase):
+
+    def test_scrapper_raise_value_error_when_replit_url_not_set(self):
+        scrapper = ReplitScrapper(login_name=None, login_password=None)
+        with self.assertRaises(ValueError) as ctx_manager:
+            scrapper.get_replit_url()
+        self.assertEqual(str(ctx_manager.exception), 'Missing replit_url')
+
+    def test_scrapper_return_replit_url(self):
+        test_url = "https://replit.com/@pythondojoarchi/SlipperyGargantuanDebuggers"
+
+        scrapper = ReplitScrapper(login_name=None, login_password=None)
+        scrapper.set_replit_url(test_url)
+        self.assertEqual(scrapper.get_replit_url(), test_url)
+
+    # Commented out to avoid replit account freezes
+    # def test_scrapper_login_with_invalid_credentials(self):
+    #     scrapper = ReplitScrapper(login_name = os.environ['EMAIL'], login_password = "ThisIsNotTheCorrectPassword")
+    #     with self.assertRaises(ValueError) as ctx_manager:
+    #         scrapper.run()
+    #     self.assertEqual(str(ctx_manager.exception), 'Invalid login credentials')
+
+    # def test_scrapper_download_repo_as_zip(self):
+    #     test_url = "https://replit.com/@pythondojoarchi/SlipperyGargantuanDebuggers"
+    #     target_zip_name = "SlipperyGargantuanDebuggers.zip"
+    #     WDIR = os.path.abspath(os.path.dirname(__name__))
+    #     full_target_file_path = os.path.join(WDIR, "screen-shots", target_zip_name)
+    #     print(full_target_file_path)
+
+    #     scrapper = ReplitScrapper(login_name=os.environ['EMAIL'], login_password=os.environ['PASSWORD'])
+    #     scrapper.set_replit_url(test_url)
+    #     scrapper.run()
+
+    #     print(scrapper.get_downloaded_filename())
+    #     self.assertTrue(os.path.exists(full_target_file_path))
+
+
+if __name__ == "__main__":
+    unittest.main()