Changes from all commits
35 commits
ad5922a
Add simple function and script for testing
fungss Aug 24, 2023
e26d377
Organize src and test files and added github workflow for testing
fungss Aug 25, 2023
c1c849e
Remove python 3.7 and 3.8 as they have no required distribution of nu…
fungss Aug 25, 2023
31f9d6b
Change github workflow to only run on pull_request
fungss Aug 25, 2023
e876263
Add status badge to README
fungss Aug 25, 2023
333217d
Revise README layout
fungss Aug 25, 2023
d9063a5
Add test to check installed packages
fungss Aug 25, 2023
bf4ec55
Fix package-check error
fungss Aug 25, 2023
32b8537
Add comment to explain the usage of __init__.py
fungss Aug 30, 2023
b4f91ae
Rename workflow name to Unit Test Linux
fungss Aug 30, 2023
6b3d60b
Comment out pull_request as trigger condition
fungss Aug 30, 2023
ec81419
Rename badge to Unit Test Linux
fungss Aug 30, 2023
73c6c53
Update badge link
fungss Aug 30, 2023
e181b9c
Resume previous os setting to ensure actions job run successfully
fungss Aug 30, 2023
01895c4
Remove python 3.7 in future unit test pipeline
fungss Aug 30, 2023
c6f9f27
Stage changes to README.md
fungss Aug 31, 2023
7262b1a
Update badge link in README.md
fungss Aug 31, 2023
3cebb8e
Remove python 3.10 support
JustCallMeRay Aug 31, 2023
9223614
Add package flake8 to requirements.txt
fungss Sep 7, 2023
f9252f5
Add function-to-be-test and the corresponding unit test
fungss Sep 7, 2023
90663ac
Add check format step in github workflow
fungss Sep 7, 2023
c3e4539
Update GitHub CI workflow to only run on pull request.
fungss Sep 8, 2023
5a5e648
Back up misformatted py file as .txt as example
fungss Sep 18, 2023
546074e
Update py files formats according to flake8
fungss Sep 18, 2023
b2b1eeb
Remove function and test for testing flake8
fungss Sep 21, 2023
78d7f6f
Remove function and test for testing flake8
fungss Sep 21, 2023
bdcc0d6
Remove test packages in requirements.txt
fungss Sep 21, 2023
5ce29e4
Remove test packages in requirements.txt
fungss Sep 21, 2023
8372d65
Split workflows to different .yaml files, configured unit-test to be …
fungss Sep 21, 2023
d367225
Remove commented code
fungss Sep 22, 2023
ae56eb0
Remove exclude= section in setup.cfg
fungss Sep 22, 2023
e98eb34
Exclude unnecessary directory in setup.cfg and added flake8 package i…
fungss Sep 23, 2023
0cc39cc
Update workflow files
fungss Sep 23, 2023
4a54b4d
Update unit-tests to run on push
JustCallMeRay Sep 25, 2023
8a187c4
Add ReplitScrapper and GithubArchiver classes
fungss Oct 14, 2023
Binary file added .DS_Store
Binary file not shown.
33 changes: 33 additions & 0 deletions .github/workflows/linting.yaml
@@ -0,0 +1,33 @@
name: Linting Linux

on:
  pull_request:

jobs:
  linting:
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version:
          - "3.11"

    name: linting
    runs-on: ${{ matrix.os }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install flake8
        run: |
          python -m pip install flake8

      - name: Check format without making corrections
        run: |
          flake8
37 changes: 37 additions & 0 deletions .github/workflows/unit-test.yaml
@@ -0,0 +1,37 @@
name: Unit Test Linux

on: push

jobs:
  unit-test:
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version:
          - "3.11"

    name: unit-test
    runs-on: ${{ matrix.os }}
    if: ${{ github.event.workflow_run.conclusion == 'success' }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install -r requirements.txt

      - name: Check that installed packages conform to requirements.txt
        run: |
          pip freeze -r requirements.txt

      - name: Run tests
        run: |
          python -m unittest discover -v
6 changes: 6 additions & 0 deletions .gitignore
@@ -1,3 +1,6 @@
# Act bin file for local testing
bin/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
@@ -158,3 +161,6 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

playwright/.auth
screen-shots
11 changes: 11 additions & 0 deletions Makefile
@@ -0,0 +1,11 @@
test_all:
	python -m unittest discover -v

test_replit_scrapper:
	python -m unittest ./tests/test_replit_scrapper.py

test_github_archiver:
	python -m unittest ./tests/test_github_archiver.py

lint:
	flake8
5 changes: 4 additions & 1 deletion README.md
@@ -1,3 +1,6 @@
# Helper app
# Helper app

[![Unit Test Linux](https://github.com/Python-Dojo/Dojo-Helper-App/actions/workflows/unit-test.yaml/badge.svg?branch=main)](https://github.com/Python-Dojo/Dojo-Helper-App/actions/workflows/unit-test.yaml)

This is an app to help the hosts of the fortnightly Dojo do common tasks quickly and effectively. This includes making the replit repos, placing invite links in the Discord channel, copying the written code to the GitHub archive and sending a link in the Discord channel.
Updates and tickets can be found on the projects page for bounty hunters to complete.
1 change: 1 addition & 0 deletions funcs/__init__.py
@@ -0,0 +1 @@
# Placeholder file that turns the "funcs" directory into a package, making its .py files importable.
2 changes: 2 additions & 0 deletions funcs/func.py
@@ -0,0 +1,2 @@
def sum(x: float, y: float) -> float:
    return x + y
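
The Unit Test Linux workflow earlier in this diff runs python -m unittest discover -v, but no test module is included in the PR itself. A minimal sketch of one for this helper, assuming a tests/ package exists and using the hypothetical file name tests/test_func.py:

```python
# tests/test_func.py  (hypothetical file name, not part of this PR)
import unittest

from funcs.func import sum  # the two-argument helper defined in funcs/func.py


class TestSum(unittest.TestCase):
    def test_adds_two_floats(self):
        # assertAlmostEqual avoids spurious failures from float rounding
        self.assertAlmostEqual(sum(0.1, 0.2), 0.3)

    def test_adds_negative_numbers(self):
        self.assertEqual(sum(-2.0, 5.0), 3.0)


if __name__ == "__main__":
    unittest.main()
```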
121 changes: 121 additions & 0 deletions funcs/github_archiver.py
@@ -0,0 +1,121 @@
from abc import ABC, abstractmethod
import os
from github import Github, Auth, InputGitTreeElement
import base64


class GithubArchiverInterface(ABC):
    @abstractmethod
    def identify_target_files():
        """
        Read list of target files to be pushed to github, excluding replit's system files.
        Raise error if target folder does not exist or is empty.
        """
        pass

    @abstractmethod
    def commit_to_github():
        """
        Commit target files to github.
        """
        pass


class GithubArchiver(GithubArchiverInterface):

    def __init__(self, project_name, github_access_token, commit_message="Auto-archive") -> None:
        self._project_name = project_name
        self._file_paths = dict()
        self._file_list = list()
        self._commit_sha = ""
        self.__github_access_token = github_access_token
        self._commit_message = commit_message

    def get_project_name(self) -> str:
        return self._project_name

    def identify_target_files(self) -> None:
        print("GithubArchiver: Begin to parse target files...")
        download_folder_path = "./screen-shots"
        extracted_folder_path = os.path.join(download_folder_path, self.get_project_name())
        assert os.path.isdir(extracted_folder_path) is True, "Target folder does not exist"
        assert len(os.listdir(extracted_folder_path)) != 0, "Target folder is empty"

        replit_junk = [
            '.cache',
            '.upm',
            '.replit',
            'poetry.lock',
            'pyproject.toml',
            'replit_zip_error_log.txt',
            'replit.nix',
        ]

        # Walk through the directory and its subdirectories
        for root, dirs, files in os.walk(extracted_folder_path):
            for file in files:
                file_full_path = os.path.join(root, file)
                file_relative_path = file_full_path.replace(extracted_folder_path, self.get_project_name())
                if not any(excluded in file_relative_path for excluded in replit_junk):
                    self._file_paths[file_relative_path] = file_full_path
                    self._file_list.append(file_relative_path)

        print("GithubArchiver: Target files are parsed")

    def get_target_files(self) -> list:
        return self._file_list

    def commit_to_github(self) -> None:
        print("GithubArchiver: Begin to upload files to Github...")
        assert len(self._file_list) != 0, "Target files are not identified"
        auth = Auth.Token(self.__github_access_token)
        g = Github(auth=auth)
        repo = g.get_user().get_repo('The-Archive')
        main_branch = repo.get_branch("main")
        main_tree = repo.get_git_tree(sha=main_branch.commit.sha)

        tree = list()
        for file_relative_path, file_full_path in self._file_paths.items():

            with open(file_full_path, "rb") as file:
                file_content = file.read()

            file_content_based64 = base64.b64encode(file_content)

            blob = repo.create_git_blob(
                content=file_content_based64.decode('utf-8'),
                encoding="base64"
            )

            tree.append(
                InputGitTreeElement(
                    path=file_relative_path,
                    mode="100644",
                    type="blob",
                    sha=blob.sha,
                )
            )

        new_tree = repo.create_git_tree(
            tree=tree,
            base_tree=main_tree
        )

        commit = repo.create_git_commit(
            message=self._commit_message,
            tree=repo.get_git_tree(sha=new_tree.sha),
            parents=[repo.get_git_commit(main_branch.commit.sha)],
        )

        archive_ref = repo.get_git_ref(ref='heads/main')
        print(f"GithubArchiver: Archive_ref is {archive_ref}")
        self._commit_sha = commit.sha

        # Commit to Github
        archive_ref.edit(sha=commit.sha)
        print("GithubArchiver: Upload complete")

        g.close()

    def get_commit_sha(self) -> str:
        return self._commit_sha
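
The Makefile adds a test_github_archiver target pointing at tests/test_github_archiver.py, which is not part of this diff. One browser- and network-free way to test identify_target_files is to create the hard-coded ./screen-shots/<project> folder on the fly and check that replit junk is filtered out. A sketch under those assumptions (the project name, file names and file contents are invented, and PyGithub must be installed because the module imports it at the top):

```python
# tests/test_github_archiver.py  (sketch only; the real file referenced by the Makefile is not shown)
import os
import shutil
import unittest

from funcs.github_archiver import GithubArchiver


class TestIdentifyTargetFiles(unittest.TestCase):
    PROJECT = "UnitTestProject"  # hypothetical project name used only by this test

    def setUp(self):
        # identify_target_files() reads from the hard-coded ./screen-shots folder
        self.project_dir = os.path.join("./screen-shots", self.PROJECT)
        os.makedirs(self.project_dir, exist_ok=True)
        with open(os.path.join(self.project_dir, "main.py"), "w") as f:
            f.write("print('hello')\n")
        with open(os.path.join(self.project_dir, "poetry.lock"), "w") as f:
            f.write("# replit junk that should be filtered out\n")

    def tearDown(self):
        # Remove only the per-test project folder, not ./screen-shots itself
        shutil.rmtree(self.project_dir, ignore_errors=True)

    def test_filters_replit_junk(self):
        archiver = GithubArchiver(self.PROJECT, github_access_token="dummy-token")
        archiver.identify_target_files()
        files = archiver.get_target_files()
        self.assertIn(os.path.join(self.PROJECT, "main.py"), files)
        self.assertNotIn(os.path.join(self.PROJECT, "poetry.lock"), files)


if __name__ == "__main__":
    unittest.main()
```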
125 changes: 125 additions & 0 deletions funcs/replit_scrapper.py
@@ -0,0 +1,125 @@
from playwright.sync_api import sync_playwright
from playwright_stealth import stealth_sync


class ReplitScrapper():
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/116.0.0.0 "
        "Safari/537.36 "
        "Edg/116.0.1938.81"
    )

    def __init__(self, login_name, login_password):
        self.__login_name = login_name
        self.__login_password = login_password
        self._replit_url = None
        self._downloaded_filename = None

    def set_replit_url(self, replit_url) -> None:
        if replit_url is None:
            raise ValueError
        self._replit_url = replit_url

    def get_replit_url(self) -> str:
        if self._replit_url is None:
            raise ValueError("Missing replit_url")
        return self._replit_url

    def _set_downloaded_filename(self, filename) -> None:
        if filename is None:
            raise ValueError("ReplitScrapper._set_downloaded_filename() argument is None")
        self._downloaded_filename = filename

    def get_downloaded_filename(self) -> str:
        if self._downloaded_filename is None:
            raise ValueError("Missing downloaded_filename")
        return self._downloaded_filename

    def _visit_replit_repo(self, page) -> None:
        response = page.goto(self.get_replit_url(), wait_until="domcontentloaded")
        if response.status != 200:
            if response.status == 404:
                print(f"response.status = {response.status}")
                raise ValueError("Invalid replit_url")
            else:
                print(f"response.status = {response.status}")
                raise ValueError("ReplitScrapper._visit_replit_repo() something other than 404 happened")

    def _login_replit(self, page) -> None:
        # Login
        page.goto('https://replit.com/login', wait_until="domcontentloaded")
        page.screenshot(path="./screen-shots/replit.png")
        url_init = "https://identitytoolkit.googleapis.com/v1/accounts"
        with page.expect_response(lambda response: url_init in response.url) as response_info:
            page.locator(
                "xpath=/html/body/div[1]/div/div[2]/div/main/div[2]/div/form/div[1]/input"
            ).fill(self.__login_name)
            page.locator(
                "xpath=/html/body/div[1]/div/div[2]/div/main/div[2]/div/form/div[2]/div/input"
            ).fill(self.__login_password)
            page.locator(
                "xpath=/html/body/div[1]/div/div[2]/div/main/div[2]/div/form/div[3]/button"
            ).click()
        response = response_info.value
        if response.status != 200:
            print(response)
            if response.status == 400:
                print(f"response.status = {response.status}")
                raise ValueError("Invalid login credentials")
            else:
                print(f"response.status = {response.status}")
                raise ValueError("ReplitScrapper._login_replit() something other than 400 happened")
        page.wait_for_url("https://replit.com/~")
        page.screenshot(path="./screen-shots/replit_after_login.png")

    def _download_as_zip(self, page) -> None:
        # Wait for page load
        page.locator(
            "xpath=/html/body/div[1]/div[1]/div[1]/div[2]/div/div[1]/div/div[3]/div/div[1]/button/div/span"
        ).wait_for()
        while page.locator(
            "xpath=/html/body/div[1]/div[1]/div[1]/div[2]/header/div[2]/button"
        ).text_content() != "Run":
            print(page.locator(
                "xpath=/html/body/div[1]/div[1]/div[1]/div[2]/header/div[2]/button"
            ).text_content())
            page.wait_for_timeout(2000)
        page.screenshot(path="./screen-shots/target_page.png")

        # Begin downloading
        page.locator(
            "xpath=/html/body/div[1]/div[1]/div[1]/div[2]/div/div[1]/div/div[2]/div[1]/div[1]/div/button[3]"
        ).click()
        with page.expect_download() as download_info:
            page.locator(
                "xpath=/html/body/div[@class='css-1o92kwk']//div[@id='item-4']//div[@class='css-1l2rn59']"
            ).click()
        download = download_info.value
        self._set_downloaded_filename(download.suggested_filename)
        download.save_as(f"./screen-shots/{download.suggested_filename}")

    def run(self):
        print("ReplitScrapper: Begin downloading repo files...")
        with sync_playwright() as p:
            # Context setup
            browser = p.chromium.launch(slow_mo=50)
            # browser = p.chromium.launch(headless=False
            #                             , slow_mo=50
            #                             )
            context = browser.new_context(user_agent=ReplitScrapper.user_agent)
            page = context.new_page()
            stealth_sync(page)

            # Login replit
            self._login_replit(page)

            # Download repo files as zip
            self._visit_replit_repo(page)
            self._download_as_zip(page)

            # Clean-up
            context.close()
            browser.close()
        print("ReplitScrapper: Download complete")
31 changes: 31 additions & 0 deletions main.py
@@ -0,0 +1,31 @@
from funcs.replit_scrapper import ReplitScrapper
from funcs.github_archiver import GithubArchiver
import os
import zipfile
from dotenv import load_dotenv
load_dotenv()

WDIR = os.path.abspath(os.path.dirname(__name__))

if __name__ == "__main__":
    test_url = "https://replit.com/@pythondojoarchi/SlipperyGargantuanDebuggers"
    project_name = "SlipperyGargantuanDebuggers"

    # Download repo files as zip
    scrapper = ReplitScrapper(login_name=os.environ['EMAIL'], login_password=os.environ['PASSWORD'])
    scrapper.set_replit_url(test_url)
    scrapper.run()

    # Unzip downloaded zip file
    download_folder_path = os.path.join(WDIR, "screen-shots")
    full_file_path = os.path.join(download_folder_path, project_name + ".zip")
    extracted_folder_path = os.path.join(download_folder_path, project_name)
    zipfile.ZipFile(full_file_path).extractall(extracted_folder_path)

    # Commit target files to Github
    archiver = GithubArchiver(
        project_name=project_name,
        github_access_token=os.environ['GITHUB_ACCESS_TOKEN']
    )
    archiver.identify_target_files()
    archiver.commit_to_github()
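
main.py reads EMAIL, PASSWORD and GITHUB_ACCESS_TOKEN from the environment via python-dotenv and hard-codes both the repl URL and the project name. A small sketch of how the entry point could fail fast on missing variables and derive the project name from the URL instead; the helper names are invented, and it assumes the repl URL always ends with the project name:

```python
# Sketch only: environment guard and project-name derivation for main.py
import os

from dotenv import load_dotenv

load_dotenv()

REQUIRED_VARS = ("EMAIL", "PASSWORD", "GITHUB_ACCESS_TOKEN")


def check_environment() -> None:
    # Fail with one clear message instead of a KeyError halfway through a run
    missing = [name for name in REQUIRED_VARS if not os.environ.get(name)]
    if missing:
        raise RuntimeError(f"Missing environment variables: {', '.join(missing)}")


def project_name_from_url(replit_url: str) -> str:
    # Assumes URLs of the form https://replit.com/@owner/<ProjectName>
    return replit_url.rstrip("/").rsplit("/", 1)[-1]


if __name__ == "__main__":
    check_environment()
    url = "https://replit.com/@pythondojoarchi/SlipperyGargantuanDebuggers"
    print(project_name_from_url(url))  # -> SlipperyGargantuanDebuggers
```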