diff --git a/.github/workflows/Autointegrate_awesomeAzd.yml b/.github/workflows/Autointegrate_awesomeAzd.yml
new file mode 100644
index 0000000..90e7bd1
--- /dev/null
+++ b/.github/workflows/Autointegrate_awesomeAzd.yml
@@ -0,0 +1,67 @@
+name: Awesome-azd and Exec Docs Sync
+permissions:
+  actions: write
+  contents: write
+on:
+  workflow_dispatch:
+
+jobs:
+  Workloads-PR:
+    runs-on: ubuntu-latest
+    environment: "AzD Integration"
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v2
+        with:
+          ref: dev
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.x'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r ${{ github.workspace }}/scripts/generate_fields/requirements.txt
+
+      - name: Fetch workloads from azd and exec docs scenarios
+        run: |
+          curl -o ${{ github.workspace }}/templates.json https://raw.githubusercontent.com/Azure/awesome-azd/main/website/static/templates.json
+          curl -o ${{ github.workspace }}/exec_metadata.json https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/metadata.json
+
+      - name: Updating Workloads
+        run: |
+          echo "Running script"
+          python ${{ github.workspace }}/scripts/add_workloads/add_azd.py --root ${{ github.workspace }} --input_file ${{ github.workspace }}/templates.json
+          python ${{ github.workspace }}/scripts/add_workloads/add_exec_docs.py --root ${{ github.workspace }} --input_file ${{ github.workspace }}/exec_metadata.json
+          rm ${{ github.workspace }}/templates.json
+          rm ${{ github.workspace }}/exec_metadata.json
+
+      - name: Generating Fields
+        env:
+          AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
+          AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
+          GIT_EMU_PAT: ${{ secrets.GIT_EMU_PAT }}
+
+        run: |
+          echo "Generating New Fields"
+          python ${{ github.workspace }}/scripts/generate_fields/generate_fields.py --root ${{ github.workspace }}
+
+      - name: Configure Git
+        run: |
+          git config --global user.name 'github-actions[bot]'
+          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
+
+      - name: Raise PR
+        uses: peter-evans/create-pull-request@v4
+        with:
+          token: ${{ secrets.GIT_PAT }}
+          branch: "auto-pr-branch-${{ github.run_number }}"
+          commit-message: "Triggered update of workloads.json"
+          title: "Triggered PR: workloads.json update by ${{ github.actor }}"
+          body: "Triggered update of workloads.json by ${{ github.actor }}"
+          author: "github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>"
+          labels: "automated-pr"
+          delete-branch: true
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 3492bbd..d70b4cf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 .DS_Store
 helper_functions.sh
+**/__pycache__/**
\ No newline at end of file
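The workflow above only fires on `workflow_dispatch`, so it has to be started from the Actions tab or through the REST API. A minimal sketch of an API trigger (the `OWNER/REPO` slug and the `GITHUB_TOKEN` environment variable are placeholders, not part of this change):

```python
# Hypothetical dispatch of the sync workflow; fill in OWNER/REPO and use a
# token with "actions: write" scope before running.
import os
import requests

resp = requests.post(
    "https://api.github.com/repos/OWNER/REPO/actions/workflows/Autointegrate_awesomeAzd.yml/dispatches",
    headers={
        "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
        "Accept": "application/vnd.github+json",
    },
    json={"ref": "dev"},  # the job checks out the dev branch
)
resp.raise_for_status()  # GitHub answers 204 No Content on success
```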
diff --git a/scripts/add_awesome_azd/add_azd.py b/scripts/add_awesome_azd/add_azd.py
deleted file mode 100644
index 33c950d..0000000
--- a/scripts/add_awesome_azd/add_azd.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import json
-import uuid
-
-def main():
-    with open("./workloads/azd_templates.json", "r") as f:
-        data = json.load(f)
-
-    new_workloads = []
-
-    ## Get azd workloads
-    for workload in data:
-        if "msft" in workload["tags"]:
-            new_workloads.append(workload)
-
-    ## Add correct fields
-    with open("./workloads/workloads.json", "r") as f:
-        workloads = json.load(f)
-
-    correct_keys = []
-    unique_azd = {}
-    for key in workloads[0].keys():
-        correct_keys.append(key)
-
-    for workload in workloads:
-        unique_azd[workload["source"]] = workload
-
-    ## Add correct keys for new_workloads
-    for workload in new_workloads:
-        if workload["source"] in unique_azd:
-            keys = unique_azd[workload["source"]].keys()
-            for key in keys:
-                if key in workload:
-                    unique_azd[workload["source"]][key] = workload[key]
-        else:
-            new_workload = {}
-            for key in correct_keys:
-                if key in workload:
-                    new_workload[key] = workload[key]
-                else:
-                    match key:
-                        case "tags":
-                            new_workload[key] = []
-                        case "products":
-                            new_workload[key] = []
-                        case "sampleQueries":
-                            new_workload[key] = []
-                        case "deploymentOptions":
-                            new_workload[key] = ["AzD"]
-                        case "sourceType":
-                            new_workload[key] = "Azd"
-                        case "deploymentConfig":
-                            new_workload[key] = {}
-                        case "id":
-                            new_workload[key] = str(uuid.uuid4())
-            workloads.append(new_workload)
-
-    ## Write to file
-    with open("./workloads/new_workloads.json", "w") as f:
-        json.dump(workloads, f, indent=4)
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/scripts/add_workloads/add_azd.py b/scripts/add_workloads/add_azd.py
new file mode 100644
index 0000000..7e6d6d1
--- /dev/null
+++ b/scripts/add_workloads/add_azd.py
@@ -0,0 +1,80 @@
+import json, uuid, argparse, logging
+
+logging.basicConfig(level=logging.INFO, format='[%(asctime)s] - %(levelname)s - %(message)s')
+
+def main(root: str, input_file: str, check_quality: bool = False):
+    workloads_file = f"{root}/workloads/workloads.json"
+
+    with open(input_file, "r") as f:
+        data = json.load(f)
+
+    new_workloads = []
+
+    ## Get azd workloads
+    for workload in data:
+        if "msft" in workload["tags"]:
+            new_workloads.append(workload)
+        elif check_quality:
+            if workload["quality"]:
+                new_workloads.append(workload)
+
+    ## Add correct fields
+    with open(workloads_file, "r") as f:
+        workloads = json.load(f)
+
+    correct_keys = workloads[0].keys()
+    unique_azd = {}
+    for workload in workloads:
+        unique_azd[workload["source"]] = workload
+
+    ## Add correct keys for new_workloads
+    for azd_workload in new_workloads:
+        logging.info(f"Processing workload: {azd_workload['title']}")
+        if azd_workload["source"] in unique_azd:
+            for key in azd_workload.keys():
+                if key in correct_keys:
+                    unique_azd[azd_workload["source"]][key] = azd_workload[key]
+        else:
+            logging.info(f"Adding new workload: {azd_workload['title']}")
+            new_workload = {}
+            for key in correct_keys:
+                if key in azd_workload:
+                    new_workload[key] = azd_workload[key]
+                else:
+                    match key:
+                        case "tags":
+                            new_workload[key] = []
+                        case "products":
+                            new_workload[key] = []
+                        case "sampleQueries":
+                            new_workload[key] = []
+                        case "deploymentOptions":
+                            new_workload[key] = ["AzD"]
+                        case "sourceType":
+                            new_workload[key] = "Azd"
+                        case "deploymentConfig":
+                            new_workload[key] = {}
+                        case "id":
+                            new_workload[key] = str(uuid.uuid4())
+                        case "tech":
+                            new_workload[key] = []
+                        case "keyFeatures":
+                            new_workload[key] = []
+                        case _:
+                            new_workload[key] = []
+            workloads.append(new_workload)
+
+    ## Write to file
+    with open(workloads_file, "w") as f:
+        json.dump(workloads, f, indent=4)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Process workloads and add necessary fields.",
+                                     usage="%(prog)s --root ROOT --input_file INPUT_FILE [--check-quality]")
+    parser.add_argument("-r", "--root", help="Path to the root directory.", required=True)
+    parser.add_argument("-i", "--input_file", help="Path to the input JSON file.", required=True)
+    parser.add_argument("--check-quality", action="store_true", help="Whether to check the quality field in the workloads.")
+
+    args = parser.parse_args()
+
+    main(args.root, args.input_file, args.check_quality)
\ No newline at end of file
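To make the merge rule in `add_azd.py` concrete, here is a small self-contained walk-through (the entries are invented): when an incoming template's `source` already exists in `workloads.json`, only keys that belong to the workloads schema are copied over, and anything else is dropped.

```python
# Toy illustration of the add_azd.py merge semantics; data is made up.
existing = {"source": "https://github.com/example/app", "title": "Old title", "tags": ["msft"]}
incoming = {"source": "https://github.com/example/app", "title": "New title", "stars": 42}

schema_keys = existing.keys()  # mirrors correct_keys = workloads[0].keys()
for key in incoming:
    if key in schema_keys:     # "stars" is not in the schema, so it is ignored
        existing[key] = incoming[key]

assert existing["title"] == "New title" and "stars" not in existing
```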
diff --git a/scripts/add_workloads/add_exec_docs.py b/scripts/add_workloads/add_exec_docs.py
new file mode 100644
index 0000000..01494e3
--- /dev/null
+++ b/scripts/add_workloads/add_exec_docs.py
@@ -0,0 +1,97 @@
+import json, uuid, argparse, logging
+
+logging.basicConfig(level=logging.INFO, format='[%(asctime)s] - %(levelname)s - %(message)s')
+
+def main(root: str, input_file: str, check_quality: bool = False):
+    workloads_file = f"{root}/workloads/workloads.json"
+
+    with open(input_file, "r") as f:
+        data = json.load(f)
+
+    new_workloads = []
+    active_workload_sources = set()
+
+    ## Get exec docs workloads
+    for workload in data:
+        if workload["status"] == "active":
+            new_workloads.append(workload)
+            active_workload_sources.add(workload["sourceUrl"])
+        elif check_quality:
+            if workload.get("quality", None):
+                new_workloads.append(workload)
+
+    ## Add correct fields
+    with open(workloads_file, "r") as f:
+        workloads = json.load(f)
+
+    non_active_exec_docs = []
+    for workload in workloads:
+        if workload["sourceType"] == "ExecDocs" and workload["source"] not in active_workload_sources:
+            non_active_exec_docs.append(workload)
+
+    for workload in non_active_exec_docs:
+        logging.info(f"Removing workload: {workload['title']}")
+        workloads.remove(workload)
+
+    correct_keys = workloads[0].keys()
+    unique_exec = {}
+    for workload in workloads:
+        unique_exec[workload["source"]] = workload
+
+    ## Add correct keys for new_workloads
+    for exec_workload in new_workloads:
+        logging.info(f"Processing workload: {exec_workload['title']}")
+        if exec_workload["sourceUrl"] in unique_exec:
+            for key in exec_workload.keys():
+                if key in correct_keys:
+                    unique_exec[exec_workload["sourceUrl"]][key] = exec_workload[key]
+        else:
+            logging.info(f"Adding new workload: {exec_workload['title']}")
+            new_workload = {}
+            for key in correct_keys:
+                if key in exec_workload:
+                    new_workload[key] = exec_workload[key]
+                else:
+                    match key:
+                        case "source":
+                            new_workload[key] = exec_workload["sourceUrl"]
+                        case "tags":
+                            new_workload[key] = []
+                        case "products":
+                            new_workload[key] = []
+                        case "sampleQueries":
+                            new_workload[key] = []
+                        case "deploymentOptions":
+                            new_workload[key] = ["azcli"]
+                        case "sourceType":
+                            new_workload[key] = "ExecDocs"
+                        case "deploymentConfig":
+                            new_workload[key] = {
+                                "execDocs": {
+                                    "path": exec_workload["key"].replace("/", "%2F")
+                                }
+                            }
+                        case "id":
+                            new_workload[key] = str(uuid.uuid4())
+                        case "tech":
+                            new_workload[key] = []
+                        case "keyFeatures":
+                            new_workload[key] = []
+                        case _:
+                            new_workload[key] = []
+            workloads.append(new_workload)
+
+    ## Write to file
+    with open(workloads_file, "w") as f:
+        json.dump(workloads, f, indent=4)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Process workloads and add necessary fields.",
+                                     usage="%(prog)s --root ROOT --input_file INPUT_FILE [--check-quality]")
+    parser.add_argument("-r", "--root", help="Path to the root directory.", required=True)
+    parser.add_argument("-i", "--input_file", help="Path to the input JSON file.", required=True)
+    parser.add_argument("--check-quality", action="store_true", help="Whether to check the quality field in the workloads.")
+
+    args = parser.parse_args()
+
+    main(args.root, args.input_file, args.check_quality)
\ No newline at end of file
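One detail worth calling out from the script above: exec-docs scenarios store their path in `deploymentConfig` with slashes percent-encoded. A quick sketch of what that encoding produces (the scenario key here is illustrative):

```python
# The "%2F" encoding applied to an exec-docs scenario key (key is made up).
key = "azure-docs/articles/aks/learn/quick-kubernetes-deploy-cli.md"
print(key.replace("/", "%2F"))
# azure-docs%2Farticles%2Faks%2Flearn%2Fquick-kubernetes-deploy-cli.md
```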
diff --git a/scripts/add_workloads/delete_workloads.py b/scripts/add_workloads/delete_workloads.py
new file mode 100644
index 0000000..c184a79
--- /dev/null
+++ b/scripts/add_workloads/delete_workloads.py
@@ -0,0 +1,30 @@
+import argparse, json, requests, logging
+
+logging.basicConfig(level=logging.INFO, format='[%(asctime)s] - %(levelname)s - %(message)s')
+
+def main(root="."):
+    workloads = json.load(open(f"{root}/workloads/workloads.json", "r"))
+    keep_workloads = []
+    sources = set()
+
+    for workload in workloads:
+        res = requests.get(workload["source"])
+
+        logging.info(f"{res.status_code} {workload['source']}")
+        if res.status_code == 200 and workload["source"] not in sources:
+            logging.info(f"Keeping workload {workload['title']}")
+            keep_workloads.append(workload)
+            sources.add(workload["source"])
+        else:
+            logging.info(f"Removing workload {workload['title']}")
+
+    json.dump(keep_workloads, open(f"{root}/workloads/workloads.json", "w"), indent=4)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Delete unsupported workloads in workloads.json. Does so by looking at the source url.",
+                                     usage="%(prog)s --root ROOT")
+    parser.add_argument("-r", "--root", help="Path to the root directory.", required=True)
+
+    args = parser.parse_args()
+
+    main(args.root)
\ No newline at end of file
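The liveness check above issues a plain `requests.get` per source, so a single unresponsive host can stall the whole sweep. A hedged variant of the same check with a timeout and a `HEAD` request (the function name is ours, not the script's):

```python
# Sketch of a hardened version of the delete_workloads.py check.
import requests

def source_is_live(url: str, timeout: float = 10.0) -> bool:
    try:
        res = requests.head(url, allow_redirects=True, timeout=timeout)
        return res.status_code == 200
    except requests.RequestException:
        return False  # treat connection errors and timeouts as dead sources
```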
diff --git a/scripts/generate_fields/generate_fields.py b/scripts/generate_fields/generate_fields.py
new file mode 100644
index 0000000..b8b43e6
--- /dev/null
+++ b/scripts/generate_fields/generate_fields.py
@@ -0,0 +1,71 @@
+import os
+import argparse
+from openai import AzureOpenAI
+
+from utils.file_functions import read_file, write_file
+from utils.get_responses import get_responses
+from utils.fields import get_fields
+
+FILE_PATH = "workloads/workloads.json"
+DEPLOYMENT_MODEL = "gpt-4o-mini"
+FAILED_WORKLOADS = "failed_workloads.json"
+SUCCESSFUL_WORKLOADS = "workloads/workloads.json"
+
+
+def main(all_workloads: bool = False, fields: str = "", root: str = ".", output: str = ""):
+    """
+    Process workloads with Azure OpenAI.
+
+    Arguments:
+    all_workloads -- Boolean indicating whether to process all workloads.
+    fields -- String of fields to turn on, represented by letters.
+
+    Usage examples:
+    1. To process all workloads:
+       python script_name.py --all-workloads
+
+    2. To process specific fields:
+       python script_name.py --fields abc
+    """
+
+    fields_that_need_responses = get_fields(fields)
+
+    if not root: root = "."
+    workloads = read_file(f"{root}/{FILE_PATH}")
+    client = AzureOpenAI(
+        api_key = os.getenv("AZURE_OPENAI_API_KEY"),
+        api_version = "2024-02-01",
+        azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
+    )
+
+    finished_workloads = []
+
+    for workload in workloads:
+        finished_workloads.append(get_responses(workload, client, DEPLOYMENT_MODEL, fields_that_need_responses, all_workloads))
+
+    if not output:
+        write_file(f"{root}/{SUCCESSFUL_WORKLOADS}", finished_workloads)
+    else:
+        write_file(output, finished_workloads)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description=(
+        'Process workloads with Azure OpenAI.\n\n'
+        'Usage examples:\n'
+        '1. To process all workloads:\n'
+        '   python script_name.py --all-workloads\n'
+        '\n'
+        '2. To process specific fields:\n'
+        '   python script_name.py --fields '
+        ),
+        formatter_class=argparse.RawTextHelpFormatter
+    )
+    parser.add_argument('-r', '--root', type=str, help='Path to the root directory')
+    parser.add_argument('-a', '--all-workloads', action='store_true', help='Process all workloads')
+    parser.add_argument('-f', '--fields', type=str, default='', help='String of fields to turn on, letters to indicate fields')
+    parser.add_argument('-o', '--output', type=str, help='Path to the output file')
+
+    args = parser.parse_args()
+
+    main(all_workloads=args.all_workloads, fields=args.fields, root=args.root, output=args.output)
\ No newline at end of file
diff --git a/scripts/generate_fields/requirements.txt b/scripts/generate_fields/requirements.txt
new file mode 100644
index 0000000..957ed9e
--- /dev/null
+++ b/scripts/generate_fields/requirements.txt
@@ -0,0 +1,3 @@
+openai==1.44.1
+requests==2.25.1
+httpx==0.27.2 # Required for OpenAI API BUG: TypeError: AsyncClient.__init__() got an unexpected keyword argument 'proxies'
\ No newline at end of file
diff --git a/scripts/utils/__init__.py b/scripts/generate_fields/utils/__init__.py
similarity index 100%
rename from scripts/utils/__init__.py
rename to scripts/generate_fields/utils/__init__.py
diff --git a/scripts/generate_fields/utils/external_data.py b/scripts/generate_fields/utils/external_data.py
new file mode 100644
index 0000000..bfa1fc0
--- /dev/null
+++ b/scripts/generate_fields/utils/external_data.py
@@ -0,0 +1,38 @@
+import requests, os, logging
+from utils.prompt import README_PROMPT
+
+EMU_PAT = os.getenv("GIT_EMU_PAT")
+
+EMU_HEADERS = {'Authorization': f'token {EMU_PAT}'} if EMU_PAT else {}
+
+def get_readme(github_url: str) -> str:
+    if github_url.startswith("https://github.com"):
+        raw_github = "https://raw.githubusercontent.com"
+        github_url = github_url.replace("https://github.com", raw_github)
+        readme_url = f"{github_url}/refs/heads/main/README.md"
+        response = requests.get(readme_url)
+        if response.status_code == 404:
+            try:
+                logging.info(f"Getting correct branch and readme from {github_url}")
+                github_api = github_url.replace(raw_github, "https://api.github.com/repos")
+
+                res = requests.get(f"{github_api}/contents", headers=EMU_HEADERS)
+                contents = res.json()
+                readme_file = [file['download_url'] for file in contents if 'readme' in file['name'].lower()][0]
+                logging.info(f"Getting data from {readme_file}")
+
+                response = requests.get(readme_file)
+            except Exception as e:
+                logging.error(f"Error getting readme file: {e}")
+                # Fall back to the main-branch README request
+                readme_url = f"{github_url}/refs/heads/main/README.md"
+                response = requests.get(readme_url)
+    else:
+        logging.info(f"Getting data from {github_url}")
+        readme_url = github_url
+        response = requests.get(readme_url)
+
+    return format_readme_request(response.text)
+
+def format_readme_request(response) -> str:
+    return f"{README_PROMPT}:\n```\n{response}\n```{README_PROMPT}\n"
\ No newline at end of file
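For orientation, a rough usage sketch of `get_readme` above (the repository URL is an example, and the snippet assumes it runs with `scripts/generate_fields` on `sys.path` so the `utils` import resolves): a `github.com` URL is rewritten to `raw.githubusercontent.com`, the GitHub contents API is used as a fallback when `main` has no `README.md`, and the body comes back wrapped in `README_PROMPT`.

```python
# Assumed to run from scripts/generate_fields.
from utils.external_data import get_readme

context = get_readme("https://github.com/Azure-Samples/todo-python-mongo")  # example repo
print(context[:200])  # README text sandwiched between README_PROMPT markers
```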
diff --git a/scripts/generate_fields/utils/fields.py b/scripts/generate_fields/utils/fields.py
new file mode 100644
index 0000000..c706877
--- /dev/null
+++ b/scripts/generate_fields/utils/fields.py
@@ -0,0 +1,17 @@
+ALL_FIELDS = {
+    "G": "tags",
+    "F": "keyFeatures",
+    "P": "products",
+    "T": "tech",
+    "Q": "sampleQueries",
+}
+
+def get_fields(field_string: str) -> list:
+    if not field_string:
+        return list(ALL_FIELDS.values())
+
+    fields = []
+    for char in field_string:
+        if char not in ALL_FIELDS: raise ValueError(f"Invalid field: {char}")
+        fields.append(ALL_FIELDS[char])
+    return fields
\ No newline at end of file
diff --git a/scripts/utils/file_functions.py b/scripts/generate_fields/utils/file_functions.py
similarity index 100%
rename from scripts/utils/file_functions.py
rename to scripts/generate_fields/utils/file_functions.py
diff --git a/scripts/utils/get_responses.py b/scripts/generate_fields/utils/get_responses.py
similarity index 74%
rename from scripts/utils/get_responses.py
rename to scripts/generate_fields/utils/get_responses.py
index 987c909..6d80a58 100644
--- a/scripts/utils/get_responses.py
+++ b/scripts/generate_fields/utils/get_responses.py
@@ -1,31 +1,36 @@
 import json
-
+import uuid
+import time
 from openai import AzureOpenAI
+
 from utils.external_data import get_readme
-from utils.prompt import KEY_FEATURES_PROMPT, DISCLAIMER_PROMPT, SAMPLE_NEGATIVE_MATCH_PROMPT, SAMPLE_PRODUCTS_PROMPT, SAMPLE_TECH_PROMPT, SAMPLE_QUERIES_PROMPT, WORKLOAD_TYPE_PROMPT
+from utils.prompt import KEY_FEATURES_PROMPT, DISCLAIMER_PROMPT, SAMPLE_NEGATIVE_MATCH_PROMPT, SAMPLE_PRODUCTS_PROMPT, SAMPLE_TECH_PROMPT, SAMPLE_QUERIES_PROMPT, WORKLOAD_TYPE_PROMPT, TAGS_PROMPT
 import logging
 
-FIELDS_THAT_NEED_RESPONSES = ["keyFeatures"]
+FIELDS_THAT_NEED_RESPONSES = ["tags", "keyFeatures", "sampleQueries", "tech", "products"]
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='[%(asctime)s] - %(levelname)s - %(message)s')
 
-def get_responses(workload: dict, client: AzureOpenAI, deployment_model: str) -> dict | str:
+def get_responses(workload: dict, client: AzureOpenAI, deployment_model: str, fields_that_need_responses = FIELDS_THAT_NEED_RESPONSES, all_workloads = False) -> dict | str:
     logging.info(f"Processing workload: {workload['title']}")
-    if workload["sourceType"] != "ExecDocs": external_data = get_readme(workload['source'])
-    else: external_data = ""
-    for field in FIELDS_THAT_NEED_RESPONSES:
+    external_data = get_readme(workload['source'])
+    if workload["id"] == "": workload["id"] = str(uuid.uuid4())
+    for field in fields_that_need_responses:
+        if (not all_workloads and workload.get(field, None)): continue
         logging.info(f"Getting response for {field}")
+
         try:
-            workload[field] = get_field_response(client, workload, field, deployment_model)
+            workload[field] = get_field_response(client, workload, field, deployment_model, external_data)
             logging.info(f"Successfully got response for {field}")
             logging.info(f"Field {field} - Response: {workload[field]}")
         except Exception as e:
            logging.error(f"Error getting response for {field}: {e}")
            workload[field] = str(e)
+        time.sleep(5)
 
     return workload
 
-def get_field_response(client: AzureOpenAI, workload: dict, field: str, deployment_model: str) -> str:
+def get_field_response(client: AzureOpenAI, workload: dict, field: str, deployment_model: str, external_data: str) -> str:
     ask_prompt = ""
 
     match field:
@@ -40,6 +45,8 @@ def get_field_response(client: AzureOpenAI, workload: dict, field: str, deploym
             ask_prompt = SAMPLE_NEGATIVE_MATCH_PROMPT
         case "keyFeatures":
             ask_prompt = KEY_FEATURES_PROMPT
+        case "tags":
+            ask_prompt = TAGS_PROMPT
 
     prompt = f"""title: {workload["title"]}\ndescription: {workload["description"]}\ntags:{workload['tags']}\n\n{external_data}\n\n{DISCLAIMER_PROMPT}\n\nYour Response:\n"""
 
@@ -53,7 +60,6 @@ def get_field_response(client: AzureOpenAI, workload: dict, field: str, deploym
                 }
             ],
         )
-        # print(f"{field}: {response.choices[0].message.content}")
         return json.loads(response.choices[0].message.content)
     except Exception as e:
         return str(response.choices[0].message.content)
\ No newline at end of file
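The skip rule in `get_responses` is easy to miss: unless `--all-workloads` is passed, a field that already holds a truthy value is left untouched and only empty fields are regenerated. A toy demonstration (data invented):

```python
workload = {"tags": ["aks"], "keyFeatures": []}
all_workloads = False

for field in ["tags", "keyFeatures"]:
    if not all_workloads and workload.get(field):
        continue                        # "tags" is skipped; it already has a value
    print(f"would regenerate {field}")  # only "keyFeatures" prints
```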
diff --git a/scripts/utils/prompt.py b/scripts/generate_fields/utils/prompt.py
similarity index 71%
rename from scripts/utils/prompt.py
rename to scripts/generate_fields/utils/prompt.py
index 7c7c837..4a54d14 100644
--- a/scripts/utils/prompt.py
+++ b/scripts/generate_fields/utils/prompt.py
@@ -1,5 +1,30 @@
 PROMPT = """Add more sample queries to this json: """
 
+TAGS_PROMPT = """
+Tags are anything that you can associate with the workload. This can be languages, frameworks, APIs, technology, author, or key words (like "tutorial", "ai", "cli", or "application"). Here are some example
+tags from existing workloads:
+
+```
+"title": "Kubernetes React Web App with Node.js API and MongoDB",
+"description": "A blueprint for getting a React.js web app with a Node.js API and a MongoDB database on Azure. The blueprint includes sample application code (a ToDo web app) which can be removed and replaced with your own application code. Add your own source code and leverage the Infrastructure as Code assets (written in Bicep) to get up and running quickly. This architecture is for running Kubernetes clusters without setting up the control plane.",
+"author": "Azure Dev",
+"source": "https://github.com/Azure-Samples/todo-nodejs-mongo-aks",
+"tags": ["bicep", "nodejs", "typescript", "javascript", "mongodb", "monitor", "keyvault", "reactjs", "appservice", "cosmosdb", "aks", "msft"]
+
+"title": "Quickstart: Deploy an Azure Kubernetes Service (AKS) cluster using Azure CLI",
+"description": "Learn how to quickly deploy a Kubernetes cluster and deploy an application in Azure Kubernetes Service (AKS) using Azure CLI.",
+"author": "Microsoft",
+"source": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-docs/articles/aks/learn/quick-kubernetes-deploy-cli.md",
+"tags": ["kubernetes", "azure", "CLI", "aks", "deployment", "cluster", "application", "setup", "tutorial", "provision", "msft"]
+```
+
+Many of these keywords come from the workload's README, which I will also provide.
+Make a json list object that can be read with json.loads() in python that contains all tags that you think are necessary for the workload. DON'T GET TOO CRAZY AND LIST TOO MANY.
+Here's the data from the README of the workload (if it's an exec doc it actually deploys using Innovation Engine and some provided steps). After looking at all the information and examples, make a json list readable by json.loads():
+
+
+"""
+
 README_PROMPT = """
 This is data straight from the github readme of the workload. You can also use this in your decisions for creating the correct responses for the field you are creating.
 IF THE TECHNOLOGY IN THE README IS POINTING TO OR REFERENCING ANOTHER WEBSITE OR REPOSITORY THEN DON'T USE THAT TECHNOLOGY. FOR EXAMPLE:
@@ -20,6 +45,8 @@ Your Response:
 "["How to use Azure AI Search to power ChatGPT-style and Q&A experiences", "Can I use Azure OpenAI to enhance ChatGPT responses with my company's proprietary data?", "How to integrate Azure AI Search with ChatGPT for custom knowledge bases?"]"
 
+Avoid using verbiage like "what is the best". The questions should be geared more toward broader usage context, specific tech stack mentions, or deployment.
+
 WORKLOAD DETAILS:
 
 """
 
@@ -37,6 +64,7 @@ IT IS IMPORTANT YOU ONLY USE LANGUAGES AND FRAMWORKS AND DATABASES AND APIS IN THIS, NO OTHER AZURE PRODUCTS, SERVICES, OR PLATFORMS.
 LETS BE CONSISTENT ESPECIALLY WITH LANGUAGES OF ALL FLAVORS, WHETHER IT'S A PROGRAMMING LANGUAGE OR A MARKUP LANGUAGE, IT BELONGS HERE. THIS CAN BE LEFT BLANK IF THERE ARE NO LANGUAGES OR FRAMEWORKS MENTIONED.
 DON'T INCLUDE PLATFORMS (HINT: ChatGPT is a platform, OpenAI is an API). AND START EVERY STRING WITH A CAPITAL LETTER AND ITS FORMAL NAME LIKE dotnetsharp=.NET OR C# AND NO SPACES! DONT FORGET BICEP OR TERRAFORM!
+Operating systems can also be mentioned, or web servers like Apache, Nginx, or IIS.
 
 WORKLOAD DETAILS:
 
@@ -89,4 +117,12 @@ KEY_FEATURES_PROMPT = """You are an expert at reading all the provided content
 and understanding the key features of the workload. Please list the key features of the workload in a json list object that can be read with json.loads() in python.
 THIS MEANS YOU ONLY RETURN THE LIST OBJECT WITH NO OTHER WORDS, AND STRING ARE DOUBLE QUOTED ALWAYS.
 
-Key features are the main capabilities of the workload that make it unique and valuable. These are the features that users would be most interested in when deciding whether to use the workload. Include anything important."""
\ No newline at end of file
+Key features are the main capabilities of the workload that make it unique and valuable. These are the features that users would be most interested in when deciding whether to use the workload. Include anything important.
+The information you include from the README.md should be longer than a word or two, but not overly verbose either: a short sentence, or a three-word bullet-point key feature.
+THESE SHOULD BE OVERARCHING THEMES, NOT SOMETHING SMALL AND SPECIFIC THAT DOESN'T ENCAPSULATE GENERAL IDEAS. "CREATE A RESOURCE GROUP" DOES NOT ENCAPSULATE GENERAL IDEAS. If you want to mention that you're creating resources, explain more specific resources and why it's important to the workload.
+
+GOOD Examples:
+"React application for Azure AI Search", "Web interface for visualizing retrieval modes", and "Supports image search using text-to-image and image-to-image functionalities" are all great examples of a multi-dimensional answer.
+
+DON'T PUT SHORT, BAD EXAMPLES LIKE "create a resource group". THIS IS BAD.
+"""
\ No newline at end of file
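All of these prompts demand a bare JSON list that `json.loads()` can read, and `get_field_response` falls back to returning the raw string when parsing fails. A small hedged validator for that contract (the name and shape are ours, not the PR's):

```python
import json

def parse_list_response(text: str) -> list:
    """Enforce the 'JSON list only' contract the prompts above ask for."""
    value = json.loads(text)  # raises json.JSONDecodeError on non-JSON output
    if not isinstance(value, list):
        raise ValueError(f"Expected a JSON list, got {type(value).__name__}")
    return value
```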
+""" \ No newline at end of file diff --git a/scripts/generate_workloads/requirements.txt b/scripts/generate_workloads/requirements.txt deleted file mode 100644 index e63b219..0000000 --- a/scripts/generate_workloads/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -openai===1.44.1 \ No newline at end of file diff --git a/scripts/generate_workloads/workloads.py b/scripts/generate_workloads/workloads.py deleted file mode 100644 index 7d166a8..0000000 --- a/scripts/generate_workloads/workloads.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from utils.file_functions import read_file, write_file -from utils.get_responses import get_responses -from openai import AzureOpenAI - -FILE_PATH = "workloads/workloads.json" -DEPLOYMENT_MODEL = "gpt4o" -FAILED_WORKLOADS = "failed_workloads.json" -SUCCESSFUL_WORKLOADS = "new_workloads.json" - - -def main(): - workloads = read_file(FILE_PATH) - - client = AzureOpenAI( - api_key = os.getenv("AZURE_OPENAI_API_KEY"), - api_version = "2024-02-01", - azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") - ) - - finished_workloads = [] - - for workload in workloads: - finished_workloads.append(get_responses(workload, client, DEPLOYMENT_MODEL)) - - write_file(SUCCESSFUL_WORKLOADS, finished_workloads) - - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/scripts/test_embeddings/workloads_vector_only.json b/scripts/test_embeddings/workloads_vector_only.json new file mode 100644 index 0000000..ca8b331 --- /dev/null +++ b/scripts/test_embeddings/workloads_vector_only.json @@ -0,0 +1,64 @@ +{ + "I want to create a website based on python": [ + { + "title": "Python (Django) Web App with PostgreSQL in VNet", + "source": "https://github.com/Azure-Samples/msdocs-django-postgresql-sample-app", + "search_score": 0.83078945, + "search_rerankerScore": null + }, + { + "title": "Python (Flask) Web App with PostgreSQL in VNet", + "source": "https://github.com/Azure-Samples/msdocs-flask-postgresql-sample-app", + "search_score": 0.82961214, + "search_rerankerScore": null + }, + { + "title": "Serverless Azure OpenAI Quick Start with LlamaIndex (Python)", + "source": "https://github.com/Azure-Samples/llama-index-python", + "search_score": 0.828326, + "search_rerankerScore": null + }, + { + "title": "React Web App with Python API and MongoDB - Terraform", + "source": "https://github.com/Azure-Samples/todo-python-mongo-terraform", + "search_score": 0.8278469, + "search_rerankerScore": null + }, + { + "title": "React Web App with Python API and MongoDB", + "source": "https://github.com/Azure-Samples/todo-python-mongo", + "search_score": 0.8262746, + "search_rerankerScore": null + }, + { + "title": "Static React Web App + Functions with Python API and MongoDB", + "source": "https://github.com/Azure-Samples/todo-python-mongo-swa-func", + "search_score": 0.826117, + "search_rerankerScore": null + }, + { + "title": "Python (Django) Web App with PostgreSQL via Azure Container Apps", + "source": "https://github.com/Azure-Samples/azure-django-postgres-aca", + "search_score": 0.8220776, + "search_rerankerScore": null + }, + { + "title": "Containerized React Web App with Python API and MongoDB", + "source": "https://github.com/Azure-Samples/todo-python-mongo-aca", + "search_score": 0.82124877, + "search_rerankerScore": null + }, + { + "title": "Creative Writing Assistant: Working with Agents using Promptflow (Python Implementation)", + "source": "https://github.com/Azure-Samples/agent-openai-python-prompty", + "search_score": 0.8212195, + "search_rerankerScore": null + }, + 
+        {
+            "title": "FastAPI on Azure Functions",
+            "source": "https://github.com/Azure-Samples/fastapi-on-azure-functions",
+            "search_score": 0.8145797,
+            "search_rerankerScore": null
+        }
+    ]
+}
\ No newline at end of file
diff --git a/scripts/utils/external_data.py b/scripts/utils/external_data.py
deleted file mode 100644
index 5f5a96b..0000000
--- a/scripts/utils/external_data.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import requests
-from utils.prompt import README_PROMPT
-
-def get_readme(github_url: str) -> str:
-    raw_github = "https://raw.githubusercontent.com"
-    github_url = github_url.replace("https://github.com", raw_github)
-    readme_url = f"{github_url}/refs/heads/main/README.md"
-    response = requests.get(readme_url)
-    return format_readme_request(response.text)
-
-def format_readme_request(response) -> str:
-    return f"{README_PROMPT}:\n```\n{response}\n```{README_PROMPT}\n"
\ No newline at end of file
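Finally, the vector-only results file added above can be eyeballed with a few lines: each query maps to a list of hits carrying a `search_score` (the reranker score is null in this run). A quick, hypothetical inspection script:

```python
import json

with open("scripts/test_embeddings/workloads_vector_only.json") as f:
    results = json.load(f)

for query, hits in results.items():
    best = max(hits, key=lambda h: h["search_score"])  # hits arrive pre-sorted, but don't assume it
    print(f"{query!r} -> {best['title']} ({best['search_score']:.4f})")
```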