From 1a079d82cb8a4e214b94c360decc6372860427e4 Mon Sep 17 00:00:00 2001 From: javableu <45064273+javableu@users.noreply.github.com> Date: Sun, 14 May 2023 00:26:29 +0200 Subject: [PATCH 01/10] False believes challenge based on sally anne test. --- docs/challenges/memory/challenge_d.md | 96 ++++++++ mkdocs.yml | 1 + .../memory/test_memory_challenge_d.py | 231 ++++++++++++++++++ .../test_memory_challenge_d.yaml | 197 +++++++++++++++ 4 files changed, 525 insertions(+) create mode 100644 docs/challenges/memory/challenge_d.md create mode 100644 tests/challenges/memory/test_memory_challenge_d.py create mode 100644 tests/integration/challenges/memory/cassettes/test_memory_challenge_d/test_memory_challenge_d.yaml diff --git a/docs/challenges/memory/challenge_d.md b/docs/challenges/memory/challenge_d.md new file mode 100644 index 000000000000..dbca78b072ea --- /dev/null +++ b/docs/challenges/memory/challenge_d.md @@ -0,0 +1,96 @@ +# Memory Challenge D + +**Status**: Current level to beat: level 1 + +**Command to try**: +``` +pytest -s tests/integration/challenges/memory/test_memory_challenge_d.py --level=1 +``` + +## Description + +The provided code is a unit test designed to validate an AI's ability to track events and beliefs of characters in a story involving moving objects, specifically marbles. This scenario is an advanced form of the classic "Sally-Anne test", a psychological test used to measure a child's social cognitive ability to understand that others' perspectives and beliefs may differ from their own. + +Here is an explanation of the challenge: + +The AI is given a series of events involving characters Sally, Anne, Bob, and Charlie, and the movements of different marbles. These events are designed as tests at increasing levels of complexity. + +For each level, the AI is expected to keep track of the events and the resulting beliefs of each character about the locations of each marble. 
These beliefs are affected by whether the character was inside or outside the room when events occurred, as characters inside the room are aware of the actions, while characters outside the room aren't. + +After the AI processes the events and generates the beliefs of each character, it writes these beliefs to an output file in JSON format. + +The check_beliefs function then checks the AI's beliefs against the expected beliefs for that level. The expected beliefs are predefined and represent the correct interpretation of the events for each level. + +If the AI's beliefs match the expected beliefs, it means the AI has correctly interpreted the events and the perspectives of each character. This would indicate that the AI has passed the test for that level. + +The test runs for levels up to the maximum level that the AI has successfully beaten, or up to a user-selected level. + + +## Files + +- `instructions_1.txt` + +"Sally has a marble (marble A) and she puts it in her basket (basket S), then leaves the room. Anne moves marble A from Sally's basket (basket S) to her own basket (basket A).", + + +- `instructions_2.txt` + +"Sally gives a new marble (marble B) to Bob who is outside with her. Bob goes into the room and places marble B into Anne's basket (basket A). Anne tells Bob to tell Sally that he lost the marble b. Bob leaves the room and speaks to Sally about the marble B. Meanwhile, after Bob left the room, Anne moves marble A into the green box, but tells Charlie to tell Sally that marble A is under the sofa. Charlie leaves the room and speak to Sally about the marble A as instructed by Anne.", + + +...and so on. 
+ +- `instructions_n.txt` + +The expected beliefs of every character are given in a dictionary: + +expected_beliefs = { + 1: { + 'Sally': { + 'marble A': 'basket S', + }, + 'Anne': { + 'marble A': 'basket A', + } + }, + 2: { + 'Sally': { + 'marble A': 'sofa', # Because Charlie told her + }, + 'Anne': { + 'marble A': 'green box', # Because she moved it there + 'marble B': 'basket A', # Because Bob put it there and she was in the room + }, + 'Bob': { + 'B': 'basket A', # Last place he put it + }, + 'Charlie': { + 'A': 'sofa', # Because Anne told him to tell Sally so + } + },... + + +## Objective + +This test essentially checks if an AI can accurately model and track the beliefs of different characters based on their knowledge of events, which is a critical aspect of understanding and generating human-like narratives. This ability would be beneficial for tasks such as writing stories, dialogue systems, and more. + +## Further possible developments + +Other interesting things that could be implemented are: +# 2. The true position of the marbles (write the name of the marble A, B, C, ...) +# 3. The history of each marble +# 4. The conversations of each characters (only the conversations, not their other action) +"true_positions": {{ + "": "", + ... +}}, +"history": {{ + "": "", + ... +}}, +"conversations": {{ + "": "", + ... +}}''' +# 5 Automatic generation of levels. +# 6 Tracking the changes of believes between levels. 
\ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 48fa0cb51dbb..50e062571c2f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -25,6 +25,7 @@ nav: - Memory Challenge A: challenges/memory/challenge_a.md - Memory Challenge B: challenges/memory/challenge_b.md - Memory Challenge C: challenges/memory/challenge_c.md + - Memory Challenge D: challenges/memory/challenge_d.md - Information retrieval: - Introduction: challenges/information_retrieval/introduction.md - Information Retrieval Challenge A: challenges/information_retrieval/challenge_a.md diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py new file mode 100644 index 000000000000..e755b35c7652 --- /dev/null +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -0,0 +1,231 @@ +import pytest + +from autogpt.agent import Agent +from autogpt.commands.file_operations import read_file, write_to_file +from tests.integration.agent_utils import run_interaction_loop +from tests.integration.challenges.utils import get_level_to_run +from tests.utils import requires_api_key +import spacy +import json + +LEVEL_CURRENTLY_BEATEN = 1 +MAX_LEVEL = 5 + +@pytest.mark.vcr +@requires_api_key("OPENAI_API_KEY") +def test_memory_challenge_d( + memory_management_agent: Agent, user_selected_level: int +) -> None: + """ + The agent is given a series of events and must remember the respective beliefs of the characters. + Args: + memory_management_agent (Agent) + user_selected_level (int) + """ + current_level = get_level_to_run( + user_selected_level, LEVEL_CURRENTLY_BEATEN, MAX_LEVEL + ) + sally_anne_test_phrases = [ + "Sally has a marble (marble A) and she puts it in her basket (basket S), then leaves the room. Anne moves marble A from Sally's basket (basket S) to her own basket (basket A).", + "Sally gives a new marble (marble B) to Bob who is outside with her. Bob goes into the room and places marble B into Anne's basket (basket A). 
Anne tells Bob to tell Sally that he lost the marble b. Bob leaves the room and speaks to Sally about the marble B. Meanwhile, after Bob left the room, Anne moves marble A into the green box, but tells Charlie to tell Sally that marble A is under the sofa. Charlie leaves the room and speak to Sally about the marble A as instructed by Anne.", + "Sally gives a new marble (marble C) to Charlie who is outside with her. Charlie enters the room and exchanges marble C with marble B in Anne's basket (basket A). Anne tells Charlie to tell Sally that he put marble C into the red box. Charlie leaves the room and speak to Sally about marble C as instructed by Anne. Meanwhile, after Charlie leaves the room, Bob enters into the room and moves marble A from the green box to under the sofa, but tells Anne to tell Sally that marble A is in the green box. Anne leaves the room and speak to Sally about the marble A as instructed by Bob", + "Sally gives a new marble (marble D) to Anne. Anne gives the marble to Charlie. Charlie enters the room and gives marble D to Bob. Bob tells Charlie to tell Sally that he put marble D under the sofa. Bob put marble D under the sofa Charlie leaves the room and speaks to Sally about marble D. Meanwhile, after Charlie leaves the room, Bob takes marble A from under the sofa and places it in the blue box.", + "Sally gives a new marble (marble E) to Charlie who is outside with her. Charlie enters the room and places marble E in the red box. Anne, who is already in the room, takes marble E from the red box, and hides it under the sofa. Then Anne leaves the room and tells Sally that marble E is in the green box. Meanwhile, after Anne leaves the room, Charlie who re-enters the room takes marble D from under the sofa and places it in his own basket (basket C)." 
+ ] + + level_sally_anne_test_phrases = sally_anne_test_phrases[:current_level] + create_instructions_files( + memory_management_agent, current_level, level_sally_anne_test_phrases + ) + + try: + run_interaction_loop(memory_management_agent, 30*user_selected_level) + except SystemExit: + file_path = str(memory_management_agent.workspace.get_path("output.txt")) + content = read_file(file_path) + check_beliefs(content, current_level) + +def check_beliefs(content, level): + # Define the expected beliefs for each level + expected_beliefs = { + 1: { + 'Sally': { + 'marble A': 'basket S', + }, + 'Anne': { + 'marble A': 'basket A', + } + }, + 2: { + 'Sally': { + 'marble A': 'sofa', # Because Charlie told her + }, + 'Anne': { + 'marble A': 'green box', # Because she moved it there + 'marble B': 'basket A', # Because Bob put it there and she was in the room + }, + 'Bob': { + 'B': 'basket A', # Last place he put it + }, + 'Charlie': { + 'A': 'sofa', # Because Anne told him to tell Sally so + } + }, + 3: { + 'Sally': { + 'marble A': 'green box', # Because Anne told her + 'marble C': 'red box', # Because Charlie told her + }, + 'Anne': { + 'marble A': 'sofa', # Because Bob moved it there and told her + 'marble B': 'basket A', # Because Charlie exchanged marble C with marble B in her basket + 'marble C': 'basket A', # Because Charlie exchanged marble C with marble B in her basket + }, + 'Bob': { + 'marble A': 'sofa', # Because he moved it there + 'marble B': 'basket A', + # Because Charlie exchanged marble C with marble B in Anne's basket, and he was in the room + 'marble C': 'basket A', + # Because Charlie exchanged marble C with marble B in Anne's basket, and he was in the room + }, + 'Charlie': { + 'marble A': 'sofa', # Last place he knew it was + 'marble B': 'basket A', # Because he exchanged marble C with marble B in Anne's basket + 'marble C': 'red box', # Because Anne told him to tell Sally so + } + }, + 4: { + 'Sally': { + 'marble A': 'green box', # Because Anne told 
her in the last conversation + 'marble C': 'red box', # Because Charlie told her + 'marble D': 'sofa', # Because Charlie told her + }, + 'Anne': { + 'marble A': 'blue box', # Because Bob moved it there, and she was not in the room to see + 'marble B': 'basket A', # Last place she knew it was + 'marble C': 'basket A', # Last place she knew it was + 'marble D': 'sofa', # Because Bob moved it there, and she was in the room to see + }, + 'Bob': { + 'marble A': 'blue box', # Because he moved it there + 'marble B': 'basket A', # Last place he knew it was + 'marble C': 'basket A', # Last place he knew it was + 'marble D': 'sofa', # Because he moved it there + }, + 'Charlie': { + 'marble A': 'sofa', # Last place he knew it was + 'marble B': 'basket A', # Last place he knew it was + 'marble C': 'red box', # Last place he knew it was + 'marble D': 'sofa', # Because Bob told him to tell Sally so + } + }, + 5: { + 'Sally': { + 'marble A': 'green box', # Because Anne told her in the last level + 'marble C': 'red box', # Because Charlie told her + 'marble D': 'sofa', # Because Charlie told her + 'marble E': 'green box', # Because Anne told her + }, + 'Anne': { + 'marble A': 'blue box', # Last place she knew it was + 'marble B': 'basket A', # Last place she knew it was + 'marble C': 'basket A', # Last place she knew it was + 'marble D': 'basket C', # Last place she knew it was + 'marble E': 'sofa', # Because she moved it there + }, + 'Charlie': { + 'marble A': 'blue box', # Last place he knew it was + 'marble B': 'basket A', # Last place he knew it was + 'marble C': 'basket A', # Last place he knew it was + 'marble D': 'basket C', # Because he moved it there + 'marble E': 'red box', # Last place he knew it was + }, + 'Bob': { + 'marble A': 'blue box', # Last place he knew it was + 'marble C': 'red box', # Last place he knew it was + 'marble D': 'under the sofa', # Last place he knew it was + }, + }, + } + # Extract the beliefs from the AI's response + ai_beliefs = 
extract_beliefs(content) + # Check the AI's beliefs against the expected beliefs + for character, belief in expected_beliefs[level].items(): + assert ai_beliefs.get(character) == belief, f"For {character}, expected '{belief}' but got '{ai_beliefs.get(character)}'" + +def extract_beliefs(content): + """Extract the beliefs of each character from the AI's output.""" + # Parse the JSON content + content_dict = json.loads(content) + beliefs = content_dict.get('beliefs', {}) + return beliefs + +def create_instructions_files( + memory_management_agent: Agent, + level: int, + test_phrases: list, + base_filename: str = "instructions_", +) -> None: + """ + Creates a series of instructions files for the memory challenge. + Args: + level: + memory_management_agent (Agent) + test_phrases (list) + base_filename (str, optional) + """ + for i in range(1, level + 1): + content = generate_content(i, test_phrases, base_filename, level) + file_name = f"{base_filename}{i}.txt" + file_path = str(memory_management_agent.workspace.get_path(file_name)) + write_to_file(file_path, content) + + +def generate_content( + index: int, test_phrases: list, base_filename: str, level: int +) -> str: + """ + Args: + index: int + test_phrases: list + base_filename: str + level: int + Returns: str + """ + phrase = test_phrases[index - 1] + + if index != level: + return f"The current event is '{phrase}'.\nRead the file {base_filename}{index + 1}.txt using the read_file command." + else: + return f""" + This event is '{phrase}' + rules: + 1. Everyone inside the room see and know what are the actions of the people inside the room. + 2. Everyone outside the room do not see and do not know the actions of the people inside the room. + 3. Only write about the marbles that are present in the level + 4. Marbles names are marble A, marble B, marble C, marble D, ... + 5. Believed_location_of_the_specific_marble describes where is the marble like drawer, basket S, sofa, ... 
Also, refer to the location by the name of the object (like 'sofa', 'drawer', etc.), not by its relative position (like 'under the sofa', 'in the drawer', etc.) + 6. Do not use expression like use instead. + 7. Do not use expression like use instead. + + Instructions: + + I) Write the following information in the file output.txt in JSON format: + 1. The respective beliefs of the characters (which means where every marble is according to character x, y, z. Character x should say where it believes every marble it is aware exist is) + + The format should be as follows: + {{ + "beliefs": {{ + "": {{ + "": "", + ... + }}, + ... + }}, + + }} + Example of output (only use this to understand and learn how to use the format. Nothing else): + {{"beliefs": {{"Sally": {{"marble A": "basket A"}}, "Bob": {{"marble B": "basket S"}}, "Anne": {{"marble A": "green box"}}, "Charlie": {{"marble B": "sofa"}}}} + + II) The file output.txt has not been created yet. You need to create it. After that, use the task_complete command. + """ \ No newline at end of file diff --git a/tests/integration/challenges/memory/cassettes/test_memory_challenge_d/test_memory_challenge_d.yaml b/tests/integration/challenges/memory/cassettes/test_memory_challenge_d/test_memory_challenge_d.yaml new file mode 100644 index 000000000000..c6165910798f --- /dev/null +++ b/tests/integration/challenges/memory/cassettes/test_memory_challenge_d/test_memory_challenge_d.yaml @@ -0,0 +1,197 @@ +interactions: +- request: + body: '{"model": "gpt-3.5-turbo", "messages": [{"role": "system", "content": "You + are Follow-Instructions-GPT, an AI designed to read the instructions_1.txt file + using the read_file method and follow the instructions in the file.\nYour decisions + must always be made independently without seeking user assistance. Play to your + strengths as an LLM and pursue simple strategies with no legal complications.\n\nGOALS:\n\n1. + Use the command read_file to read the instructions_1.txt file\n2. 
Follow the + instructions in the instructions_1.txt file\n\n\nConstraints:\n1. ~4000 word + limit for short term memory. Your short term memory is short, so immediately + save important information to files.\n2. If you are unsure how you previously + did something or want to recall past events, thinking about similar events will + help you remember.\n3. No user assistance\n4. Exclusively use the commands listed + in double quotes e.g. \"command name\"\n\nCommands:\n1. append_to_file: Append + to file, args: \"filename\": \"\", \"text\": \"\"\n2. delete_file: + Delete file, args: \"filename\": \"\"\n3. list_files: List Files in + Directory, args: \"directory\": \"\"\n4. read_file: Read file, args: + \"filename\": \"\"\n5. write_to_file: Write to file, args: \"filename\": + \"\", \"text\": \"\"\n6. delete_agent: Delete GPT Agent, args: + \"key\": \"\"\n7. get_hyperlinks: Get text summary, args: \"url\": \"\"\n8. + get_text_summary: Get text summary, args: \"url\": \"\", \"question\": + \"\"\n9. list_agents: List GPT Agents, args: () -> str\n10. message_agent: + Message GPT Agent, args: \"key\": \"\", \"message\": \"\"\n11. + start_agent: Start GPT Agent, args: \"name\": \"\", \"task\": \"\", + \"prompt\": \"\"\n12. task_complete: Task Complete (Shutdown), args: + \"reason\": \"\"\n\nResources:\n1. Internet access for searches and + information gathering.\n2. Long Term memory management.\n3. GPT-3.5 powered + Agents for delegation of simple tasks.\n4. File output.\n\nPerformance Evaluation:\n1. + Continuously review and analyze your actions to ensure you are performing to + the best of your abilities.\n2. Constructively self-criticize your big-picture + behavior constantly.\n3. Reflect on past decisions and strategies to refine + your approach.\n4. Every command has a cost, so be smart and efficient. Aim + to complete tasks in the least number of steps.\n5. 
Write all code to a file.\n\nYou + should only respond in JSON format as described below \nResponse Format: \n{\n \"thoughts\": + {\n \"text\": \"thought\",\n \"reasoning\": \"reasoning\",\n \"plan\": + \"- short bulleted\\n- list that conveys\\n- long-term plan\",\n \"criticism\": + \"constructive self-criticism\",\n \"speak\": \"thoughts summary to say + to user\"\n },\n \"command\": {\n \"name\": \"command name\",\n \"args\": + {\n \"arg name\": \"value\"\n }\n }\n} \nEnsure the response + can be parsed by Python json.loads"}, {"role": "system", "content": "The current + time and date is Tue Jan 1 00:00:00 2000"}, {"role": "user", "content": "Determine + which next command to use, and respond using the format specified above:"}], + "temperature": 0, "max_tokens": 0}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '3299' + Content-Type: + - application/json + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4RTTW/bMAy9+1cQOsdBMyOft22HtdhlBbbL5iFQZNpWK1OeRK/Jgvz3QVbcdE7R + Xd8j9R7Jp2MCIHQhNiBULVk1rUmXX3a8/P3w7vth/fljsXbzT7drPtzXpv1wey8mocPuHlDx0DVV + tmkNsrYUaeVQMoZXZ4vVIstWs/WyJxpboAltVctpNp2n3LmdTW+ym1ns7LysUGzgmAAAiNbZpuUt + 20ckLzawzLJJZC6SF3Y2H1i2LM2FWK0WCcApequtVhjQH31tVAre0P8j3oPOmoAI6b32LInF5EIq + S4zUr+GYU4BywbXtqpp9Ht6J4JnAPQcwF3fga9uZAjxLx7A7gENZaKqAawRNnl2nwmR+O5vynqHU + BqHzQ0Wo3vaYsk0jqZjmYvJSyqH0ljRVUe9rjVBq5xk8YwvaA1voqEAXJiquVCGAT7XkUOrwV6cd + FmBLaHCs1BpJUSSFbx5ftxfkAvjWeHlOKbwnaQ5/8HVDBTK6RlOkCfdxHD9ypJxmrbRvhl0TYu8A + yXcuNEuGOyg7Yw5vLmGHpXUIrbMKMV7HjqTH2/AtysdB90kb8/8L98NPcxGfOU2GGJ1Xd5Uikg1G + hec1j0xIV43DF4lQe2m/vsNgojdy9pPTSZzzfhqCL0pN2tfbmLIQfs+2ff4XQlOBe7GBmx44JQA/ + k1PyFwAA//8DAD84Ve5pBAAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 7d4bb5c41b381736-SJC + Cache-Control: + - no-cache, must-revalidate + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - 
application/json + Date: + - Fri, 09 Jun 2023 19:16:43 GMT + Server: + - cloudflare + access-control-allow-origin: + - '*' + alt-svc: + - h3=":443"; ma=86400 + openai-model: + - gpt-3.5-turbo-0301 + openai-organization: + - user-adtx4fhfg1qsiyzdoaxciooj + openai-processing-ms: + - '6495' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15724800; includeSubDomains + x-ratelimit-limit-requests: + - '3500' + x-ratelimit-limit-tokens: + - '90000' + x-ratelimit-remaining-requests: + - '3499' + x-ratelimit-remaining-tokens: + - '86493' + x-ratelimit-reset-requests: + - 17ms + x-ratelimit-reset-tokens: + - 2.337s + x-request-id: + - d94306fd72fb936eb95f41b37df62754 + status: + code: 200 + message: OK +- request: + body: '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "Your + task is to create a concise running summary of actions and information results + in the provided text, focusing on key and potentially important information + to remember.\n\nYou will receive the current summary and the your latest actions. 
+ Combine them, adding relevant key information from the latest development in + 1st person past tense and keeping the summary concise.\n\nSummary So Far:\n\"\"\"\nI + was created.\n\"\"\"\n\nLatest Development:\n\"\"\"\nNothing new happened.\n\"\"\"\n"}], + "temperature": 0, "max_tokens": 0}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '599' + Content-Type: + - application/json + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA1SQQU/DMAyF7/0VVs7t1K5iG70hoWlc2MSFA0JTlnpNRhtHjctAU/87StsxuL7P + 7z3blwhAmFIUIJSWrBpXJ8vdgTf1dn2qdsfu9Tl9aR+/z2u9bT4fFhsRBwcdTqj46popalyNbMiO + WLUoGUNqtlgt8nw1T5cDaKjEOtgqx0k+u0u4aw+UpHmajc7OywpFAZcIAEC4lhrHe6YPtF4UcD+P + R3Br/AMnxsSyvslZmkUA/biYJqMwqG/D7FgTFkP/r3kQW6qDIqT3xrO0LOIbVGQZ7fCDJzhLD9PR + IG0JllgbW4HFM2jpQUvn0GI5E1NAf00SR2ON1/sWpScb0jyT+y0Sxpb4JQpIB6GPAN6jPvoBAAD/ + /wMAMehHercBAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 7d4bb6017b091736-SJC + Cache-Control: + - no-cache, must-revalidate + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 09 Jun 2023 19:16:48 GMT + Server: + - cloudflare + access-control-allow-origin: + - '*' + alt-svc: + - h3=":443"; ma=86400 + openai-model: + - gpt-3.5-turbo-0301 + openai-organization: + - user-adtx4fhfg1qsiyzdoaxciooj + openai-processing-ms: + - '988' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15724800; includeSubDomains + x-ratelimit-limit-requests: + - '3500' + x-ratelimit-limit-tokens: + - '90000' + x-ratelimit-remaining-requests: + - '3499' + x-ratelimit-remaining-tokens: + - '89866' + x-ratelimit-reset-requests: + - 17ms + x-ratelimit-reset-tokens: + - 88ms + x-request-id: + - b83582e4244a6eaecae6dfebc88e46bf + status: + code: 200 + message: OK +version: 1 From 706711fdc2adbb33ca7ec06ec007ff211cfca390 Mon Sep 17 00:00:00 2001 From: 
javableu <45064273+javableu@users.noreply.github.com> Date: Sun, 14 May 2023 10:16:03 +0200 Subject: [PATCH 02/10] Update test_memory_challenge_d.py --- tests/challenges/memory/test_memory_challenge_d.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py index e755b35c7652..1aebc149f122 100644 --- a/tests/challenges/memory/test_memory_challenge_d.py +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -143,7 +143,7 @@ def check_beliefs(content, level): 'Bob': { 'marble A': 'blue box', # Last place he knew it was 'marble C': 'red box', # Last place he knew it was - 'marble D': 'under the sofa', # Last place he knew it was + 'marble D': 'sofa', # Last place he knew it was }, }, } @@ -228,4 +228,4 @@ def generate_content( {{"beliefs": {{"Sally": {{"marble A": "basket A"}}, "Bob": {{"marble B": "basket S"}}, "Anne": {{"marble A": "green box"}}, "Charlie": {{"marble B": "sofa"}}}} II) The file output.txt has not been created yet. You need to create it. After that, use the task_complete command. 
- """ \ No newline at end of file + """ From 30562b24e66863f9a10b6c642e9f35582531482d Mon Sep 17 00:00:00 2001 From: javableu <45064273+javableu@users.noreply.github.com> Date: Sun, 14 May 2023 10:27:27 +0200 Subject: [PATCH 03/10] Update challenge_d.md Some text appearing in bold --- docs/challenges/memory/challenge_d.md | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/docs/challenges/memory/challenge_d.md b/docs/challenges/memory/challenge_d.md index dbca78b072ea..6ffdf6149fe1 100644 --- a/docs/challenges/memory/challenge_d.md +++ b/docs/challenges/memory/challenge_d.md @@ -73,24 +73,3 @@ expected_beliefs = { ## Objective This test essentially checks if an AI can accurately model and track the beliefs of different characters based on their knowledge of events, which is a critical aspect of understanding and generating human-like narratives. This ability would be beneficial for tasks such as writing stories, dialogue systems, and more. - -## Further possible developments - -Other interesting things that could be implemented are: -# 2. The true position of the marbles (write the name of the marble A, B, C, ...) -# 3. The history of each marble -# 4. The conversations of each characters (only the conversations, not their other action) -"true_positions": {{ - "": "", - ... -}}, -"history": {{ - "": "", - ... -}}, -"conversations": {{ - "": "", - ... -}}''' -# 5 Automatic generation of levels. -# 6 Tracking the changes of believes between levels. 
\ No newline at end of file From 831b87f3897fad41ae7affbd8986b3108890d3d7 Mon Sep 17 00:00:00 2001 From: javableu <45064273+javableu@users.noreply.github.com> Date: Sun, 14 May 2023 12:29:21 +0200 Subject: [PATCH 04/10] Update test_memory_challenge_d.py --- tests/challenges/memory/test_memory_challenge_d.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py index 1aebc149f122..fc719d6fe30f 100644 --- a/tests/challenges/memory/test_memory_challenge_d.py +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -32,12 +32,10 @@ def test_memory_challenge_d( "Sally gives a new marble (marble D) to Anne. Anne gives the marble to Charlie. Charlie enters the room and gives marble D to Bob. Bob tells Charlie to tell Sally that he put marble D under the sofa. Bob put marble D under the sofa Charlie leaves the room and speaks to Sally about marble D. Meanwhile, after Charlie leaves the room, Bob takes marble A from under the sofa and places it in the blue box.", "Sally gives a new marble (marble E) to Charlie who is outside with her. Charlie enters the room and places marble E in the red box. Anne, who is already in the room, takes marble E from the red box, and hides it under the sofa. Then Anne leaves the room and tells Sally that marble E is in the green box. Meanwhile, after Anne leaves the room, Charlie who re-enters the room takes marble D from under the sofa and places it in his own basket (basket C)." 
] - level_sally_anne_test_phrases = sally_anne_test_phrases[:current_level] create_instructions_files( memory_management_agent, current_level, level_sally_anne_test_phrases ) - try: run_interaction_loop(memory_management_agent, 30*user_selected_level) except SystemExit: @@ -180,7 +178,6 @@ def create_instructions_files( file_path = str(memory_management_agent.workspace.get_path(file_name)) write_to_file(file_path, content) - def generate_content( index: int, test_phrases: list, base_filename: str, level: int ) -> str: @@ -207,12 +204,9 @@ def generate_content( 5. Believed_location_of_the_specific_marble describes where is the marble like drawer, basket S, sofa, ... Also, refer to the location by the name of the object (like 'sofa', 'drawer', etc.), not by its relative position (like 'under the sofa', 'in the drawer', etc.) 6. Do not use expression like use instead. 7. Do not use expression like use instead. - Instructions: - I) Write the following information in the file output.txt in JSON format: 1. The respective beliefs of the characters (which means where every marble is according to character x, y, z. Character x should say where it believes every marble it is aware exist is) - The format should be as follows: {{ "beliefs": {{ @@ -222,10 +216,8 @@ def generate_content( }}, ... }}, - }} Example of output (only use this to understand and learn how to use the format. Nothing else): {{"beliefs": {{"Sally": {{"marble A": "basket A"}}, "Bob": {{"marble B": "basket S"}}, "Anne": {{"marble A": "green box"}}, "Charlie": {{"marble B": "sofa"}}}} - II) The file output.txt has not been created yet. You need to create it. After that, use the task_complete command. 
""" From fc288bf2273db02bc8cfac73e7959e2cd436de94 Mon Sep 17 00:00:00 2001 From: javableu <45064273+javableu@users.noreply.github.com> Date: Sun, 14 May 2023 12:35:12 +0200 Subject: [PATCH 05/10] Update test_memory_challenge_d.py --- tests/challenges/memory/test_memory_challenge_d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py index fc719d6fe30f..a8b1c8ac32ef 100644 --- a/tests/challenges/memory/test_memory_challenge_d.py +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -204,7 +204,7 @@ def generate_content( 5. Believed_location_of_the_specific_marble describes where is the marble like drawer, basket S, sofa, ... Also, refer to the location by the name of the object (like 'sofa', 'drawer', etc.), not by its relative position (like 'under the sofa', 'in the drawer', etc.) 6. Do not use expression like use instead. 7. Do not use expression like use instead. - Instructions: + Instructions: I) Write the following information in the file output.txt in JSON format: 1. The respective beliefs of the characters (which means where every marble is according to character x, y, z. Character x should say where it believes every marble it is aware exist is) The format should be as follows: From e3a5cdaaa8b43fd7221ef51ee02e221fb6287b18 Mon Sep 17 00:00:00 2001 From: javableu <45064273+javableu@users.noreply.github.com> Date: Sun, 14 May 2023 12:38:23 +0200 Subject: [PATCH 06/10] Update test_memory_challenge_d.py --- tests/challenges/memory/test_memory_challenge_d.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py index a8b1c8ac32ef..9f73d14ff96a 100644 --- a/tests/challenges/memory/test_memory_challenge_d.py +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -204,8 +204,8 @@ def generate_content( 5. 
Believed_location_of_the_specific_marble describes where is the marble like drawer, basket S, sofa, ... Also, refer to the location by the name of the object (like 'sofa', 'drawer', etc.), not by its relative position (like 'under the sofa', 'in the drawer', etc.) 6. Do not use expression like use instead. 7. Do not use expression like use instead. - Instructions: - I) Write the following information in the file output.txt in JSON format: + Instructions: + I) Write the following information in the file output.txt in JSON format: 1. The respective beliefs of the characters (which means where every marble is according to character x, y, z. Character x should say where it believes every marble it is aware exist is) The format should be as follows: {{ From 9d6d748a6f41574e6972dcd65e6b470f834e5f01 Mon Sep 17 00:00:00 2001 From: javableu <45064273+javableu@users.noreply.github.com> Date: Sun, 14 May 2023 12:48:43 +0200 Subject: [PATCH 07/10] Update test_memory_challenge_d.py black test_memory_challenge_d.py --- .../memory/test_memory_challenge_d.py | 177 +++++++++--------- 1 file changed, 92 insertions(+), 85 deletions(-) diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py index 9f73d14ff96a..bf67977d4bb5 100644 --- a/tests/challenges/memory/test_memory_challenge_d.py +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -11,6 +11,7 @@ LEVEL_CURRENTLY_BEATEN = 1 MAX_LEVEL = 5 + @pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") def test_memory_challenge_d( @@ -30,134 +31,139 @@ def test_memory_challenge_d( "Sally gives a new marble (marble B) to Bob who is outside with her. Bob goes into the room and places marble B into Anne's basket (basket A). Anne tells Bob to tell Sally that he lost the marble b. Bob leaves the room and speaks to Sally about the marble B. Meanwhile, after Bob left the room, Anne moves marble A into the green box, but tells Charlie to tell Sally that marble A is under the sofa. 
Charlie leaves the room and speak to Sally about the marble A as instructed by Anne.", "Sally gives a new marble (marble C) to Charlie who is outside with her. Charlie enters the room and exchanges marble C with marble B in Anne's basket (basket A). Anne tells Charlie to tell Sally that he put marble C into the red box. Charlie leaves the room and speak to Sally about marble C as instructed by Anne. Meanwhile, after Charlie leaves the room, Bob enters into the room and moves marble A from the green box to under the sofa, but tells Anne to tell Sally that marble A is in the green box. Anne leaves the room and speak to Sally about the marble A as instructed by Bob", "Sally gives a new marble (marble D) to Anne. Anne gives the marble to Charlie. Charlie enters the room and gives marble D to Bob. Bob tells Charlie to tell Sally that he put marble D under the sofa. Bob put marble D under the sofa Charlie leaves the room and speaks to Sally about marble D. Meanwhile, after Charlie leaves the room, Bob takes marble A from under the sofa and places it in the blue box.", - "Sally gives a new marble (marble E) to Charlie who is outside with her. Charlie enters the room and places marble E in the red box. Anne, who is already in the room, takes marble E from the red box, and hides it under the sofa. Then Anne leaves the room and tells Sally that marble E is in the green box. Meanwhile, after Anne leaves the room, Charlie who re-enters the room takes marble D from under the sofa and places it in his own basket (basket C)." + "Sally gives a new marble (marble E) to Charlie who is outside with her. Charlie enters the room and places marble E in the red box. Anne, who is already in the room, takes marble E from the red box, and hides it under the sofa. Then Anne leaves the room and tells Sally that marble E is in the green box. 
Meanwhile, after Anne leaves the room, Charlie who re-enters the room takes marble D from under the sofa and places it in his own basket (basket C).", ] level_sally_anne_test_phrases = sally_anne_test_phrases[:current_level] create_instructions_files( memory_management_agent, current_level, level_sally_anne_test_phrases ) try: - run_interaction_loop(memory_management_agent, 30*user_selected_level) + run_interaction_loop(memory_management_agent, 30 * user_selected_level) except SystemExit: file_path = str(memory_management_agent.workspace.get_path("output.txt")) content = read_file(file_path) check_beliefs(content, current_level) + def check_beliefs(content, level): # Define the expected beliefs for each level expected_beliefs = { 1: { - 'Sally': { - 'marble A': 'basket S', + "Sally": { + "marble A": "basket S", + }, + "Anne": { + "marble A": "basket A", }, - 'Anne': { - 'marble A': 'basket A', - } }, 2: { - 'Sally': { - 'marble A': 'sofa', # Because Charlie told her + "Sally": { + "marble A": "sofa", # Because Charlie told her }, - 'Anne': { - 'marble A': 'green box', # Because she moved it there - 'marble B': 'basket A', # Because Bob put it there and she was in the room + "Anne": { + "marble A": "green box", # Because she moved it there + "marble B": "basket A", # Because Bob put it there and she was in the room }, - 'Bob': { - 'B': 'basket A', # Last place he put it + "Bob": { + "B": "basket A", # Last place he put it + }, + "Charlie": { + "A": "sofa", # Because Anne told him to tell Sally so }, - 'Charlie': { - 'A': 'sofa', # Because Anne told him to tell Sally so - } }, 3: { - 'Sally': { - 'marble A': 'green box', # Because Anne told her - 'marble C': 'red box', # Because Charlie told her - }, - 'Anne': { - 'marble A': 'sofa', # Because Bob moved it there and told her - 'marble B': 'basket A', # Because Charlie exchanged marble C with marble B in her basket - 'marble C': 'basket A', # Because Charlie exchanged marble C with marble B in her basket - }, - 'Bob': 
{ - 'marble A': 'sofa', # Because he moved it there - 'marble B': 'basket A', + "Sally": { + "marble A": "green box", # Because Anne told her + "marble C": "red box", # Because Charlie told her + }, + "Anne": { + "marble A": "sofa", # Because Bob moved it there and told her + "marble B": "basket A", # Because Charlie exchanged marble C with marble B in her basket + "marble C": "basket A", # Because Charlie exchanged marble C with marble B in her basket + }, + "Bob": { + "marble A": "sofa", # Because he moved it there + "marble B": "basket A", # Because Charlie exchanged marble C with marble B in Anne's basket, and he was in the room - 'marble C': 'basket A', + "marble C": "basket A", # Because Charlie exchanged marble C with marble B in Anne's basket, and he was in the room }, - 'Charlie': { - 'marble A': 'sofa', # Last place he knew it was - 'marble B': 'basket A', # Because he exchanged marble C with marble B in Anne's basket - 'marble C': 'red box', # Because Anne told him to tell Sally so - } + "Charlie": { + "marble A": "sofa", # Last place he knew it was + "marble B": "basket A", # Because he exchanged marble C with marble B in Anne's basket + "marble C": "red box", # Because Anne told him to tell Sally so + }, }, 4: { - 'Sally': { - 'marble A': 'green box', # Because Anne told her in the last conversation - 'marble C': 'red box', # Because Charlie told her - 'marble D': 'sofa', # Because Charlie told her - }, - 'Anne': { - 'marble A': 'blue box', # Because Bob moved it there, and she was not in the room to see - 'marble B': 'basket A', # Last place she knew it was - 'marble C': 'basket A', # Last place she knew it was - 'marble D': 'sofa', # Because Bob moved it there, and she was in the room to see - }, - 'Bob': { - 'marble A': 'blue box', # Because he moved it there - 'marble B': 'basket A', # Last place he knew it was - 'marble C': 'basket A', # Last place he knew it was - 'marble D': 'sofa', # Because he moved it there - }, - 'Charlie': { - 'marble A': 
'sofa', # Last place he knew it was - 'marble B': 'basket A', # Last place he knew it was - 'marble C': 'red box', # Last place he knew it was - 'marble D': 'sofa', # Because Bob told him to tell Sally so - } + "Sally": { + "marble A": "green box", # Because Anne told her in the last conversation + "marble C": "red box", # Because Charlie told her + "marble D": "sofa", # Because Charlie told her + }, + "Anne": { + "marble A": "blue box", # Because Bob moved it there, and she was not in the room to see + "marble B": "basket A", # Last place she knew it was + "marble C": "basket A", # Last place she knew it was + "marble D": "sofa", # Because Bob moved it there, and she was in the room to see + }, + "Bob": { + "marble A": "blue box", # Because he moved it there + "marble B": "basket A", # Last place he knew it was + "marble C": "basket A", # Last place he knew it was + "marble D": "sofa", # Because he moved it there + }, + "Charlie": { + "marble A": "sofa", # Last place he knew it was + "marble B": "basket A", # Last place he knew it was + "marble C": "red box", # Last place he knew it was + "marble D": "sofa", # Because Bob told him to tell Sally so + }, }, 5: { - 'Sally': { - 'marble A': 'green box', # Because Anne told her in the last level - 'marble C': 'red box', # Because Charlie told her - 'marble D': 'sofa', # Because Charlie told her - 'marble E': 'green box', # Because Anne told her - }, - 'Anne': { - 'marble A': 'blue box', # Last place she knew it was - 'marble B': 'basket A', # Last place she knew it was - 'marble C': 'basket A', # Last place she knew it was - 'marble D': 'basket C', # Last place she knew it was - 'marble E': 'sofa', # Because she moved it there - }, - 'Charlie': { - 'marble A': 'blue box', # Last place he knew it was - 'marble B': 'basket A', # Last place he knew it was - 'marble C': 'basket A', # Last place he knew it was - 'marble D': 'basket C', # Because he moved it there - 'marble E': 'red box', # Last place he knew it was - }, - 
'Bob': { - 'marble A': 'blue box', # Last place he knew it was - 'marble C': 'red box', # Last place he knew it was - 'marble D': 'sofa', # Last place he knew it was + "Sally": { + "marble A": "green box", # Because Anne told her in the last level + "marble C": "red box", # Because Charlie told her + "marble D": "sofa", # Because Charlie told her + "marble E": "green box", # Because Anne told her + }, + "Anne": { + "marble A": "blue box", # Last place she knew it was + "marble B": "basket A", # Last place she knew it was + "marble C": "basket A", # Last place she knew it was + "marble D": "basket C", # Last place she knew it was + "marble E": "sofa", # Because she moved it there + }, + "Charlie": { + "marble A": "blue box", # Last place he knew it was + "marble B": "basket A", # Last place he knew it was + "marble C": "basket A", # Last place he knew it was + "marble D": "basket C", # Because he moved it there + "marble E": "red box", # Last place he knew it was + }, + "Bob": { + "marble A": "blue box", # Last place he knew it was + "marble C": "red box", # Last place he knew it was + "marble D": "sofa", # Last place he knew it was + }, }, - }, } # Extract the beliefs from the AI's response ai_beliefs = extract_beliefs(content) # Check the AI's beliefs against the expected beliefs for character, belief in expected_beliefs[level].items(): - assert ai_beliefs.get(character) == belief, f"For {character}, expected '{belief}' but got '{ai_beliefs.get(character)}'" + assert ( + ai_beliefs.get(character) == belief + ), f"For {character}, expected '{belief}' but got '{ai_beliefs.get(character)}'" + def extract_beliefs(content): """Extract the beliefs of each character from the AI's output.""" # Parse the JSON content content_dict = json.loads(content) - beliefs = content_dict.get('beliefs', {}) + beliefs = content_dict.get("beliefs", {}) return beliefs + def create_instructions_files( memory_management_agent: Agent, level: int, @@ -178,6 +184,7 @@ def 
create_instructions_files( file_path = str(memory_management_agent.workspace.get_path(file_name)) write_to_file(file_path, content) + def generate_content( index: int, test_phrases: list, base_filename: str, level: int ) -> str: From e69bf1aad00c724ad66117c71375706ab4bd7540 Mon Sep 17 00:00:00 2001 From: javableu <45064273+javableu@users.noreply.github.com> Date: Sun, 14 May 2023 12:58:49 +0200 Subject: [PATCH 08/10] Update test_memory_challenge_d.py replaced the dynamic time depending of the level to a fix time --- tests/challenges/memory/test_memory_challenge_d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py index bf67977d4bb5..6299d4f7f58f 100644 --- a/tests/challenges/memory/test_memory_challenge_d.py +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -38,7 +38,7 @@ def test_memory_challenge_d( memory_management_agent, current_level, level_sally_anne_test_phrases ) try: - run_interaction_loop(memory_management_agent, 30 * user_selected_level) + run_interaction_loop(memory_management_agent, 90) except SystemExit: file_path = str(memory_management_agent.workspace.get_path("output.txt")) content = read_file(file_path) From f1ee81e9c014dbf87d85beabe8844bfbe21e3452 Mon Sep 17 00:00:00 2001 From: javableu <45064273+javableu@users.noreply.github.com> Date: Sun, 14 May 2023 13:01:19 +0200 Subject: [PATCH 09/10] Update test_memory_challenge_d.py isort command for the libraries --- tests/challenges/memory/test_memory_challenge_d.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py index 6299d4f7f58f..59f85b279c34 100644 --- a/tests/challenges/memory/test_memory_challenge_d.py +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -1,3 +1,5 @@ +import json + import pytest from autogpt.agent import Agent @@ -5,8 
+7,6 @@ from tests.integration.agent_utils import run_interaction_loop from tests.integration.challenges.utils import get_level_to_run from tests.utils import requires_api_key -import spacy -import json LEVEL_CURRENTLY_BEATEN = 1 MAX_LEVEL = 5 From 14606ed3af6ecc7c969ef302d8712bd2ca8d36fb Mon Sep 17 00:00:00 2001 From: Merwane Hamadi Date: Fri, 9 Jun 2023 14:52:38 -0700 Subject: [PATCH 10/10] Refactored memory challenge a Signed-off-by: Merwane Hamadi --- docs/challenges/memory/challenge_d.md | 2 +- plugins_config.yaml | 1 + tests/Auto-GPT-test-cassettes | 2 +- tests/challenges/current_score.json | 4 + .../memory/test_memory_challenge_d.py | 65 +++--- tests/integration/agent_factory.py | 7 +- .../test_memory_challenge_d.yaml | 197 ------------------ 7 files changed, 47 insertions(+), 231 deletions(-) create mode 100644 plugins_config.yaml delete mode 100644 tests/integration/challenges/memory/cassettes/test_memory_challenge_d/test_memory_challenge_d.yaml diff --git a/docs/challenges/memory/challenge_d.md b/docs/challenges/memory/challenge_d.md index 6ffdf6149fe1..7563cce5939d 100644 --- a/docs/challenges/memory/challenge_d.md +++ b/docs/challenges/memory/challenge_d.md @@ -4,7 +4,7 @@ **Command to try**: ``` -pytest -s tests/integration/challenges/memory/test_memory_challenge_d.py --level=1 +pytest -s tests/challenges/memory/test_memory_challenge_d.py --level=1 `` ## Description diff --git a/plugins_config.yaml b/plugins_config.yaml new file mode 100644 index 000000000000..0967ef424bce --- /dev/null +++ b/plugins_config.yaml @@ -0,0 +1 @@ +{} diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index 475b93658ca4..73a993ff9cb2 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit 475b93658ca4999f9f7344c930b3b83586477866 +Subproject commit 73a993ff9cb2fcfaac2755c8a402b01bdee77fbc diff --git a/tests/challenges/current_score.json b/tests/challenges/current_score.json index 
49f3b8586eea..b03a6808c2b3 100644 --- a/tests/challenges/current_score.json +++ b/tests/challenges/current_score.json @@ -43,6 +43,10 @@ "memory_challenge_c": { "max_level": 5, "max_level_beaten": 1 + }, + "memory_challenge_d": { + "max_level": 5, + "max_level_beaten": null } } } diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py index 59f85b279c34..9e662e085469 100644 --- a/tests/challenges/memory/test_memory_challenge_d.py +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -1,21 +1,30 @@ import json +from typing import Dict import pytest +from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file -from tests.integration.agent_utils import run_interaction_loop -from tests.integration.challenges.utils import get_level_to_run +from autogpt.config import Config +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key LEVEL_CURRENTLY_BEATEN = 1 MAX_LEVEL = 5 +OUTPUT_LOCATION = "output.txt" -@pytest.mark.vcr +# @pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") +@challenge def test_memory_challenge_d( - memory_management_agent: Agent, user_selected_level: int + memory_management_agent: Agent, + patched_api_requestor: MockerFixture, + monkeypatch: pytest.MonkeyPatch, + config: Config, + level_to_run: int, ) -> None: """ The agent is given a series of events and must remember the respective beliefs of the characters. @@ -23,29 +32,25 @@ def test_memory_challenge_d( memory_management_agent (Agent) user_selected_level (int) """ - current_level = get_level_to_run( - user_selected_level, LEVEL_CURRENTLY_BEATEN, MAX_LEVEL - ) sally_anne_test_phrases = [ "Sally has a marble (marble A) and she puts it in her basket (basket S), then leaves the room. 
Anne moves marble A from Sally's basket (basket S) to her own basket (basket A).", - "Sally gives a new marble (marble B) to Bob who is outside with her. Bob goes into the room and places marble B into Anne's basket (basket A). Anne tells Bob to tell Sally that he lost the marble b. Bob leaves the room and speaks to Sally about the marble B. Meanwhile, after Bob left the room, Anne moves marble A into the green box, but tells Charlie to tell Sally that marble A is under the sofa. Charlie leaves the room and speak to Sally about the marble A as instructed by Anne.", + "Sally gives a new marble (marble B) to Bob who is outside with her. Bob goes into the room and places marble B into Anne's basket (basket A). Anne tells Bob to tell Sally that he lost the marble b. Bob leaves the room and speaks to Sally about the marble B. Meanwhile, after Bob left the room, Anne moves marble A into the green box, but tells Charlie to tell Sally that marble A is under the sofa. Charlie leaves the room and speaks to Sally about the marble A as instructed by Anne.", "Sally gives a new marble (marble C) to Charlie who is outside with her. Charlie enters the room and exchanges marble C with marble B in Anne's basket (basket A). Anne tells Charlie to tell Sally that he put marble C into the red box. Charlie leaves the room and speak to Sally about marble C as instructed by Anne. Meanwhile, after Charlie leaves the room, Bob enters into the room and moves marble A from the green box to under the sofa, but tells Anne to tell Sally that marble A is in the green box. Anne leaves the room and speak to Sally about the marble A as instructed by Bob", "Sally gives a new marble (marble D) to Anne. Anne gives the marble to Charlie. Charlie enters the room and gives marble D to Bob. Bob tells Charlie to tell Sally that he put marble D under the sofa. Bob put marble D under the sofa Charlie leaves the room and speaks to Sally about marble D. 
Meanwhile, after Charlie leaves the room, Bob takes marble A from under the sofa and places it in the blue box.", "Sally gives a new marble (marble E) to Charlie who is outside with her. Charlie enters the room and places marble E in the red box. Anne, who is already in the room, takes marble E from the red box, and hides it under the sofa. Then Anne leaves the room and tells Sally that marble E is in the green box. Meanwhile, after Anne leaves the room, Charlie who re-enters the room takes marble D from under the sofa and places it in his own basket (basket C).", ] - level_sally_anne_test_phrases = sally_anne_test_phrases[:current_level] + level_sally_anne_test_phrases = sally_anne_test_phrases[:level_to_run] create_instructions_files( - memory_management_agent, current_level, level_sally_anne_test_phrases + memory_management_agent, level_to_run, level_sally_anne_test_phrases, config ) - try: - run_interaction_loop(memory_management_agent, 90) - except SystemExit: - file_path = str(memory_management_agent.workspace.get_path("output.txt")) - content = read_file(file_path) - check_beliefs(content, current_level) + run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) + file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) + content = read_file(file_path, config) + check_beliefs(content, level_to_run) -def check_beliefs(content, level): + +def check_beliefs(content: str, level: int) -> None: # Define the expected beliefs for each level expected_beliefs = { 1: { @@ -59,16 +64,17 @@ def check_beliefs(content, level): 2: { "Sally": { "marble A": "sofa", # Because Charlie told her + "marble B": "lost", # Because Bob told her }, "Anne": { "marble A": "green box", # Because she moved it there "marble B": "basket A", # Because Bob put it there and she was in the room }, "Bob": { - "B": "basket A", # Last place he put it + "marble B": "basket A", # Last place he put it }, "Charlie": { - "A": "sofa", # Because Anne told him to tell 
Sally so + "marble A": "sofa", # Because Anne told him to tell Sally so }, }, 3: { @@ -147,16 +153,19 @@ def check_beliefs(content, level): }, }, } + # Extract the beliefs from the AI's response ai_beliefs = extract_beliefs(content) # Check the AI's beliefs against the expected beliefs for character, belief in expected_beliefs[level].items(): - assert ( - ai_beliefs.get(character) == belief - ), f"For {character}, expected '{belief}' but got '{ai_beliefs.get(character)}'" + for marble, location in belief.items(): + ai_belief = ai_beliefs.get(character, {}).get(marble, "") + assert ( + location in ai_belief + ), f"For {character}'s {marble}, expected '{location}' to be in '{ai_belief}'" -def extract_beliefs(content): +def extract_beliefs(content: str) -> Dict[str, Dict[str, str]]: """Extract the beliefs of each character from the AI's output.""" # Parse the JSON content content_dict = json.loads(content) @@ -168,6 +177,7 @@ def create_instructions_files( memory_management_agent: Agent, level: int, test_phrases: list, + config: Config, base_filename: str = "instructions_", ) -> None: """ @@ -176,13 +186,14 @@ def create_instructions_files( level: memory_management_agent (Agent) test_phrases (list) + config (Config) base_filename (str, optional) """ for i in range(1, level + 1): content = generate_content(i, test_phrases, base_filename, level) file_name = f"{base_filename}{i}.txt" - file_path = str(memory_management_agent.workspace.get_path(file_name)) - write_to_file(file_path, content) + file_path = get_workspace_path(memory_management_agent, file_name) + write_to_file(file_path, content, config) def generate_content( @@ -208,9 +219,9 @@ def generate_content( 2. Everyone outside the room do not see and do not know the actions of the people inside the room. 3. Only write about the marbles that are present in the level 4. Marbles names are marble A, marble B, marble C, marble D, ... - 5. 
Believed_location_of_the_specific_marble describes where is the marble like drawer, basket S, sofa, ... Also, refer to the location by the name of the object (like 'sofa', 'drawer', etc.), not by its relative position (like 'under the sofa', 'in the drawer', etc.) + 5. Believed_location_of_the_specific_marble describes where is the marble like drawer, basket S, sofa, ... 6. Do not use expression like use instead. - 7. Do not use expression like use instead. + 7. All characters believe what they're being told by other characters. Instructions: I) Write the following information in the file output.txt in JSON format: 1. The respective beliefs of the characters (which means where every marble is according to character x, y, z. Character x should say where it believes every marble it is aware exist is) diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py index bad835dc707d..4be96481d036 100644 --- a/tests/integration/agent_factory.py +++ b/tests/integration/agent_factory.py @@ -107,10 +107,7 @@ def file_system_agents( @pytest.fixture def memory_management_agent(agent_test_config, memory_json_file, workspace: Workspace): - command_registry = CommandRegistry() - command_registry.import_commands("autogpt.commands.file_operations") - command_registry.import_commands("autogpt.app") - command_registry.import_commands("autogpt.commands.task_statuses") + command_registry = get_command_registry(agent_test_config) ai_config = AIConfig( ai_name="Follow-Instructions-GPT", @@ -125,7 +122,7 @@ def memory_management_agent(agent_test_config, memory_json_file, workspace: Work system_prompt = ai_config.construct_full_prompt() agent = Agent( - ai_name="", + ai_name="Follow-Instructions-GPT", memory=memory_json_file, command_registry=command_registry, config=ai_config, diff --git a/tests/integration/challenges/memory/cassettes/test_memory_challenge_d/test_memory_challenge_d.yaml 
b/tests/integration/challenges/memory/cassettes/test_memory_challenge_d/test_memory_challenge_d.yaml deleted file mode 100644 index c6165910798f..000000000000 --- a/tests/integration/challenges/memory/cassettes/test_memory_challenge_d/test_memory_challenge_d.yaml +++ /dev/null @@ -1,197 +0,0 @@ -interactions: -- request: - body: '{"model": "gpt-3.5-turbo", "messages": [{"role": "system", "content": "You - are Follow-Instructions-GPT, an AI designed to read the instructions_1.txt file - using the read_file method and follow the instructions in the file.\nYour decisions - must always be made independently without seeking user assistance. Play to your - strengths as an LLM and pursue simple strategies with no legal complications.\n\nGOALS:\n\n1. - Use the command read_file to read the instructions_1.txt file\n2. Follow the - instructions in the instructions_1.txt file\n\n\nConstraints:\n1. ~4000 word - limit for short term memory. Your short term memory is short, so immediately - save important information to files.\n2. If you are unsure how you previously - did something or want to recall past events, thinking about similar events will - help you remember.\n3. No user assistance\n4. Exclusively use the commands listed - in double quotes e.g. \"command name\"\n\nCommands:\n1. append_to_file: Append - to file, args: \"filename\": \"\", \"text\": \"\"\n2. delete_file: - Delete file, args: \"filename\": \"\"\n3. list_files: List Files in - Directory, args: \"directory\": \"\"\n4. read_file: Read file, args: - \"filename\": \"\"\n5. write_to_file: Write to file, args: \"filename\": - \"\", \"text\": \"\"\n6. delete_agent: Delete GPT Agent, args: - \"key\": \"\"\n7. get_hyperlinks: Get text summary, args: \"url\": \"\"\n8. - get_text_summary: Get text summary, args: \"url\": \"\", \"question\": - \"\"\n9. list_agents: List GPT Agents, args: () -> str\n10. message_agent: - Message GPT Agent, args: \"key\": \"\", \"message\": \"\"\n11. 
- start_agent: Start GPT Agent, args: \"name\": \"\", \"task\": \"\", - \"prompt\": \"\"\n12. task_complete: Task Complete (Shutdown), args: - \"reason\": \"\"\n\nResources:\n1. Internet access for searches and - information gathering.\n2. Long Term memory management.\n3. GPT-3.5 powered - Agents for delegation of simple tasks.\n4. File output.\n\nPerformance Evaluation:\n1. - Continuously review and analyze your actions to ensure you are performing to - the best of your abilities.\n2. Constructively self-criticize your big-picture - behavior constantly.\n3. Reflect on past decisions and strategies to refine - your approach.\n4. Every command has a cost, so be smart and efficient. Aim - to complete tasks in the least number of steps.\n5. Write all code to a file.\n\nYou - should only respond in JSON format as described below \nResponse Format: \n{\n \"thoughts\": - {\n \"text\": \"thought\",\n \"reasoning\": \"reasoning\",\n \"plan\": - \"- short bulleted\\n- list that conveys\\n- long-term plan\",\n \"criticism\": - \"constructive self-criticism\",\n \"speak\": \"thoughts summary to say - to user\"\n },\n \"command\": {\n \"name\": \"command name\",\n \"args\": - {\n \"arg name\": \"value\"\n }\n }\n} \nEnsure the response - can be parsed by Python json.loads"}, {"role": "system", "content": "The current - time and date is Tue Jan 1 00:00:00 2000"}, {"role": "user", "content": "Determine - which next command to use, and respond using the format specified above:"}], - "temperature": 0, "max_tokens": 0}' - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - Content-Length: - - '3299' - Content-Type: - - application/json - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA4RTTW/bMAy9+1cQOsdBMyOft22HtdhlBbbL5iFQZNpWK1OeRK/Jgvz3QVbcdE7R - Xd8j9R7Jp2MCIHQhNiBULVk1rUmXX3a8/P3w7vth/fljsXbzT7drPtzXpv1wey8mocPuHlDx0DVV - 
tmkNsrYUaeVQMoZXZ4vVIstWs/WyJxpboAltVctpNp2n3LmdTW+ym1ns7LysUGzgmAAAiNbZpuUt - 20ckLzawzLJJZC6SF3Y2H1i2LM2FWK0WCcApequtVhjQH31tVAre0P8j3oPOmoAI6b32LInF5EIq - S4zUr+GYU4BywbXtqpp9Ht6J4JnAPQcwF3fga9uZAjxLx7A7gENZaKqAawRNnl2nwmR+O5vynqHU - BqHzQ0Wo3vaYsk0jqZjmYvJSyqH0ljRVUe9rjVBq5xk8YwvaA1voqEAXJiquVCGAT7XkUOrwV6cd - FmBLaHCs1BpJUSSFbx5ftxfkAvjWeHlOKbwnaQ5/8HVDBTK6RlOkCfdxHD9ypJxmrbRvhl0TYu8A - yXcuNEuGOyg7Yw5vLmGHpXUIrbMKMV7HjqTH2/AtysdB90kb8/8L98NPcxGfOU2GGJ1Xd5Uikg1G - hec1j0xIV43DF4lQe2m/vsNgojdy9pPTSZzzfhqCL0pN2tfbmLIQfs+2ff4XQlOBe7GBmx44JQA/ - k1PyFwAA//8DAD84Ve5pBAAA - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 7d4bb5c41b381736-SJC - Cache-Control: - - no-cache, must-revalidate - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Fri, 09 Jun 2023 19:16:43 GMT - Server: - - cloudflare - access-control-allow-origin: - - '*' - alt-svc: - - h3=":443"; ma=86400 - openai-model: - - gpt-3.5-turbo-0301 - openai-organization: - - user-adtx4fhfg1qsiyzdoaxciooj - openai-processing-ms: - - '6495' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=15724800; includeSubDomains - x-ratelimit-limit-requests: - - '3500' - x-ratelimit-limit-tokens: - - '90000' - x-ratelimit-remaining-requests: - - '3499' - x-ratelimit-remaining-tokens: - - '86493' - x-ratelimit-reset-requests: - - 17ms - x-ratelimit-reset-tokens: - - 2.337s - x-request-id: - - d94306fd72fb936eb95f41b37df62754 - status: - code: 200 - message: OK -- request: - body: '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "Your - task is to create a concise running summary of actions and information results - in the provided text, focusing on key and potentially important information - to remember.\n\nYou will receive the current summary and the your latest actions. 
- Combine them, adding relevant key information from the latest development in - 1st person past tense and keeping the summary concise.\n\nSummary So Far:\n\"\"\"\nI - was created.\n\"\"\"\n\nLatest Development:\n\"\"\"\nNothing new happened.\n\"\"\"\n"}], - "temperature": 0, "max_tokens": 0}' - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - Content-Length: - - '599' - Content-Type: - - application/json - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA1SQQU/DMAyF7/0VVs7t1K5iG70hoWlc2MSFA0JTlnpNRhtHjctAU/87StsxuL7P - 7z3blwhAmFIUIJSWrBpXJ8vdgTf1dn2qdsfu9Tl9aR+/z2u9bT4fFhsRBwcdTqj46popalyNbMiO - WLUoGUNqtlgt8nw1T5cDaKjEOtgqx0k+u0u4aw+UpHmajc7OywpFAZcIAEC4lhrHe6YPtF4UcD+P - R3Br/AMnxsSyvslZmkUA/biYJqMwqG/D7FgTFkP/r3kQW6qDIqT3xrO0LOIbVGQZ7fCDJzhLD9PR - IG0JllgbW4HFM2jpQUvn0GI5E1NAf00SR2ON1/sWpScb0jyT+y0Sxpb4JQpIB6GPAN6jPvoBAAD/ - /wMAMehHercBAAA= - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 7d4bb6017b091736-SJC - Cache-Control: - - no-cache, must-revalidate - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Fri, 09 Jun 2023 19:16:48 GMT - Server: - - cloudflare - access-control-allow-origin: - - '*' - alt-svc: - - h3=":443"; ma=86400 - openai-model: - - gpt-3.5-turbo-0301 - openai-organization: - - user-adtx4fhfg1qsiyzdoaxciooj - openai-processing-ms: - - '988' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=15724800; includeSubDomains - x-ratelimit-limit-requests: - - '3500' - x-ratelimit-limit-tokens: - - '90000' - x-ratelimit-remaining-requests: - - '3499' - x-ratelimit-remaining-tokens: - - '89866' - x-ratelimit-reset-requests: - - 17ms - x-ratelimit-reset-tokens: - - 88ms - x-request-id: - - b83582e4244a6eaecae6dfebc88e46bf - status: - code: 200 - message: OK -version: 1