|
| 1 | +from typing import List |
| 2 | +from mito_ai.models import AgentSmartDebugMetadata |
| 3 | + |
| 4 | +# TODO: |
| 5 | +# 1. In the future, it might make sense to pass the previous CELL_UPDATE to this prompt? |
| 6 | +# 2. In the future, we should let the agent fix up the error by updating a different cell. This is sometimes a better solution. |
| 7 | +# However, to do this, we then need to know which code cells to run in order to validate the update is correct! If the error was |
| 8 | +# produced by code cell 3, and the agent corrects the source of the error in code cell 2, we then need to run cell 2 and 3 to validate |
| 9 | +# the cell update worked properly. This could be many cells if there are intermediate cells. It might require something like a dependency |
| 10 | +# graph of cells that we calculate ourselves, not relying on the AI. |
| 11 | + |
def create_agent_smart_debug_prompt(md: AgentSmartDebugMetadata) -> str:
    """Build the prompt asking the agent to repair a CELL_UPDATE that errored.

    The prompt contains a fixed debugging strategy (error analysis, intent
    preservation, error correction), one worked example, and then the live
    context taken from ``md``.

    Args:
        md: Metadata for the failed update. The attributes read here are
            ``variables``, ``files`` and ``aiOptimizedCells`` (each an
            iterable or ``None``), ``error_message_producing_code_cell_id``
            and ``errorMessage``.

    Returns:
        The fully rendered prompt string.
    """

    def _one_per_line(items) -> str:
        # A missing (None) or empty collection renders as an empty section
        # rather than raising.
        return '\n'.join(str(item) for item in items or [])

    variables_str = _one_per_line(md.variables)
    files_str = _one_per_line(md.files)
    ai_optimized_cells_str = _one_per_line(md.aiOptimizedCells)

    # NOTE: this is an f-string, so literal braces in the example are doubled
    # and the triple quotes shown inside the example notebook are escaped.
    return f"""I just applied and executed the CELL_UPDATE that you just shared with me, but it errored. Below I am sharing with you a strategy for how I want you to resolve this error and information about the actual error that occurred.

Use this strategy for this message only. After this message, continue using the original set of instructions that I provided you.

It is very important that when fixing this error, you do not change the original intent of the code cell.

To fix this error, take the following approach:
Step 1: ERROR ANALYSIS: Analyze the error message to identify why the code cell errored.
Step 2: INTENT PRESERVATION: Make sure you understand the intent of the CELL_UPDATE so that you can be sure to preserve it when you create a new CELL_UPDATE
Step 3: ERROR CORRECTION: Respond with a new CELL_UPDATE that is applied to the same cell as the erroring CELL_UPDATE.

<Instructions for each Phase />

ERROR ANALYSIS:

- Identify error type (Syntax, Runtime, Logic).
- Use the defined variables and Jupyter Notebook to understand the error.
- Consider kernel state and execution order

INTENT PRESERVATION:

- Try to understand the user's intent using the defined variables and the Jupyter Notebook

ERROR CORRECTION:

- Return the full, updated version of cell {md.error_message_producing_code_cell_id} with the error fixed and a short explanation of the error.
- You can only update code in {md.error_message_producing_code_cell_id}. You are unable to edit the code in any other cell when resolving this error.
- Propose a solution that fixes the error and does not change the user's intent.
- Make the solution as simple as possible.
- Reuse as much of the existing code as possible.
- DO NOT ADD TEMPORARY COMMENTS like '# Fixed the typo here' or '# Added this line to fix the error'

<Example>

<Input>

Files in the current directory:
file_name: sales.csv

Jupyter Notebook:
[
    {{
        cell_type: 'markdown'
        id: '9e38c62b-38f8-457d-bb8d-28bfc52edf2c'
        code: \"\"\"# Transaction Analysis \"\"\"
    }},
    {{
        cell_type: 'code'
        id: 'adslkaf-jf73-l8xn-92j7-kjd8kdcnd2kso'
        code: \"\"\"df = pd.DataFrame({{
    'order_id': [1, 2, 3, 4],
    'date': ['Mar 7, 2025', 'Sep 24, 2024', '25 June, 2024', 'June 29, 2024'],
    'amount': [100, 150, 299, 99]
}})\"\"\"
    }},
    {{
        cell_type: 'code'
        id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc'
        code: \"\"\"df['date'] = pd.to_datetime(df['date'])\"\"\"
    }},
]

Defined Variables:
{{
    'df': pd.DataFrame({{
        'order_id': [1, 2, 3, 4],
        'date': ['Mar 7, 2025', 'Sep 24, 2024', '25 June, 2024', 'June 29, 2024'],
        'amount': [100, 150, 299, 99]
    }})
}}

Cell ID of the Error Producing Code Cell:
'c68fdf19-db8c-46dd-926f-d90ad35bb3bc'

Error Traceback:
Cell In[27], line 1
----> 1 df['date'] = pd.to_datetime(df['date'])

ValueError: time data "25 June, 2024" doesn't match format "%b %d, %Y", at position 2. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.


</ Input>

< Your Thinking >

ERROR ANALYSIS
This is a ValueError caused by applying the wrong format to a specific date string. Because it was triggered at position 2, the first date string must have successfully converted. By looking at the defined variables, I can see that first date string is in the format "Mar 7, 2025", but the third date string is in the format "25 June, 2024". Those dates are not in the same format, so the conversion failed.

INTENT PRESERVATION:
User is trying to convert the date column to a datetime object even though the dates are not in the same starting format.

</ Your Thinking >

<Output>


{{
    is_finished: false,
    cell_update: {{
        type: 'modification'
        id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc'
        code: "def parse_date(date_str):\n    formats = ['%b %d, %Y', '%d %B, %Y']\n\n    for fmt in formats:\n        try:\n            return pd.to_datetime(date_str, format=fmt)\n        except ValueError:\n            # Try next format\n            continue\n\n    # If no format worked, return Not a Time\n    return pd.NaT\n\ndf['date'] = df['date'].apply(lambda x: parse_date(x))"
    }}
}}

</Output>

</Example>

Files in the current directory:
{files_str}

Jupyter Notebook:
{ai_optimized_cells_str}

Defined Variables:
{variables_str}

Cell ID of the Error Producing Code Cell:
{md.error_message_producing_code_cell_id}

Error Traceback:
{md.errorMessage}
"""
0 commit comments