feat: added tmux terminal tool, major tools refactor (#86)

* feat: added tmux terminal tool for long-running and interactive commands, also general tool cleanup
* fix: added auto-register of tools' block types, changed block args to list[str], forked append out of save tool, refactored/improved tmux terminal tool
* fix: streamlined prompt generation from tools, refactored checks for tool availability (e.g. browser), created minimal `read` and `gh` tools for prompting alone
* tests: added minimal test_config.py
* fix: fixed bug with openai
ErikBjare · Aug 7, 2024 · 0c0f987 · 0c0f987
1 parent 475173e
commit 0c0f987
Show file tree

Hide file tree

Showing 16 changed files with 524 additions and 292 deletions.
diff --git a/eval/evals.py b/eval/evals.py
@@ -63,7 +63,7 @@
     #     "name": "init-vue-ts-tailwind",
     #     "files": {},
     #     "run": "cat package.json",
-    #     "prompt": "initialize a vue project with typescript and tailwind, make a page that says 'Hello, world!'. don't try to execute it or do anything interactive",
+    #     "prompt": "initialize a vue project with typescript and tailwind, make a page that says 'Hello, world!'. avoid interactive tools to initialize the project",
     #     "expect": {
     #         "package.json exists": lambda ctx: "package.json" in ctx.files,
     #         "vue installed": lambda ctx: '"vue":' in ctx.files["package.json"],

diff --git a/gptme/commands.py b/gptme/commands.py
@@ -84,10 +84,11 @@ def handle_cmd(
     name, *args = re.split(r"[\n\s]", cmd)
     full_args = cmd.split(" ", 1)[1] if " " in cmd else ""
     match name:
+        # TODO: rewrite to auto-register tools using block_types
         case "bash" | "sh" | "shell":
-            yield from execute_shell(full_args, ask=not no_confirm)
+            yield from execute_shell(full_args, ask=not no_confirm, args=[])
         case "python" | "py":
-            yield from execute_python(full_args, ask=not no_confirm)
+            yield from execute_python(full_args, ask=not no_confirm, args=[])
         case "log":
             log.undo(1, quiet=True)
             log.print(show_hidden="--hidden" in args)

diff --git a/gptme/llm.py b/gptme/llm.py
@@ -171,7 +171,9 @@ def _stream_openai(messages: list[Message], model: str) -> Generator[str, None,
             # Got a chunk with no choices, Azure always sends one of these at the start
             continue
         stop_reason = chunk.choices[0].finish_reason  # type: ignore
-        yield chunk.choices[0].delta.content  # type: ignore
+        content = chunk.choices[0].delta.content  # type: ignore
+        if content:
+            yield content
     logger.debug(f"Stop reason: {stop_reason}")
 
 
@@ -194,55 +196,52 @@ def _stream_anthropic(
 def _reply_stream(messages: list[Message], model: str) -> Message:
     print(f"{PROMPT_ASSISTANT}: Thinking...", end="\r")
 
-    def deltas_to_str(deltas: list[str]):
-        return "".join([d or "" for d in deltas])
-
     def print_clear():
         print(" " * shutil.get_terminal_size().columns, end="\r")
 
-    deltas: list[str] = []
+    output = ""
     print_clear()
     print(f"{PROMPT_ASSISTANT}: ", end="")
     try:
-        for delta in _stream(messages, model):
-            if isinstance(delta, tuple):
-                print("Got a tuple, expected str")
-                continue
-            if isinstance(delta, tuple):
-                print("Got a Chunk, expected str")
-                continue
-            deltas.append(delta)
-            delta_str = deltas_to_str(deltas)
-            print(deltas_to_str([deltas[-1]]), end="")
+        for char in (char for chunk in _stream(messages, model) for char in chunk):
+            print(char, end="")
+            assert len(char) == 1
+            output += char
+
             # need to flush stdout to get the print to show up
             sys.stdout.flush()
 
             # pause inference on finished code-block, letting user run the command before continuing
-            codeblock_started = "```" in delta_str[:-3]
-            codeblock_finished = "\n```\n" in delta_str[-7:]
+            codeblock_started = "```" in output[:-3]
+            codeblock_finished = "\n```\n" in output[-7:]
             if codeblock_started and codeblock_finished:
+                print("\nFound codeblock, breaking")
                 # noreorder
                 from .tools import is_supported_codeblock  # fmt: skip
 
                 # if closing a code block supported by tools, abort generation to let them run
-                if is_supported_codeblock(delta_str):
+                if is_supported_codeblock(output):
                     print("\n")
                     break
+                else:
+                    logger.warning(
+                        "Code block not supported by tools, continuing generation"
+                    )
 
             # pause inference in finished patch
-            patch_started = "```patch" in delta_str[:-3]
-            patch_finished = "\n>>>>>>> UPDATED" in delta_str[-30:]
+            patch_started = "```patch" in output[:-3]
+            patch_finished = "\n>>>>>>> UPDATED" in output[-30:]
             if patch_started and patch_finished:
-                if "```" not in delta_str[-10:]:
+                if "```" not in output[-10:]:
                     print("\n```", end="")
-                    deltas.append("\n```")
+                    output += "\n```"
                 print("\n")
                 break
     except KeyboardInterrupt:
-        return Message("assistant", deltas_to_str(deltas) + "... ^C Interrupted")
+        return Message("assistant", output + "... ^C Interrupted")
     finally:
         print_clear()
-    return Message("assistant", deltas_to_str(deltas))
+    return Message("assistant", output)
 
 
 def get_recommended_model() -> str:

diff --git a/gptme/prompts.py b/gptme/prompts.py
@@ -9,15 +9,7 @@
 
 from .config import get_config
 from .message import Message
-from .tools import (
-    browser,
-    init_tools,
-    loaded_tools,
-    patch,
-    python,
-    save,
-    shell,
-)
+from .tools import init_tools, loaded_tools, python
 
 PromptType = Literal["full", "short"]
 
@@ -47,21 +39,15 @@ def join_messages(msgs: Iterable[Message]) -> Message:
 def prompt_full() -> Generator[Message, None, None]:
     """Full prompt to start the conversation."""
     yield from prompt_gptme()
-
     yield from prompt_tools()
-    # Useful in debugging
-    #yield from prompt_tools_from_spec()
-    yield from prompt_examples()
-    yield from prompt_gh()
-
     yield from prompt_user()
     yield from prompt_project()
 
 
 def prompt_short() -> Generator[Message, None, None]:
     """Short prompt to start the conversation."""
     yield from prompt_gptme()
-    yield from prompt_tools()
+    yield from prompt_tools(examples=False)
     yield from prompt_user()
     yield from prompt_project()
 
@@ -143,164 +129,39 @@ def prompt_code_interpreter() -> Generator[Message, None, None]:  # pragma: no c
     )
 
 
-def prompt_tools_from_spec() -> Generator[Message, None, None]:
-    # TODO: this should be moved to tools.py
-    # tools must have been initialized by now
+def prompt_tools(examples=True) -> Generator[Message, None, None]:
     init_tools()
-    prompt = ""
     assert loaded_tools, "No tools loaded"
+    prompt = "# Tools"
     for tool in loaded_tools:
-        prompt += (
-            f"""## {tool.name}
-
-{tool.desc.strip()}
-
-{tool.instructions.strip()}""".strip()
-            + "\n\n"
-        )
-    yield Message("system", prompt.strip())
-
-
-def prompt_tools() -> Generator[Message, None, None]:
-    python_libraries = get_installed_python_libraries()
-    python_libraries_str = "\n".join(f"- {lib}" for lib in python_libraries)
-
-    shell_programs = get_installed_programs()
-    shell_programs_str = "\n".join(f"- {prog}" for prog in shell_programs)
-
-    yield Message(
-        "system",
-        f"""
-# Tools
-
-## python
-
-{python.instructions}
-
-The following libraries are available:
+        prompt += f"\n\n## {tool.name}"
+        if tool.desc:
+            prompt += f"\n\n{tool.desc}"
+        if tool.instructions:
+            prompt += f"\n\n{tool.instructions}"
+
+        # tool-specific
+        # TODO: move into tools themselves
+        if tool.name == "python":
+            python_libraries = get_installed_python_libraries()
+            python_libraries_str = "\n".join(f"- {lib}" for lib in python_libraries)
+            prompt += f"""\n\nThe following libraries are available:
 {python_libraries_str}
 
 The following functions are available in the REPL:
 {python.get_functions_prompt()}
-
-## bash
-
-{shell.instructions}
-
-These programs are available, among others:
+            """.rstrip()
+        elif tool.name == "bash":
+            shell_programs = get_installed_programs()
+            shell_programs_str = "\n".join(f"- {prog}" for prog in shell_programs)
+            prompt += f"""\n\nThese programs are available, among others:
 {shell_programs_str}
-
-## saving files
-
-{save.instructions}
-
-## patching files
-
-{patch.instructions}
-""".strip()
-        + (
-            f"""
-
-## browsing the web
-
-{browser.instructions}
 """.rstrip()
-            if browser.has_browser_tool()
-            else ""
-        ),
-    )
-
-
-def prompt_examples() -> Generator[Message, None, None]:
-    yield Message(
-        "system",
-        f"""
-# Examples
-
-## bash
-
-{shell.examples}
-
-## Python
-
-{python.examples}
-
-## Save files
-
-{save.examples}
-
-## Read files
-
-Reading is done using `cat`.
 
-> User: read hello.py
-```bash
-cat hello.py
-```
-(prints the contents of `hello.py`)
+        if tool.examples and examples:
+            prompt += f"\n\n### Examples\n\n{tool.examples}"
 
-## Putting it together
-
-> User: run hello.py
-```bash
-python hello.py
-```
-> stdout: `Hello world!`
-
-## Patching files
-
-{patch.examples}
-""".strip()
-        + f"""
-
-## Browsing the web
-
-{browser.examples}
-""".rstrip()
-        if browser.has_browser_tool()
-        else "",
-    )
-
-
-def prompt_gh() -> Generator[Message, None, None]:
-    # gh examples
-    # only include if gh is installed
-    if shutil.which("gh") is not None:
-        yield Message(
-            "system",
-            """
-## gh
-
-Here are examples of how to use the GitHub CLI (gh) to interact with GitHub.
-
-> User: create a public repo from the current directory, and push
-Note: --confirm and -y are deprecated, and no longer needed
-```sh
-REPO=$(basename $(pwd))
-gh repo create $REPO --public --source . --push
-```
-
-> User: show issues
-```sh
-gh issue list --repo $REPO
-```
-
-> User: read issue with comments
-```sh
-gh issue view $ISSUE --repo $REPO --comments
-```
-
-> User: show recent workflows
-```sh
-gh run list --status failure --repo $REPO --limit 5
-```
-
-> User: show workflow
-```sh
-gh run view $RUN --repo $REPO --log
-```
-""".strip(),
-        )
+    yield Message("system", prompt.strip() + "\n\n")
 
 
 @functools.lru_cache