def parse_prompt(prompt: str) -> str:
    """Expand a prompt that names an existing file into a fenced code block.

    If ``prompt`` (after ``~`` expansion) is the path of a regular file whose
    contents can be read as text, return a Markdown-style code block whose
    info string is the prompt as given and whose body is the file's text.
    Otherwise return ``prompt`` unchanged.

    Args:
        prompt: A prompt from the command line; either free text or a path.

    Returns:
        The fenced file contents, or the original prompt.

    Raises:
        OSError: If checking or reading the path fails for any reason other
            than the prompt being too long to be a file name.
    """
    try:
        path = Path(prompt).expanduser()
        # is_file() is already False for paths that do not exist
        if path.is_file():
            return f"```{prompt}\n{path.read_text()}\n```"
    except OSError as oserr:
        # some prompts are too long to be a path, so we can't read them;
        # any other OS-level failure is unexpected and must not be hidden
        if oserr.errno != errno.ENAMETOOLONG:
            raise
    except UnicodeDecodeError:
        # some files are not text files (images, audio, PDFs, binaries, etc), so we can't read them
        # TODO: but can we handle them better than just printing the path? maybe with metadata from `file`?
        pass
    return prompt
def is_supported_codeblock(codeblock: str) -> bool:
    """Return whether a codeblock is supported by tools.

    Only the first line of ``codeblock`` is inspected, with surrounding
    whitespace stripped. The block is supported when that line is one of
    ``python``, ``py``, ``bash`` or ``sh``, starts with ``"patch "``, or
    contains a ``.`` (treated as a filename).

    Args:
        codeblock: The codeblock text, beginning with its language tag or
            filename line.

    Returns:
        True if the first line matches one of the supported forms, else
        False. An empty ``codeblock`` is not supported.
    """
    lines = codeblock.splitlines()
    if not lines:
        # "".splitlines() is [], so there is no header line to inspect
        return False

    lang_or_fn = lines[0].strip()
    if lang_or_fn in ("python", "py", "bash", "sh"):
        return True
    if lang_or_fn.startswith("patch "):
        return True
    # anything containing a dot is treated as a filename
    return "." in lang_or_fn
# FIXME: model assumption
def len_tokens(content: str | Message | list[Message], model: str = "gpt-4") -> int:
    """Count the tokens in a string, a message, or a list of messages.

    A list is counted as the sum of the token counts of each message's
    ``content``; a single message is counted by its ``content``; a string is
    encoded with the tokenizer for ``model`` and the encoded length returned.
    """
    if isinstance(content, list):
        total = 0
        for message in content:
            total += len_tokens(message.content, model)
        return total

    if isinstance(content, Message):
        return len_tokens(content.content, model)

    encoded = get_tokenizer(model).encode(content)
    return len(encoded)