fix: fixed bug in codeblock handling, use gpt-3.5-turbo in tests, improved testing

ErikBjare committed Oct 30, 2023
1 parent 84e68c6 commit 99eda23
Showing 4 changed files with 58 additions and 7 deletions.
2 changes: 1 addition & 1 deletion gptme/llm.py
@@ -80,7 +80,7 @@ def _reply_stream(messages: list[Message], model: str) -> Message:
top_p=top_p,
stream=True,
# the llama-cpp-python server needs this explicitly set, otherwise unreliable results
max_tokens=1000 if model not in ["gpt-3.5-turbo", "gpt-4"] else None,
max_tokens=1000 if not model.startswith("gpt-") else None,
)

def deltas_to_str(deltas: list[dict]):
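
Note on the change above: the max_tokens cap is now keyed on a model-name prefix rather than a hard-coded allowlist, so any OpenAI gpt-* model (including, e.g., gpt-3.5-turbo-16k) is left uncapped, while local backends such as llama-cpp-python keep the explicit limit they need for reliable results. A minimal standalone restatement of the changed expression (the helper name is illustrative, not part of the codebase):

def max_tokens_for(model: str) -> int | None:
    # llama-cpp-python needs max_tokens set explicitly, otherwise results are unreliable;
    # OpenAI gpt-* models are left uncapped (None)
    return 1000 if not model.startswith("gpt-") else None

assert max_tokens_for("gpt-3.5-turbo-16k") is None  # the old allowlist would have capped this
assert max_tokens_for("llama-2-13b-chat") == 1000   # non-OpenAI models still get the explicit cap
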
20 changes: 17 additions & 3 deletions gptme/tools/__init__.py
@@ -36,7 +36,6 @@ def execute_codeblock(codeblock: str, ask: bool) -> Generator[Message, None, Non
codeblock_content = codeblock[len(lang_or_fn) :]

is_filename = lang_or_fn.count(".") >= 1
assert is_supported_codeblock(codeblock), "Codeblock is not supported"

if lang_or_fn in ["python", "py"]:
yield from execute_python(codeblock_content, ask=ask)
@@ -48,16 +47,31 @@ def execute_codeblock(codeblock: str, ask: bool) -> Generator[Message, None, Non
elif is_filename:
yield from execute_save(lang_or_fn, codeblock_content, ask=ask)
else:
logger.warning(
assert not is_supported_codeblock(codeblock)
logger.debug(
f"Unknown codeblock type '{lang_or_fn}', neither supported language or filename."
)


def is_supported_codeblock(codeblock: str) -> bool:
"""Returns whether a codeblock is supported by tools."""
lang_or_fn = codeblock.splitlines()[0].strip()
# TODO: refactor to share code with `LogManager.get_last_code_block()`
# passed argument might not be a clean string, could have leading text and even leading codeblocks
# strip everything but the last occurring codeblock

# extract contents of codeblock, including the lang/filename
contents = codeblock.split("```")[-2]

# extract lang/filename
lang_or_fn = contents.splitlines()[0].strip()
is_filename = lang_or_fn.count(".") >= 1

# remove lang/filename from contents
contents = contents.split("\n", 1)[-1]

# reconstruct clean codeblock
codeblock = f"```{lang_or_fn}\n{contents}```"

if lang_or_fn in ["python", "py"]:
return True
elif lang_or_fn in ["bash", "sh"]:
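
Note on the change above: is_supported_codeblock no longer assumes its argument is a clean codeblock; it may arrive with leading prose or even earlier codeblocks. The new logic slices out the last complete ```-fenced block, reads its first line as the language tag or filename, and reconstructs a clean block before checking support. A rough standalone sketch of that parsing step (function name and example string are illustrative, not from the commit):

def last_codeblock_lang(text: str) -> str:
    # split on the fences and take the contents of the last complete block
    contents = text.split("```")[-2]
    # the first line of the block is the language tag or a filename
    return contents.splitlines()[0].strip()

msg = "Sure, here is the script:\n```python\nprint('hello')\n```"
assert last_codeblock_lang(msg) == "python"
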
41 changes: 38 additions & 3 deletions tests/test-integration.sh
@@ -1,7 +1,36 @@
#!/bin/bash

set -e
set -x
# We test with gpt-4 and gpt-3.5-turbo.
# gpt-3.5-turbo is a lot faster, so makes running the tests faster,
# but gpt-4 is more accurate, so passes more complex tests where gpt-3.5-turbo stumbles.
# there is also gpt-3.5-turbo-16k, which handles contexts up to 16k tokens (vs gpt-4's 8k and gpt-3.5-turbo's 4k).
MODEL="gpt-3.5-turbo"
ARGS="--model $MODEL"


# if one of the args to this script was --ask, ask if test passed/failed/idk after each test
if [ "$1" = "--ask" ]; then
ASK="1"
fi

# overwrite gptme using a function that adds the arguments and calls the original, supporting several arguments
function gptme() {
echo "$ gptme $ARGS $@"
/usr/bin/env gptme $ARGS "$@" </dev/null
if [ "$ASK" = "1" ]; then
echo -n "Did the test pass? (y/n/I) "
read -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
echo "Test passed"
elif [[ $REPLY =~ ^[Nn]$ ]]; then
echo "Test failed"
exit 1
else
echo "I don't know"
fi
fi
}

# set pwd to the output directory under this script
cd "$(dirname "$0")"
@@ -14,8 +43,12 @@ interactive=${GITHUB_ACTIONS:-1}
# set this to indicate tests are run (non-interactive)
export PYTEST_CURRENT_TEST=1

set -e

# test stdin and cli-provided prompt
echo "The project mascot is a flying pig" | gptme "What is the project mascot?"
# NOTE: we do not do this as part of the suite, because our gptme function wrapper above does not support stdin
# if you want to run it, copy the line into your terminal
# echo "The project mascot is a flying pig" | gptme "What is the project mascot?"

# test load context from file
echo "The project mascot is a flying pig" > mascot.txt
@@ -63,3 +96,5 @@ if [ "$interactive" = "1" ]; then
# interactive matplotlib
gptme 'plot an x^2 graph'
fi

gptme 'render mandelbrot set to mandelbrot.png'
2 changes: 2 additions & 0 deletions tests/test_cli.py
@@ -106,6 +106,8 @@ def test_generate_primes(name: str):
"--name",
name,
"print the first 10 prime numbers",
"--model",
"gpt-3.5-turbo",
],
)
# check that the 9th and 10th prime is present
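
Note on the change above: the test now pins the model to gpt-3.5-turbo so the CLI tests run faster. For context, a hedged sketch of what the full invocation might look like, assuming the test drives the CLI through click's CliRunner and a gptme.cli:main entry point (the import path, fixture value, and exact assertions are reconstructed for illustration, not copied from the repository):

from click.testing import CliRunner

from gptme.cli import main  # assumed entry point, not shown in the diff

def test_generate_primes(name: str = "test-primes"):  # hypothetical value for the name fixture
    runner = CliRunner()
    result = runner.invoke(
        main,
        ["--name", name, "print the first 10 prime numbers", "--model", "gpt-3.5-turbo"],
    )
    # check that the 9th and 10th prime is present (23 and 29)
    assert "23" in result.output
    assert "29" in result.output
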
