fix: fixed bug in codeblock handling, use gpt-3.5-turbo in tests, improved testing

ErikBjare committed Oct 30, 2023
1 parent 84e68c6 commit 99eda23
Showing 4 changed files with 58 additions and 7 deletions.
2 changes: 1 addition & 1 deletion gptme/llm.py
@@ -80,7 +80,7 @@ def _reply_stream(messages: list[Message], model: str) -> Message:
top_p=top_p,
stream=True,
# the llama-cpp-python server needs this explicitly set, otherwise unreliable results
max_tokens=1000 if model not in ["gpt-3.5-turbo", "gpt-4"] else None,
max_tokens=1000 if not model.startswith("gpt-") else None,
)

def deltas_to_str(deltas: list[dict]):
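
Note on the change above: the max_tokens cap is now keyed on a model-name prefix rather than a hard-coded allowlist, so any OpenAI gpt-* model (including, e.g., gpt-3.5-turbo-16k) is left uncapped, while local backends such as llama-cpp-python keep the explicit limit they need for reliable results. A minimal standalone restatement of the changed expression (the helper name is illustrative, not part of the codebase):

def max_tokens_for(model: str) -> int | None:
    # llama-cpp-python needs max_tokens set explicitly, otherwise results are unreliable;
    # OpenAI gpt-* models are left uncapped (None)
    return 1000 if not model.startswith("gpt-") else None

assert max_tokens_for("gpt-3.5-turbo-16k") is None  # the old allowlist would have capped this
assert max_tokens_for("llama-2-13b-chat") == 1000   # non-OpenAI models still get the explicit cap
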
20 changes: 17 additions & 3 deletions gptme/tools/__init__.py
@@ -36,7 +36,6 @@ def execute_codeblock(codeblock: str, ask: bool) -> Generator[Message, None, Non
codeblock_content = codeblock[len(lang_or_fn) :]

is_filename = lang_or_fn.count(".") >= 1
assert is_supported_codeblock(codeblock), "Codeblock is not supported"

if lang_or_fn in ["python", "py"]:
yield from execute_python(codeblock_content, ask=ask)
@@ -48,16 +47,31 @@ def execute_codeblock(codeblock: str, ask: bool) -> Generator[Message, None, Non
elif is_filename:
yield from execute_save(lang_or_fn, codeblock_content, ask=ask)
else:
logger.warning(
assert not is_supported_codeblock(codeblock)
logger.debug(
f"Unknown codeblock type '{lang_or_fn}', neither supported language or filename."
)


def is_supported_codeblock(codeblock: str) -> bool:
"""Returns whether a codeblock is supported by tools."""
lang_or_fn = codeblock.splitlines()[0].strip()
# TODO: refactor to share code with `LogManager.get_last_code_block()`
# passed argument might not be a clean string, could have leading text and even leading codeblocks
# strip everything but the last occurring codeblock

# extract contents of codeblock, including the lang/filename
contents = codeblock.split("```")[-2]

# extract lang/filename
lang_or_fn = contents.splitlines()[0].strip()
is_filename = lang_or_fn.count(".") >= 1

# remove lang/filename from contents
contents = contents.split("\n", 1)[-1]

# reconstruct clean codeblock
codeblock = f"```{lang_or_fn}\n{contents}```"

if lang_or_fn in ["python", "py"]:
return True
elif lang_or_fn in ["bash", "sh"]:
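
Note on the change above: is_supported_codeblock no longer assumes its argument is a clean codeblock; it may arrive with leading prose or even earlier codeblocks. The new logic slices out the last complete ```-fenced block, reads its first line as the language tag or filename, and reconstructs a clean block before checking support. A rough standalone sketch of that parsing step (function name and example string are illustrative, not from the commit):

def last_codeblock_lang(text: str) -> str:
    # split on the fences and take the contents of the last complete block
    contents = text.split("```")[-2]
    # the first line of the block is the language tag or a filename
    return contents.splitlines()[0].strip()

msg = "Sure, here is the script:\n```python\nprint('hello')\n```"
assert last_codeblock_lang(msg) == "python"
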
41 changes: 38 additions & 3 deletions tests/test-integration.sh
@@ -1,7 +1,36 @@
#!/bin/bash

set -e
set -x
# We test with gpt-4 and gpt-3.5-turbo.
# gpt-3.5-turbo is a lot faster, so makes running the tests faster,
# but gpt-4 is more accurate, so passes more complex tests where gpt-3.5-turbo stumbles.
# there is also gpt-3.5-turbo-16k, which handles contexts up to 16k tokens (vs gpt-4's 8k and gpt-3.5-turbo's 4k).
MODEL="gpt-3.5-turbo"
ARGS="--model $MODEL"


# if one of the args to this script was --ask, ask if test passed/failed/idk after each test
if [ "$1" = "--ask" ]; then
ASK="1"
fi

# overwrite gptme using a function that adds the arguments and calls the original, supporting several arguments
function gptme() {
echo "$ gptme $ARGS $@"
/usr/bin/env gptme $ARGS "$@" </dev/null
if [ "$ASK" = "1" ]; then
echo -n "Did the test pass? (y/n/I) "
read -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
echo "Test passed"
elif [[ $REPLY =~ ^[Nn]$ ]]; then
echo "Test failed"
exit 1
else
echo "I don't know"
fi
fi
}

# set pwd to the output directory under this script
cd "$(dirname "$0")"
@@ -14,8 +43,12 @@ interactive=${GITHUB_ACTIONS:-1}
# set this to indicate tests are run (non-interactive)
export PYTEST_CURRENT_TEST=1

set -e

# test stdin and cli-provided prompt
echo "The project mascot is a flying pig" | gptme "What is the project mascot?"
# NOTE: we do not do this as part of the suite, because our gptme function wrapper above does not support stdin
# if you want to run it, copy the line into your terminal
# echo "The project mascot is a flying pig" | gptme "What is the project mascot?"

# test load context from file
echo "The project mascot is a flying pig" > mascot.txt
@@ -63,3 +96,5 @@ if [ "$interactive" = "1" ]; then
# interactive matplotlib
gptme 'plot an x^2 graph'
fi

gptme 'render mandelbrot set to mandelbrot.png'
2 changes: 2 additions & 0 deletions tests/test_cli.py
@@ -106,6 +106,8 @@ def test_generate_primes(name: str):
"--name",
name,
"print the first 10 prime numbers",
"--model",
"gpt-3.5-turbo",
],
)
# check that the 9th and 10th prime is present
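
Note on the change above: the test now pins the model to gpt-3.5-turbo so the CLI tests run faster. For context, a hedged sketch of what the full invocation might look like, assuming the test drives the CLI through click's CliRunner and a gptme.cli:main entry point (the import path, fixture value, and exact assertions are reconstructed for illustration, not copied from the repository):

from click.testing import CliRunner

from gptme.cli import main  # assumed entry point, not shown in the diff

def test_generate_primes(name: str = "test-primes"):  # hypothetical value for the name fixture
    runner = CliRunner()
    result = runner.invoke(
        main,
        ["--name", name, "print the first 10 prime numbers", "--model", "gpt-3.5-turbo"],
    )
    # check that the 9th and 10th prime is present (23 and 29)
    assert "23" in result.output
    assert "29" in result.output
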
