
Commit d18b2d3 (1 parent: ea4822c)

refactor: tool parsers for HF models

23 files changed: +532 -126 lines

docs/api_reference.rst (+14)

@@ -102,3 +102,17 @@ Message Formatters
 ^^^^^^^^^^^^^^^^^^
 .. automodule:: kani.utils.message_formatters
     :members:
+
+.. _tool-parsers:
+
+Tool Parsers
+^^^^^^^^^^^^
+Tool parsers are used when you have an LLM's text output, which may contain tool calls in their raw format (e.g., JSON).
+They translate the raw text format into Kani's tool calling specification.
+
+.. autoclass:: kani.tool_parsers.BaseToolCallParser
+    :members:
+
+.. autoclass:: kani.tool_parsers.NaiveJSONToolCallParser
+
+.. autoclass:: kani.tool_parsers.MistralToolCallParser
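
For orientation, a tool parser wraps an engine whose raw text output contains tool calls; a minimal usage sketch, mirroring the Mistral setup in examples/4_engines_zoo.py below:

from kani.engines.huggingface import HuggingEngine
from kani.prompts.impl.mistral import MISTRAL_V3_PIPELINE
from kani.tool_parsers import MistralToolCallParser

# the inner engine builds the prompt and generates raw text;
# the wrapper parses the [TOOL_CALLS] JSON payload into kani's ToolCall spec
model = HuggingEngine(model_id="mistralai/Mistral-Small-Instruct-2409", prompt_pipeline=MISTRAL_V3_PIPELINE)
engine = MistralToolCallParser(model)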

docs/engines/huggingface.rst (+1)

@@ -13,6 +13,7 @@ If your language model backend is available on HuggingFace or is compatible with
 This means you can safely ignore this section of the documentation for most use cases! Just use:
 
 .. code-block:: python
+
     from kani.engines.huggingface import HuggingEngine
     engine = HuggingEngine(model_id="your-org/your-model-id")
 

docs/engines/implementing.rst (+6)

@@ -7,6 +7,7 @@ Implementing an Engine
 prompt format.
 
 .. code-block:: python
+
     from kani.engines.huggingface import HuggingEngine
     engine = HuggingEngine(model_id="your-org/your-model-id")
 
@@ -42,6 +43,11 @@ the underlying model, and kani needs to know about the extra tokens added by this
 
 Adding Function Calling
 -----------------------
+
+.. important::
+    Already have a way to build function calling prompts but just need a way to parse the outputs? Check out the list
+    of :ref:`tool-parsers`.
+
 If you're writing an engine for a model with function calling, there are a couple additional steps you need to take.
 
 Generally, to use function calling, you need to do the following:

examples/4_engines_zoo.py (+25 -30)

@@ -10,18 +10,26 @@
 
 # ==== OpenAI (GPT) ====
 from kani.engines.openai import OpenAIEngine
-engine = OpenAIEngine(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4")
+engine = OpenAIEngine(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o-mini")
 
 # ==== Anthropic (Claude) ====
 # see https://docs.anthropic.com/claude/docs/models-overview for a list of model IDs
 from kani.engines.anthropic import AnthropicEngine
-engine = AnthropicEngine(api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-opus-20240229")
+engine = AnthropicEngine(api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-5-sonnet-latest")
 
 # ========== Hugging Face ==========
 # ---- Any Model (Chat Templates) ----
 from kani.engines.huggingface import HuggingEngine
 engine = HuggingEngine(model_id="org-id/model-id")
 
+# ---- DeepSeek R1 (Hugging Face) ----
+from kani.engines.huggingface import HuggingEngine
+from kani.tool_parsers.deepseek import DeepSeekR1ToolCallParser
+# this method is the same for all distills of R1 as well - simply replace the model ID!
+model = HuggingEngine(model_id="deepseek-ai/DeepSeek-R1")
+engine = DeepSeekR1ToolCallParser(model)
+
+
 # ---- LLaMA v3 (Hugging Face) ----
 import torch
 from kani.engines.huggingface import HuggingEngine
@@ -37,44 +45,31 @@
 # NOTE: If you're running transformers<4.40 and LLaMA 3 continues generating after the <|eot_id|> token,
 # add `eos_token_id=[128001, 128009]` or upgrade transformers
 
-# ---- LLaMA v2 (Hugging Face) ----
-from kani.engines.huggingface.llama2 import LlamaEngine
-engine = LlamaEngine(model_id="meta-llama/Llama-2-7b-chat-hf", use_auth_token=True)  # log in with huggingface-cli
-
 # ---- Mistral Small/Large (Hugging Face) ----
 from kani.engines.huggingface import HuggingEngine
-from kani.prompts.impl.mistral import MISTRAL_V3_PIPELINE, MistralFunctionCallingAdapter
+from kani.prompts.impl.mistral import MISTRAL_V3_PIPELINE
+from kani.tool_parsers.mistral import MistralToolCallParser
 # small (22B): mistralai/Mistral-Small-Instruct-2409
 # large (123B): mistralai/Mistral-Large-Instruct-2407
 model = HuggingEngine(model_id="mistralai/Mistral-Small-Instruct-2409", prompt_pipeline=MISTRAL_V3_PIPELINE)
-engine = MistralFunctionCallingAdapter(model)
-
-# ---- Mistral-7B (Hugging Face) ----
-# v0.3 (supports function calling):
-from kani.engines.huggingface import HuggingEngine
-from kani.prompts.impl.mistral import MISTRAL_V3_PIPELINE, MistralFunctionCallingAdapter
-model = HuggingEngine(model_id="mistralai/Mistral-7B-Instruct-v0.3", prompt_pipeline=MISTRAL_V3_PIPELINE)
-engine = MistralFunctionCallingAdapter(model)
-
-# v0.2:
-from kani.engines.huggingface import HuggingEngine
-from kani.prompts.impl import MISTRAL_V1_PIPELINE
-engine = HuggingEngine(model_id="mistralai/Mistral-7B-Instruct-v0.2", prompt_pipeline=MISTRAL_V1_PIPELINE)
-
-# Also use the MISTRAL_V1_PIPELINE for Mixtral-8x7B (i.e. mistralai/Mixtral-8x7B-Instruct-v0.1).
+engine = MistralToolCallParser(model)
 
 # ---- Command R (Hugging Face) ----
 from kani.engines.huggingface.cohere import CommandREngine
-engine = CommandREngine(model_id="CohereForAI/c4ai-command-r-v01")
+engine = CommandREngine(model_id="CohereForAI/c4ai-command-r-08-2024")
 
-# ---- Gemma (Hugging Face) ----
-from kani.engines.huggingface import HuggingEngine
-from kani.prompts.impl import GEMMA_PIPELINE
-engine = HuggingEngine(model_id="google/gemma-1.1-7b-it", prompt_pipeline=GEMMA_PIPELINE, use_auth_token=True)
+# --------- older models ----------
+# ---- LLaMA v2 (Hugging Face) ----
+from kani.engines.huggingface.llama2 import LlamaEngine
+engine = LlamaEngine(model_id="meta-llama/Llama-2-7b-chat-hf", use_auth_token=True)  # log in with huggingface-cli
 
-# ---- Vicuna v1.3 (Hugging Face) ----
-from kani.engines.huggingface.vicuna import VicunaEngine
-engine = VicunaEngine(model_id="lmsys/vicuna-7b-v1.3")
+# ---- Mistral-7B (Hugging Face) ----
+# v0.3 (supports function calling):
+from kani.engines.huggingface import HuggingEngine
+from kani.prompts.impl.mistral import MISTRAL_V3_PIPELINE
+from kani.tool_parsers.mistral import MistralToolCallParser
+model = HuggingEngine(model_id="mistralai/Mistral-7B-Instruct-v0.3", prompt_pipeline=MISTRAL_V3_PIPELINE)
+engine = MistralToolCallParser(model)
 
 # ========== llama.cpp ==========
 # ---- LLaMA v2 (llama.cpp) ----
2 files renamed without changes.

kani/prompts/impl/README.md (+4)

@@ -0,0 +1,4 @@
+This directory contains concrete implementations of prompting pipelines for some models. It is now deprecated - use
+HuggingEngine to automatically load chat templates for these models instead.
+
+See `tool_adapters` for tool calling adapters for popular models.

kani/prompts/impl/mistral.py (+4 -92)

@@ -1,10 +1,8 @@
 import json
 import logging
-import re
 
 from kani.ai_function import AIFunction
-from kani.engines import Completion, WrapperEngine
-from kani.models import ChatMessage, ChatRole, FunctionCall, ToolCall
+from kani.models import ChatMessage, ChatRole, ToolCall
 from kani.prompts import ApplyContext, PromptPipeline
 
 log = logging.getLogger(__name__)
@@ -186,93 +184,7 @@ def ensure_available_tools(msgs: list[ChatMessage], functions: list[AIFunction])
 
 
 # ==== function call parsing ====
-# [TOOL_CALLS][{'name': 'get_current_weather', 'arguments': {'location': 'Paris, France', 'format': 'celsius'}}]</s>
-class MixtralFunctionCallingAdapter(WrapperEngine):
-    """Common Mixtral-8x22B function calling parsing wrapper."""
+# implemented in tool_adapters/mistral - here for back-compat
+from kani.tool_parsers.mistral import MistralToolCallParser as MistralFunctionCallingAdapter  # noqa E402
 
-    def __init__(self, *args, tool_call_token="[TOOL_CALLS]", eos_token="</s>", **kwargs):
-        super().__init__(*args, **kwargs)
-        self.tool_call_token = tool_call_token
-        self.eos_token = eos_token
-
-    def _parse_tool_calls(self, content: str) -> tuple[str, list[ToolCall]]:
-        tool_json = re.search(
-            rf"{re.escape(self.tool_call_token)}\s*(.+?)\s*({re.escape(self.eos_token)})?$",
-            content,
-            re.IGNORECASE | re.DOTALL,
-        )
-        if tool_json is None:
-            return content, []
-        log.debug(f"Found tool JSON while parsing: {tool_json.group(1)}")
-        actions = json.loads(tool_json.group(1))
-
-        # translate back to kani spec
-        tool_calls = []
-        for action in actions:
-            tool_name = action["name"]
-            tool_args = json.dumps(action["arguments"])
-            tool_id = action.get("id")
-            tool_call = ToolCall.from_function_call(FunctionCall(name=tool_name, arguments=tool_args), call_id_=tool_id)
-            tool_calls.append(tool_call)
-
-        # return trimmed content and tool calls
-        return content[: tool_json.start()], tool_calls
-
-    async def predict(self, messages: list[ChatMessage], functions: list[AIFunction] | None = None, **hyperparams):
-        hyperparams.setdefault("decode_kwargs", dict(skip_special_tokens=False))
-        completion = await super().predict(messages, functions, **hyperparams)
-
-        # if we have tools, parse
-        if functions:
-            completion.message.content, completion.message.tool_calls = self._parse_tool_calls(completion.message.text)
-            completion.message.content = completion.message.content.removesuffix(self.eos_token).strip()
-
-        return completion
-
-    async def stream(self, messages: list[ChatMessage], functions: list[AIFunction] | None = None, **hyperparams):
-        content_parts = []
-        in_tool_call = False
-        inner_completion = None
-        hyperparams.setdefault("decode_kwargs", dict(skip_special_tokens=False))
-
-        # consume from the inner iterator, yielding as normal until we see a tool call or a completion
-        async for elem in super().stream(messages, functions, **hyperparams):
-            log.debug(f"Got stream element: {elem!r}")
-            if isinstance(elem, str):
-                content_parts.append(elem)
-                # if we see the start of a tool call, stop yielding and start buffering
-                if self.tool_call_token in elem:
-                    yield elem[: elem.index(self.tool_call_token)]
-                    in_tool_call = True
-                # otherwise yield the string
-                if not in_tool_call:
-                    yield elem.removesuffix(self.eos_token)
-            else:
-                # save the inner completion
-                inner_completion = elem
-
-        # we have consumed all the elements - construct a new completion
-        # if we don't have a tool call we can just yield the inner completion
-        if not in_tool_call and inner_completion:
-            yield inner_completion
-        # otherwise, parse tool calls from the content (preserving inner tool calls if necessary)
-        else:
-            content = "".join(content_parts)
-            log.debug(f"Content before parsing tool calls: {content!r}")
-            content, tool_calls = self._parse_tool_calls(content)
-            if inner_completion:
-                tool_calls = (inner_completion.message.tool_calls or []) + tool_calls
-                prompt_tokens = inner_completion.prompt_tokens
-                completion_tokens = inner_completion.completion_tokens
-            else:
-                prompt_tokens = None
-                completion_tokens = None
-            clean_content = content.removesuffix(self.eos_token).strip()
-            yield Completion(
-                ChatMessage.assistant(clean_content, tool_calls=tool_calls),
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
-            )
-
-
-MistralFunctionCallingAdapter = MixtralFunctionCallingAdapter
+MixtralFunctionCallingAdapter = MistralFunctionCallingAdapter
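
As a sanity check on the back-compat shim above (a sketch that only exercises the re-exported aliases shown in this diff):

from kani.prompts.impl.mistral import MistralFunctionCallingAdapter, MixtralFunctionCallingAdapter
from kani.tool_parsers.mistral import MistralToolCallParser

# both legacy names now resolve to the new parser class
assert MistralFunctionCallingAdapter is MistralToolCallParser
assert MixtralFunctionCallingAdapter is MistralToolCallParser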

kani/tool_parsers/__init__.py (+4)

@@ -0,0 +1,4 @@
+from .base import BaseToolCallParser
+from .deepseek import DeepSeekR1ToolCallParser
+from .json import NaiveJSONToolCallParser
+from .mistral import MistralToolCallParser

kani/tool_parsers/base.py (+89)

@@ -0,0 +1,89 @@
+import logging
+from abc import ABC
+
+from kani.engines import Completion, WrapperEngine
+from kani.engines.base import BaseCompletion
+from kani.models import ChatMessage, ToolCall
+
+log = logging.getLogger(__name__)
+
+
+class BaseToolCallParser(WrapperEngine, ABC):
+    """
+    Abstract base class for tool call parsers.
+
+    To implement your own tool call parser, subclass this class and:
+
+    * implement ``parse_tool_calls(self, content: str) -> tuple[str, list[ToolCall]]``
+    * pass default values of ``tool_call_start_token`` and ``tool_call_end_token`` to ``super().__init__(...)``
+
+    This class will handle calling the parser and interrupting streams when tool calls are detected.
+    """
+
+    def __init__(self, *args, tool_call_start_token: str, tool_call_end_token: str, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.tool_call_start_token = tool_call_start_token
+        self.tool_call_end_token = tool_call_end_token
+
+    def parse_tool_calls(self, content: str) -> tuple[str, list[ToolCall]]:
+        """Given the string completion of the model, return the content without tool calls and the parsed tool calls."""
+        raise NotImplementedError
+
+    async def predict(self, messages, functions=None, **hyperparams) -> BaseCompletion:
+        completion = await super().predict(messages, functions, **hyperparams)
+
+        # if we have tools, parse them
+        if functions:
+            completion.message.content, completion.message.tool_calls = self.parse_tool_calls(completion.message.text)
+
+        return completion
+
+    async def stream(self, messages, functions=None, **hyperparams):
+        content_parts = []
+        in_tool_call = False
+        inner_completion = None
+
+        # consume from the inner iterator, yielding as normal until we see a tool call or a completion
+        async for elem in super().stream(messages, functions, **hyperparams):
+            log.debug(f"Got stream element: {elem!r}")
+            if isinstance(elem, str):
+                content_parts.append(elem)
+                # if we see the start of a tool call, stop yielding and start buffering
+                if self.tool_call_start_token in elem:
+                    if len(elem) > len(self.tool_call_start_token):
+                        yield elem[: elem.index(self.tool_call_start_token)]
+                    in_tool_call = True
+                # if we see the end of a tool call, start yielding and stop buffering
+                if self.tool_call_end_token in elem:
+                    if len(elem) > len(self.tool_call_end_token):
+                        yield elem[elem.index(self.tool_call_end_token) + len(self.tool_call_end_token) :]
+                    in_tool_call = False
+                # otherwise yield the string
+                if not in_tool_call:
+                    yield elem
+            else:
+                # save the inner completion
+                inner_completion = elem
+
+        # we have consumed all the elements - construct a new completion
+        # if we don't have a tool call we can just yield the inner completion
+        if not in_tool_call and inner_completion:
+            yield inner_completion
+        # otherwise, parse tool calls from the content (preserving inner tool calls if necessary)
+        else:
+            content = "".join(content_parts)
+            log.debug(f"Content before parsing tool calls: {content!r}")
+            content, tool_calls = self.parse_tool_calls(content)
+            if inner_completion:
+                tool_calls = (inner_completion.message.tool_calls or []) + tool_calls
+                prompt_tokens = inner_completion.prompt_tokens
+                completion_tokens = inner_completion.completion_tokens
+            else:
+                prompt_tokens = None
+                completion_tokens = None
+            clean_content = content.strip()
+            yield Completion(
+                ChatMessage.assistant(clean_content, tool_calls=tool_calls),
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+            )
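
To illustrate the subclassing contract described in the docstring above, here is a hypothetical parser for a model that wraps a JSON tool call in <tool_call>...</tool_call> tags; the tag strings and class name are illustrative, not part of this commit:

import json

from kani.models import FunctionCall, ToolCall
from kani.tool_parsers import BaseToolCallParser


class XMLTagToolCallParser(BaseToolCallParser):
    """Hypothetical example parser for <tool_call>{"name": ..., "arguments": {...}}</tool_call> outputs."""

    def __init__(self, *args, **kwargs):
        # pass default start/end tokens to the base class, as the docstring requires
        kwargs.setdefault("tool_call_start_token", "<tool_call>")
        kwargs.setdefault("tool_call_end_token", "</tool_call>")
        super().__init__(*args, **kwargs)

    def parse_tool_calls(self, content: str) -> tuple[str, list[ToolCall]]:
        # naive parse: take the first span between the tokens as one JSON tool call
        before, sep, rest = content.partition(self.tool_call_start_token)
        if not sep:
            return content, []  # no tool call in this completion
        payload = rest.split(self.tool_call_end_token, 1)[0]
        data = json.loads(payload)
        call = ToolCall.from_function_call(FunctionCall(name=data["name"], arguments=json.dumps(data["arguments"])))
        return before, [call]

Wrapping then works the same as the built-in parsers: engine = XMLTagToolCallParser(HuggingEngine(model_id=...)).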
