Skip to content

Commit feb8117

Browse files
committed
Add the ability to include YouTube videos on Gemini models
1 parent cb2a261 commit feb8117

File tree

2 files changed

+33
-8
lines changed

2 files changed

+33
-8
lines changed

core/aimodels/gemini/infer.py

+33-5
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import logging
1515
import typing
1616
import random
17+
import re
1718

1819
class APIParams:
1920
def __init__(self):
@@ -104,8 +105,7 @@ async def _fetch_tool(self, db_conn) -> dict:
104105
# Check if tool is code execution
105106
if _Tool:
106107
if "gemini-2.0-flash-thinking" in self._model_name:
107-
await self._discord_method_send("> ⚠️ The Gemini 2.0 Flash Thinking only supports code execution as a tool, tools won't be used with this model.")
108-
_tool_schema = None
108+
raise CustomErrorMessage("⚠️ The Gemini 2.0 Flash Thinking doesn't support tools, please switch to another Gemini model.")
109109
else:
110110
if _tool_selection_name == "code_execution":
111111
_tool_schema = [types.Tool(code_execution=types.ToolCodeExecution())]
@@ -206,6 +206,35 @@ async def chat_completion(self, prompt, db_conn, system_instruction: str = None)
206206
if _chat_thread is None:
207207
_chat_thread = []
208208

209+
# Check if the prompt contains the "/youtube:" marker
210+
if "/youtube:" in prompt:
211+
_REGEX_YOUTUDOTBE = r"https:\/\/youtu.be\/[\w|-]+"
212+
_REGEX_YOUTUBEDOTCOM = r"https:\/\/(www.youtube.com|youtube.com)\/watch\?v=[\w|-]+"
213+
214+
# Extract the URL and remove parameters if they exist
215+
if "youtu.be" in prompt:
216+
_youtube_url = re.search(_REGEX_YOUTUDOTBE, prompt)[0]
217+
# Remove the URL from the prompt
218+
prompt = re.sub(fr"\/youtube:{_REGEX_YOUTUDOTBE}", "", prompt)
219+
else:
220+
_youtube_url = re.search(_REGEX_YOUTUBEDOTCOM, prompt)[0]
221+
# Remove the URL from the prompt
222+
prompt = re.sub(fr"\/youtube:{_REGEX_YOUTUBEDOTCOM}", "", prompt)
223+
224+
if _youtube_url:
225+
# Add the video URL to the chat thread as a file part
226+
logging.info("YouTube URL detected: %s", _youtube_url)
227+
await self._discord_method_send(f"✅ Watching YouTube Video: **<{_youtube_url}>\nNote: You can only include one YouTube video per conversation. To add more videos, clear the chat history**")
228+
_chat_thread.append(
229+
types.Content(
230+
parts=[types.Part.from_uri(
231+
file_uri=_youtube_url,
232+
mime_type="video/*"
233+
)],
234+
role="user"
235+
).model_dump(exclude_unset=True)
236+
)
237+
209238
# Attach file attachment if it exists
210239
if hasattr(self, "_file_data"): _chat_thread.append(self._file_data)
211240

@@ -250,7 +279,6 @@ async def chat_completion(self, prompt, db_conn, system_instruction: str = None)
250279
elif _response.candidates[0].finish_reason != "STOP":
251280
raise CustomErrorMessage("⚠️ An error has occurred while giving you an answer, please try again later.")
252281

253-
254282
# Iterate through the parts and perform tasks
255283
_toolParts = []
256284
_toHalt = False
@@ -309,7 +337,7 @@ async def chat_completion(self, prompt, db_conn, system_instruction: str = None)
309337
_toolParts.append(types.Part.from_function_response(
310338
name=_part.function_call.name,
311339
response=_toolResult
312-
).model_dump(exclude_unset=True)
340+
)
313341
)
314342

315343
# Function calling and code execution don't mix
@@ -339,7 +367,7 @@ async def chat_completion(self, prompt, db_conn, system_instruction: str = None)
339367
await _interstitial.edit(f"✅ Used: **{_Tool['tool_human_name']}**")
340368

341369
# Append the tool parts to the chat thread
342-
_chat_thread.append(types.Content(parts=_toolParts))
370+
_chat_thread.append(types.Content(parts=_toolParts).model_dump(exclude_unset=True))
343371

344372
# Add function call parts to the response
345373
_response = await self.completion(prompt=_chat_thread, system_instruction=system_instruction, return_text=False)

data/models.yaml

-3
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@
1414
- model: gemini::gemini-2.0-flash-thinking-exp-01-21
1515
name: Gemini 2.0 Flash Thinking Experimental
1616
description: Google's latest model that thinks before it responds
17-
- model: gemini::gemini-2.0-flash-exp
18-
name: Gemini 2.0 Flash Experimental
19-
description: Early version of Gemini 2.0 Flash
2017
- model: gemini::gemini-1.5-pro-002
2118
name: Gemini 1.5 Pro
2219
description: Advanced chat tasks

0 commit comments

Comments
 (0)