Commit a2c663d

🧠 Release: Mind Alpha v0.1
1 parent 0b67db6 commit a2c663d

25 files changed: +1476 −43 lines

.env-example (+2 −1)

```diff
@@ -1,2 +1,3 @@
 OPENAI_API_KEY=sk-proj-ghj...
-PEXELS_API_KEY=hAi...
+PEXELS_API_KEY=hAi...
+PIXABAY_API_KEY=098...
```

MindGUI.py (new file, +164)

```python
import gradio as gr
import json
import ast
from openai import OpenAI
import os
import logging
from dotenv import load_dotenv
from src.json_2_video.json_2_video import PyJson2Video  # JSON-to-video engine
import asyncio
import uuid

logging.basicConfig(level=logging.INFO)

load_dotenv()

# Load the reference JSON
with open('src/json_2_video/tests/json2video_template_clean.json', 'r') as f:
    reference_json = json.load(f)

# Initialize the OpenAI client
openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def generate_from_json(json_input):
    try:
        output_filename = f"output_{uuid.uuid4()}.mp4"
        output_path = os.path.join(os.path.abspath("result"), output_filename)
        pyjson2video = PyJson2Video(json_input, output_path)
        output_path = asyncio.run(pyjson2video.convert())
        return {"status": "success", "message": "Video generated successfully", "output_path": output_path}
    except Exception as e:
        return {"status": "error", "message": f"Error processing video: {str(e)}"}

def generate_and_process_video(instructions):
    try:
        messages = [
            {"role": "system", "content": f"""You are an AI assistant that generates JSON structures for video creation based on user instructions. Use the provided reference JSON as a template. Focus on the following key points:
1. Generate a script that is at least 100 words long.
2. Always synchronize image timings with the script by using dynamic references:
   - For start times: use ["script_id"].start_time or ["script_id"].voice_start_time
   - For end times: use ["script_id"].end_time or ["script_id"].voice_end_time
3. Ensure the JSON structure includes images, text, and script elements.
Reference JSON structure for a video:\n\n{json.dumps(reference_json, indent=2)}
"""},
            {"role": "user", "content": f"Please generate a similar JSON structure based on the following instructions:\n\n{instructions}"}
        ]

        response = openai.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            messages=messages,
            max_tokens=2000,
            n=1,
            temperature=0.3,
            response_format={"type": "json_object"}  # Ensure JSON response
        )

        generated_json = json.loads(response.choices[0].message.content)
        verification = json_verification(generated_json)

        if verification["status"] == "corrected":
            generated_json = verification["data"]
        elif verification["status"] == "feedback":
            return None, verification["message"]

        output_filename = f"output_{uuid.uuid4()}.mp4"
        output_path = os.path.join(os.path.abspath("result"), output_filename)
        pyjson2video = PyJson2Video(generated_json, output_path)
        output_path = asyncio.run(pyjson2video.convert())

        return {"status": "success", "message": "Video generated successfully", "output_path": output_path}, json.dumps(generated_json, indent=2)
    except Exception as e:
        return {"status": "error", "message": f"Error processing video: {str(e)}"}, None

def json_verification(json_data):
    try:
        parsed_json = json.loads(json_data) if isinstance(json_data, str) else json_data
        verification_prompt = f"""
Please verify the following JSON structure for a video creation template:
1. Ensure all required elements (images, text, script) are present.
2. Verify that the timing is correct and synchronized.
3. Check that image and text timings use script_id references (e.g., 'script_id.start_time', 'script_id.end_time') instead of hard-coded numbers.
4. Validate that the script is at least 100 words long.
Reference JSON structure:\n{json.dumps(reference_json, indent=2)}
JSON structure to verify:\n{json.dumps(parsed_json, indent=2)}
"""

        verification = openai.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            messages=[
                {"role": "system", "content": "You are an AI assistant specialized in verifying JSON structures for video creation."},
                {"role": "user", "content": verification_prompt}
            ],
            response_format={"type": "json_object"},
            max_tokens=2000,
            n=1,
            temperature=0.3,
        )

        # NOTE: assumes the model answers with {"status": ..., "message": ..., "data": ...};
        # a missing key raises KeyError, which the except below turns into an error result.
        verification_result = json.loads(verification.choices[0].message.content)

        if verification_result["status"] == "corrected":
            return {"status": "corrected", "message": "JSON structure corrected.", "data": verification_result["data"]}
        else:
            return {"status": "feedback", "message": verification_result["message"]}
    except json.JSONDecodeError:
        return {"status": "error", "message": "Error: Input is not valid JSON. Please provide a valid JSON structure."}
    except Exception as e:
        return {"status": "error", "message": f"Error during verification: {str(e)}"}

def download_json_template():
    # Not yet wired to a UI component in this file.
    return json.dumps(reference_json, indent=2)

def process_result(result):
    # The result dict arrives as the textbox's string form; parse it back safely.
    if isinstance(result, str):
        try:
            result = ast.literal_eval(result)
        except (ValueError, SyntaxError):
            return {"status": "error", "message": result}, gr.update(visible=False), None

    if result["status"] == "success":
        output_message = f"Status: {result['status']}\nMessage: {result['message']}\nOutput Path: {result['output_path']}"
        return output_message, gr.update(visible=True), gr.update(value=result['output_path'], visible=True)
    else:
        return f"Status: {result['status']}\nMessage: {result['message']}", gr.update(visible=False), None

# Build the Gradio interface
with gr.Blocks() as iface:
    gr.Markdown("# Mind")
    gr.Markdown("Enter instructions for your video or provide a JSON structure directly. The AI will generate and process the video based on the input.")

    with gr.Tab("Text Instructions"):
        input_text = gr.Textbox(lines=5, label="Enter your video instructions")
        generate_button_text = gr.Button("Generate Video from Text", variant="primary")
        text_output = gr.Textbox(label="Result")
        video_output_text = gr.File(label="Download Generated Video", visible=False)
        json_output = gr.Textbox(label="JSON template or Error Message", lines=10)

    with gr.Tab("JSON Input"):
        json_input = gr.Textbox(lines=10, label="Enter your JSON structure directly")
        json_template = gr.File(label="JSON Template", file_count="single", file_types=[".json"])
        generate_button_json = gr.Button("Generate Video from JSON", variant="primary")
        json_output_result = gr.Textbox(label="Result")
        video_output_json = gr.File(label="Download Generated Video", visible=False)

    generate_button_text.click(
        generate_and_process_video,
        inputs=[input_text],
        outputs=[text_output, json_output]
    ).then(
        process_result,
        inputs=text_output,
        outputs=[text_output, generate_button_text, video_output_text]
    )

    generate_button_json.click(
        generate_from_json,
        inputs=[json_input],
        outputs=json_output_result
    ).then(
        process_result,
        inputs=json_output_result,
        outputs=[json_output_result, generate_button_json, video_output_json]
    )

# Launch the interface
iface.launch()
```
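
For orientation, here is a rough sketch of the kind of JSON the GUI works with. The actual template lives in `src/json_2_video/tests/json2video_template_clean.json` and is not part of this diff, so every field name below is a guess inferred from the system prompt (script elements with IDs, plus images/text whose timings use `["script_id"].start_time`-style dynamic references):

```python
# Hypothetical illustration only -- the real template is not shown in this
# commit. Field names are inferred from the system prompt in MindGUI.py.
example_video_json = {
    "script": [
        {"id": "script_1", "text": "A 100+ word narration goes here..."}
    ],
    "images": [
        {
            "query": "city skyline at dusk",
            # Dynamic references keep image timings synced to the narration:
            "start_time": '["script_1"].start_time',
            "end_time": '["script_1"].end_time',
        }
    ],
    "text": [
        {
            "content": "MIND",
            "start_time": '["script_1"].voice_start_time',
            "end_time": '["script_1"].voice_end_time',
        }
    ],
}
```
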

README.md (+15 −6)

````diff
@@ -18,16 +18,19 @@ If you would like to see a world where anyone can make awesome videos, please co

 This helps support development and consumption of services like OpenAI, ElevenLabs, etc.

+## 📅 26-10-2024: Launched the Alpha version of Mind 🧠, The AI Youtuber.
+
+Read more about it [here](https://docs.turboreelgpt.tech/blog/how-we-built-mind-an-ai-youtuber-that-creates-500-videos-per-day-on-your-laptop2024-10-24/).
+
 ## 🚀 Roadmap (not set in stone)

-- [ ] Improve story stelling
-- [ ] Improve video quality
+- [ ] Improve AI agent of Mind 🧠
+- [ ] Create some general templates to use with the Json2Video parser.
+- [ ] Images and Videos generated by AI
 - [ ] Dockerize it
 - [ ] Documentation
 - [ ] Add translation feature, to reach a wider audience
-- [ ] Create more formats (Educational, Product Demo, etc)
-- [ ] Find trending audios and videos on the internet and use them to generate videos
-- [ ] Video editing software
+- [ ] Trending audios and videos on the internet and use them to generate videos

 ## 💡 Getting Started

@@ -62,12 +65,18 @@ Ready to dive in? Here’s how to get started with TurboReelGPT:
 ```
 6. **Grab Your API Keys**: You’ll need keys for OPENAI (for generating scripts) and PEXELS (for fetching images). Get your PEXELS API key [here](https://www.pexels.com/api/key/).

+Sometimes Pexels might not have the image you want, so in that case you can use Pixabay as a backup. Get your Pixabay API key [here](https://pixabay.com/api/docs/).
+
 7. **Set Up Your Config**: Create a `.env` file in the root folder. Clone `.env-example` and fill it in with your OPENAI_API_KEY and PEXELS_API_KEY.

-8. **Gradio UI**: Run:
+8. **Gradio UI for Reddit and Script Engine**: Run:
 ```bash
 python3 GUI.py
 ```
+8.1 **Gradio UI for Mind 🧠 and Json to Video Engine**: Run:
+```bash
+python3 MindGUI.py
+```
 Fill in all the inputs and generate your video!

 ![GUI Preview](https://drive.google.com/uc?export=view&id=1t_K6zgJrJl5ATv585i1VDF6-YwJ5htI-)
````

src/captions/caption_handler.py (+2 −2)

```diff
@@ -18,11 +18,11 @@ def __init__(self):
         self.video_captioner = VideoCaptioner()
         self.default_font = "Dacherry.ttf"

-    async def process(self, audio_file: str, captions_color="white", shadow_color="cyan", font_size=30, font=None):
+    async def process(self, audio_file: str, captions_color="white", shadow_color="cyan", font_size=60, font=None):
        subtitles_file = await self.subtitle_generator.generate_subtitles(audio_file)
        caption_clips = self.video_captioner.generate_captions_to_video(
            subtitles_file,
-           font=None,
+           font=font,
            captions_color=captions_color,
            shadow_color=shadow_color,
            font_size=font_size
```
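
The second hunk fixes a real bug: the handler previously hard-coded `font=None` when delegating, so a caller's font choice never reached the captioner. A hedged call sketch (the class name `CaptionHandler` and module path are assumed from the file name; `DeepShadow.ttf` is one of the fonts added in this commit):

```python
import asyncio
from src.captions.caption_handler import CaptionHandler  # assumed path/name

async def main():
    handler = CaptionHandler()
    # font and font_size now actually propagate to generate_captions_to_video
    await handler.process("narration.mp3", font="DeepShadow.ttf", font_size=60)

asyncio.run(main())
```
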
Binary file (15.7 KB) not shown.

src/captions/fonts/DeepShadow.ttf (11.3 KB): binary file not shown.

Binary file (33.8 KB) not shown.

src/captions/subtitle_generator.py (+10 −5)

```diff
@@ -56,17 +56,22 @@ async def speech_to_text(self, audio_file: str):
             current_words.append(word_info.word.strip())

             # check if current subtitle is long enough or if the next word is too long
-            if len(current_words) >= 4 or (i > 0 and word_start_time.ordinal - previous_word_end.ordinal >= 600):
-                formatted_text = " ".join(current_words[:2]) + "\n" + " ".join(current_words[2:])
+            if len(current_words) >= 2 or (i > 0 and word_start_time.ordinal - previous_word_end.ordinal >= 600):
+                # formatted_text = " ".join(current_words[:1]) + "\n" + " ".join(current_words[1:])
+                formatted_text = " ".join(current_words)
                 subtitles.append((subtitle_start_time, word_end_time, formatted_text))
                 current_words = []
                 subtitle_start_time = None

         # Handle any remaining word
         if current_words:
-            formatted_text = " ".join(current_words[:2])
-            if len(current_words) > 2:
-                formatted_text += "\n" + " ".join(current_words[2:])
+            # Old multi-line approach (commented out)
+            # formatted_text = " ".join(current_words[:1])
+            # if len(current_words) > 1:
+            #     formatted_text += "\n" + " ".join(current_words[1:])
+
+            # New single-line approach
+            formatted_text = " ".join(current_words)
             subtitles.append((subtitle_start_time, word_end_time, formatted_text))

         logging.info(f"Speech-to-text transcription completed.")
```

src/captions/video_captioner.py (+14 −15)

```diff
@@ -1,10 +1,7 @@
-from moviepy.editor import TextClip, CompositeVideoClip, vfx
-from PIL import Image, ImageFilter
-import numpy as np
+from moviepy.editor import TextClip, CompositeVideoClip
 import pysrt
 import logging
 import os
-from skimage import filters

 class VideoCaptioner:
     def __init__(self):
@@ -22,34 +19,35 @@ def get_font_path(self, font_name):
         return None

     def create_shadow_text(self, txt, fontsize, font, color, shadow_color, shadow_offset, blur_color):
-        # Create the blurred shadow
+        """ # Create the blurred shadow
         blur_size = int(fontsize * 1.08) # 10% larger than the main text
         blur_clip = TextClip(txt, fontsize=blur_size, font=font, color=blur_color, size=(1000, None), method='caption')
         blur_clip = blur_clip.set_opacity(0.15) # Set the opacity to 15%
-
+        """
         # Create the offset shadow
         shadow_clip = TextClip(txt, fontsize=fontsize, font=font, color=shadow_color, size=(1000, None), method='caption')
         shadow_clip = shadow_clip.set_position((shadow_offset, shadow_offset))

         # Create the main text
         text_clip = TextClip(txt, fontsize=fontsize, font=font, color=color, size=(1000, None), method='caption')

         # Composite all layers
-        return CompositeVideoClip([blur_clip, shadow_clip, text_clip])
+        # return CompositeVideoClip([blur_clip, shadow_clip, text_clip])
+        return CompositeVideoClip([shadow_clip, text_clip])

     """ Call this function to generate the captions to video """
     def generate_captions_to_video(self,
                                    subtitles_path,
+                                   font=None,
                                    captions_color='#BA4A00',
                                    shadow_color='white',
-                                   font_size=60,
-                                   font=None,
+                                   font_size=60
                                    ):
-        font = font or self.default_font
+        font = self.get_font_path(font) if font else self.default_font
        try:
            subtitles = subtitles_path
            subtitle_clips = []
-           shadow_offset = font_size / 30
+           shadow_offset = font_size / 10

            logging.info(f"Received subtitles: {type(subtitles)}") # Debug log

@@ -65,9 +63,10 @@ def generate_captions_to_video(self,

            for subtitle in subtitles:
                if isinstance(subtitle, pysrt.SubRipItem):
-                   start_time, end_time, text = subtitle.start, subtitle.end, subtitle.text
+                   start_time, end_time, text = subtitle.start, subtitle.end, subtitle.text.upper()
                elif isinstance(subtitle, tuple) and len(subtitle) == 3:
                    start_time, end_time, text = subtitle
+                   text = text.upper()
                else:
                    logging.warning(f"Skipping invalid subtitle format: {subtitle}")
                    continue
@@ -89,7 +88,7 @@ def generate_captions_to_video(self,
                subtitle_clip = (shadow_text
                                 .set_start(start_seconds)
                                 .set_duration(duration)
-                                .set_position(('center', 0.55), relative=True))
+                                .set_position(('center', 0.4), relative=True))
                subtitle_clips.append(subtitle_clip)

            logging.info(f"Generated {len(subtitle_clips)} subtitle clips") # Debug log
```

src/image_handler.py (+31)

```diff
@@ -15,6 +15,7 @@ class ImageHandler:
     def __init__(self, pexels_api_key, openai_api_key):
         self.pexels_api_key = pexels_api_key
         self.openai_api_key = openai_api_key
+        self.pixabay_api_key = os.getenv('PIXABAY_API_KEY') or ''
         self.openai = OpenAI(api_key=self.openai_api_key)
         self.base_dir = os.path.dirname(os.path.abspath(__file__))

@@ -45,6 +46,31 @@ def search_pexels_images(self, query):
         image_urls = [photo['src']['original'] for photo in search_results.get('photos', [])]  # Extract image URLs
         return image_urls

+    def search_pixabay_images(self, query):
+        """Search for images using Pixabay API and return the URLs."""
+        search_url = "https://pixabay.com/api/"
+
+        params = {
+            'key': self.pixabay_api_key,
+            'q': query,
+            'image_type': 'all',
+            'per_page': 3
+        }
+
+        try:
+            response = requests.get(search_url, params=params)
+            response.raise_for_status()  # Raise an error for bad responses
+        except requests.exceptions.HTTPError as e:
+            logging.error(f"HTTP error occurred: {e}")  # Log the error
+            return []  # Return an empty list on error
+        except Exception as e:
+            logging.error(f"An error occurred during the request: {e}")
+            return []
+
+        search_results = response.json()
+        image_urls = [hit['largeImageURL'] for hit in search_results.get('hits', [])]  # Extract image URLs
+        return image_urls
+
     def search_google_images(self, query):
         """Search for images using Google Custom Search API and return the URLs."""
         search_url = "https://customsearch.googleapis.com/customsearch/v1?"
@@ -172,6 +198,11 @@ def get_images_from_subtitles(self, subtitles_file_path, video_context, video_du

         try:
             image_urls = self.search_pexels_images(refined_keyword)
+            if not image_urls:
+                image_urls = self.search_pixabay_images(refined_keyword)
+                logging.info(f"No images found on Pexels, searching on Pixabay: {image_urls}")
+            if not image_urls:
+                logging.info(f"No images found on Pixabay")
         except Exception as e:
             logging.error(f"Error searching for images: {e}")
             image_paths.append(None)  # Add None for failed image search
```
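
The fallback chain is simply: try Pexels first, then Pixabay, then give up with an empty list. A hedged usage sketch of that first-hit-wins order (the constructor signature matches the diff; the import path for `ImageHandler` is assumed from the file location):

```python
import os
from dotenv import load_dotenv
from src.image_handler import ImageHandler  # assumed module path

load_dotenv()
handler = ImageHandler(
    pexels_api_key=os.getenv("PEXELS_API_KEY"),
    openai_api_key=os.getenv("OPENAI_API_KEY"),
)

# Same first-hit-wins order as get_images_from_subtitles:
urls = handler.search_pexels_images("sunset beach")
if not urls:
    urls = handler.search_pixabay_images("sunset beach")
print(urls[:3] if urls else "no images found")
```
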

src/json_2_video/__init__.py: whitespace-only changes.

Two further new files (names not captured in this extract), each a standard keep-directory ignore file:

```diff
@@ -0,0 +1,2 @@
+*
+!.gitignore
```
