Commit a2c663d

🧠 Release: Mind Alpha v0.1
1 parent 0b67db6 commit a2c663d

25 files changed: +1476 −43 lines

.env-example (+2 −1)

```diff
@@ -1,2 +1,3 @@
 OPENAI_API_KEY=sk-proj-ghj...
-PEXELS_API_KEY=hAi...
+PEXELS_API_KEY=hAi...
+PIXABAY_API_KEY=098...
```

MindGUI.py (new file, +164)

```python
import gradio as gr
import json
import ast
from openai import OpenAI
import os
import logging
from dotenv import load_dotenv
from src.json_2_video.json_2_video import PyJson2Video  # JSON-to-video engine
import asyncio
import uuid

logging.basicConfig(level=logging.INFO)

load_dotenv()

# Load the reference JSON
with open('src/json_2_video/tests/json2video_template_clean.json', 'r') as f:
    reference_json = json.load(f)

# Initialize the OpenAI client
openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def generate_from_json(json_input):
    try:
        output_filename = f"output_{uuid.uuid4()}.mp4"
        output_path = os.path.join(os.path.abspath("result"), output_filename)
        pyjson2video = PyJson2Video(json_input, output_path)
        output_path = asyncio.run(pyjson2video.convert())
        return {"status": "success", "message": "Video generated successfully", "output_path": output_path}
    except Exception as e:
        return {"status": "error", "message": f"Error processing video: {str(e)}"}

def generate_and_process_video(instructions):
    try:
        messages = [
            {"role": "system", "content": f"""You are an AI assistant that generates JSON structures for video creation based on user instructions. Use the provided reference JSON as a template. Focus on the following key points:
1. Generate a script that is at least 100 words long.
2. Always synchronize image timings with the script by using dynamic references:
   - For start times: use ["script_id"].start_time or ["script_id"].voice_start_time
   - For end times: use ["script_id"].end_time or ["script_id"].voice_end_time
3. Ensure the JSON structure includes images, text, and script elements.
Reference JSON structure for a video:\n\n{json.dumps(reference_json, indent=2)}
"""},
            {"role": "user", "content": f"Please generate a similar JSON structure based on the following instructions:\n\n{instructions}"}
        ]

        response = openai.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            messages=messages,
            max_tokens=2000,
            n=1,
            temperature=0.3,
            response_format={"type": "json_object"}  # Ensure JSON response
        )

        generated_json = json.loads(response.choices[0].message.content)
        verification = json_verification(generated_json)

        if verification["status"] == "corrected":
            generated_json = verification["data"]
        elif verification["status"] == "feedback":
            return None, verification["message"]

        output_filename = f"output_{uuid.uuid4()}.mp4"
        output_path = os.path.join(os.path.abspath("result"), output_filename)
        pyjson2video = PyJson2Video(generated_json, output_path)
        output_path = asyncio.run(pyjson2video.convert())

        return {"status": "success", "message": "Video generated successfully", "output_path": output_path}, json.dumps(generated_json, indent=2)
    except Exception as e:
        return {"status": "error", "message": f"Error processing video: {str(e)}"}, None

def json_verification(json_data):
    try:
        parsed_json = json.loads(json_data) if isinstance(json_data, str) else json_data
        verification_prompt = f"""
Please verify the following JSON structure for a video creation template:
1. Ensure all required elements (images, text, script) are present.
2. Verify that the timing is correct and synchronized.
3. Check that image and text timings use script_id references (e.g., 'script_id.start_time', 'script_id.end_time') instead of hard-coded numbers.
4. Validate that the script is at least 100 words long.
Reference JSON structure:\n{json.dumps(reference_json, indent=2)}
JSON structure to verify:\n{json.dumps(parsed_json, indent=2)}
"""

        verification = openai.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            messages=[
                {"role": "system", "content": "You are an AI assistant specialized in verifying JSON structures for video creation."},
                {"role": "user", "content": verification_prompt}
            ],
            response_format={"type": "json_object"},
            max_tokens=2000,
            n=1,
            temperature=0.3,
        )

        # NOTE: assumes the model answers with {"status": ..., "message": ..., "data": ...};
        # a missing key raises KeyError, which the except below turns into an error result.
        verification_result = json.loads(verification.choices[0].message.content)

        if verification_result["status"] == "corrected":
            return {"status": "corrected", "message": "JSON structure corrected.", "data": verification_result["data"]}
        else:
            return {"status": "feedback", "message": verification_result["message"]}
    except json.JSONDecodeError:
        return {"status": "error", "message": "Error: Input is not valid JSON. Please provide a valid JSON structure."}
    except Exception as e:
        return {"status": "error", "message": f"Error during verification: {str(e)}"}

def download_json_template():
    # Not yet wired to a UI component in this file.
    return json.dumps(reference_json, indent=2)

def process_result(result):
    # The result dict arrives as the textbox's string form; parse it back safely.
    if isinstance(result, str):
        try:
            result = ast.literal_eval(result)
        except (ValueError, SyntaxError):
            return {"status": "error", "message": result}, gr.update(visible=False), None

    if result["status"] == "success":
        output_message = f"Status: {result['status']}\nMessage: {result['message']}\nOutput Path: {result['output_path']}"
        return output_message, gr.update(visible=True), gr.update(value=result['output_path'], visible=True)
    else:
        return f"Status: {result['status']}\nMessage: {result['message']}", gr.update(visible=False), None

# Build the Gradio interface
with gr.Blocks() as iface:
    gr.Markdown("# Mind")
    gr.Markdown("Enter instructions for your video or provide a JSON structure directly. The AI will generate and process the video based on the input.")

    with gr.Tab("Text Instructions"):
        input_text = gr.Textbox(lines=5, label="Enter your video instructions")
        generate_button_text = gr.Button("Generate Video from Text", variant="primary")
        text_output = gr.Textbox(label="Result")
        video_output_text = gr.File(label="Download Generated Video", visible=False)
        json_output = gr.Textbox(label="JSON template or Error Message", lines=10)

    with gr.Tab("JSON Input"):
        json_input = gr.Textbox(lines=10, label="Enter your JSON structure directly")
        json_template = gr.File(label="JSON Template", file_count="single", file_types=[".json"])
        generate_button_json = gr.Button("Generate Video from JSON", variant="primary")
        json_output_result = gr.Textbox(label="Result")
        video_output_json = gr.File(label="Download Generated Video", visible=False)

    generate_button_text.click(
        generate_and_process_video,
        inputs=[input_text],
        outputs=[text_output, json_output]
    ).then(
        process_result,
        inputs=text_output,
        outputs=[text_output, generate_button_text, video_output_text]
    )

    generate_button_json.click(
        generate_from_json,
        inputs=[json_input],
        outputs=json_output_result
    ).then(
        process_result,
        inputs=json_output_result,
        outputs=[json_output_result, generate_button_json, video_output_json]
    )

# Launch the interface
iface.launch()
```
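
For orientation, here is a rough sketch of the kind of JSON the GUI works with. The actual template lives in `src/json_2_video/tests/json2video_template_clean.json` and is not part of this diff, so every field name below is a guess inferred from the system prompt (script elements with IDs, plus images/text whose timings use `["script_id"].start_time`-style dynamic references):

```python
# Hypothetical illustration only -- the real template is not shown in this
# commit. Field names are inferred from the system prompt in MindGUI.py.
example_video_json = {
    "script": [
        {"id": "script_1", "text": "A 100+ word narration goes here..."}
    ],
    "images": [
        {
            "query": "city skyline at dusk",
            # Dynamic references keep image timings synced to the narration:
            "start_time": '["script_1"].start_time',
            "end_time": '["script_1"].end_time',
        }
    ],
    "text": [
        {
            "content": "MIND",
            "start_time": '["script_1"].voice_start_time',
            "end_time": '["script_1"].voice_end_time',
        }
    ],
}
```
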

README.md (+15 −6)

````diff
@@ -18,16 +18,19 @@ If you would like to see a world where anyone can make awesome videos, please co

 This helps support development and consumption of services like OpenAI, ElevenLabs, etc.

+## 📅 26-10-2024: Launched the Alpha version of Mind 🧠, The AI Youtuber.
+
+Read more about it [here](https://docs.turboreelgpt.tech/blog/how-we-built-mind-an-ai-youtuber-that-creates-500-videos-per-day-on-your-laptop2024-10-24/).
+
 ## 🚀 Roadmap (not set in stone)

-- [ ] Improve story stelling
-- [ ] Improve video quality
+- [ ] Improve AI agent of Mind 🧠
+- [ ] Create some general templates to use with the Json2Video parser.
+- [ ] Images and Videos generated by AI
 - [ ] Dockerize it
 - [ ] Documentation
 - [ ] Add translation feature, to reach a wider audience
-- [ ] Create more formats (Educational, Product Demo, etc)
-- [ ] Find trending audios and videos on the internet and use them to generate videos
-- [ ] Video editing software
+- [ ] Trending audios and videos on the internet and use them to generate videos

 ## 💡 Getting Started

@@ -62,12 +65,18 @@ Ready to dive in? Here’s how to get started with TurboReelGPT:
 ```
 6. **Grab Your API Keys**: You’ll need keys for OPENAI (for generating scripts) and PEXELS (for fetching images). Get your PEXELS API key [here](https://www.pexels.com/api/key/).

+Sometimes Pexels might not have the image you want, so in that case you can use Pixabay as a backup. Get your Pixabay API key [here](https://pixabay.com/api/docs/).
+
 7. **Set Up Your Config**: Create a `.env` file in the root folder. Clone `.env-example` and fill it in with your OPENAI_API_KEY and PEXELS_API_KEY.

-8. **Gradio UI**: Run:
+8. **Gradio UI for Reddit and Script Engine**: Run:
 ```bash
 python3 GUI.py
 ```
+8.1 **Gradio UI for Mind 🧠 and Json to Video Engine**: Run:
+```bash
+python3 MindGUI.py
+```
 Fill in all the inputs and generate your video!

 ![GUI Preview](https://drive.google.com/uc?export=view&id=1t_K6zgJrJl5ATv585i1VDF6-YwJ5htI-)
````

src/captions/caption_handler.py (+2 −2)

```diff
@@ -18,11 +18,11 @@ def __init__(self):
         self.video_captioner = VideoCaptioner()
         self.default_font = "Dacherry.ttf"

-    async def process(self, audio_file: str, captions_color="white", shadow_color="cyan", font_size=30, font=None):
+    async def process(self, audio_file: str, captions_color="white", shadow_color="cyan", font_size=60, font=None):
        subtitles_file = await self.subtitle_generator.generate_subtitles(audio_file)
        caption_clips = self.video_captioner.generate_captions_to_video(
            subtitles_file,
-           font=None,
+           font=font,
            captions_color=captions_color,
            shadow_color=shadow_color,
            font_size=font_size
```
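
The second hunk fixes a real bug: the handler previously hard-coded `font=None` when delegating, so a caller's font choice never reached the captioner. A hedged call sketch (the class name `CaptionHandler` and module path are assumed from the file name; `DeepShadow.ttf` is one of the fonts added in this commit):

```python
import asyncio
from src.captions.caption_handler import CaptionHandler  # assumed path/name

async def main():
    handler = CaptionHandler()
    # font and font_size now actually propagate to generate_captions_to_video
    await handler.process("narration.mp3", font="DeepShadow.ttf", font_size=60)

asyncio.run(main())
```
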
Binary file (15.7 KB) not shown.

src/captions/fonts/DeepShadow.ttf (11.3 KB): binary file not shown.

Binary file (33.8 KB) not shown.

src/captions/subtitle_generator.py (+10 −5)

```diff
@@ -56,17 +56,22 @@ async def speech_to_text(self, audio_file: str):
             current_words.append(word_info.word.strip())

             # check if current subtitle is long enough or if the next word is too long
-            if len(current_words) >= 4 or (i > 0 and word_start_time.ordinal - previous_word_end.ordinal >= 600):
-                formatted_text = " ".join(current_words[:2]) + "\n" + " ".join(current_words[2:])
+            if len(current_words) >= 2 or (i > 0 and word_start_time.ordinal - previous_word_end.ordinal >= 600):
+                # formatted_text = " ".join(current_words[:1]) + "\n" + " ".join(current_words[1:])
+                formatted_text = " ".join(current_words)
                 subtitles.append((subtitle_start_time, word_end_time, formatted_text))
                 current_words = []
                 subtitle_start_time = None

         # Handle any remaining word
         if current_words:
-            formatted_text = " ".join(current_words[:2])
-            if len(current_words) > 2:
-                formatted_text += "\n" + " ".join(current_words[2:])
+            # Old multi-line approach (commented out)
+            # formatted_text = " ".join(current_words[:1])
+            # if len(current_words) > 1:
+            #     formatted_text += "\n" + " ".join(current_words[1:])
+
+            # New single-line approach
+            formatted_text = " ".join(current_words)
             subtitles.append((subtitle_start_time, word_end_time, formatted_text))

         logging.info(f"Speech-to-text transcription completed.")
```

src/captions/video_captioner.py (+14 −15)

```diff
@@ -1,10 +1,7 @@
-from moviepy.editor import TextClip, CompositeVideoClip, vfx
-from PIL import Image, ImageFilter
-import numpy as np
+from moviepy.editor import TextClip, CompositeVideoClip
 import pysrt
 import logging
 import os
-from skimage import filters

 class VideoCaptioner:
     def __init__(self):
@@ -22,34 +19,35 @@ def get_font_path(self, font_name):
         return None

     def create_shadow_text(self, txt, fontsize, font, color, shadow_color, shadow_offset, blur_color):
-        # Create the blurred shadow
+        """ # Create the blurred shadow
         blur_size = int(fontsize * 1.08) # 10% larger than the main text
         blur_clip = TextClip(txt, fontsize=blur_size, font=font, color=blur_color, size=(1000, None), method='caption')
         blur_clip = blur_clip.set_opacity(0.15) # Set the opacity to 15%
-
+        """
         # Create the offset shadow
         shadow_clip = TextClip(txt, fontsize=fontsize, font=font, color=shadow_color, size=(1000, None), method='caption')
         shadow_clip = shadow_clip.set_position((shadow_offset, shadow_offset))

         # Create the main text
         text_clip = TextClip(txt, fontsize=fontsize, font=font, color=color, size=(1000, None), method='caption')

         # Composite all layers
-        return CompositeVideoClip([blur_clip, shadow_clip, text_clip])
+        # return CompositeVideoClip([blur_clip, shadow_clip, text_clip])
+        return CompositeVideoClip([shadow_clip, text_clip])

     """ Call this function to generate the captions to video """
     def generate_captions_to_video(self,
                                    subtitles_path,
+                                   font=None,
                                    captions_color='#BA4A00',
                                    shadow_color='white',
-                                   font_size=60,
-                                   font=None,
+                                   font_size=60
                                    ):
-        font = font or self.default_font
+        font = self.get_font_path(font) if font else self.default_font
        try:
            subtitles = subtitles_path
            subtitle_clips = []
-           shadow_offset = font_size / 30
+           shadow_offset = font_size / 10

            logging.info(f"Received subtitles: {type(subtitles)}") # Debug log

@@ -65,9 +63,10 @@ def generate_captions_to_video(self,

            for subtitle in subtitles:
                if isinstance(subtitle, pysrt.SubRipItem):
-                   start_time, end_time, text = subtitle.start, subtitle.end, subtitle.text
+                   start_time, end_time, text = subtitle.start, subtitle.end, subtitle.text.upper()
                elif isinstance(subtitle, tuple) and len(subtitle) == 3:
                    start_time, end_time, text = subtitle
+                   text = text.upper()
                else:
                    logging.warning(f"Skipping invalid subtitle format: {subtitle}")
                    continue
@@ -89,7 +88,7 @@ def generate_captions_to_video(self,
                subtitle_clip = (shadow_text
                                 .set_start(start_seconds)
                                 .set_duration(duration)
-                                .set_position(('center', 0.55), relative=True))
+                                .set_position(('center', 0.4), relative=True))
                subtitle_clips.append(subtitle_clip)

            logging.info(f"Generated {len(subtitle_clips)} subtitle clips") # Debug log
```

src/image_handler.py (+31)

```diff
@@ -15,6 +15,7 @@ class ImageHandler:
     def __init__(self, pexels_api_key, openai_api_key):
         self.pexels_api_key = pexels_api_key
         self.openai_api_key = openai_api_key
+        self.pixabay_api_key = os.getenv('PIXABAY_API_KEY') or ''
         self.openai = OpenAI(api_key=self.openai_api_key)
         self.base_dir = os.path.dirname(os.path.abspath(__file__))

@@ -45,6 +46,31 @@ def search_pexels_images(self, query):
         image_urls = [photo['src']['original'] for photo in search_results.get('photos', [])]  # Extract image URLs
         return image_urls

+    def search_pixabay_images(self, query):
+        """Search for images using Pixabay API and return the URLs."""
+        search_url = "https://pixabay.com/api/"
+
+        params = {
+            'key': self.pixabay_api_key,
+            'q': query,
+            'image_type': 'all',
+            'per_page': 3
+        }
+
+        try:
+            response = requests.get(search_url, params=params)
+            response.raise_for_status()  # Raise an error for bad responses
+        except requests.exceptions.HTTPError as e:
+            logging.error(f"HTTP error occurred: {e}")  # Log the error
+            return []  # Return an empty list on error
+        except Exception as e:
+            logging.error(f"An error occurred during the request: {e}")
+            return []
+
+        search_results = response.json()
+        image_urls = [hit['largeImageURL'] for hit in search_results.get('hits', [])]  # Extract image URLs
+        return image_urls
+
     def search_google_images(self, query):
         """Search for images using Google Custom Search API and return the URLs."""
         search_url = "https://customsearch.googleapis.com/customsearch/v1?"
@@ -172,6 +198,11 @@ def get_images_from_subtitles(self, subtitles_file_path, video_context, video_du

         try:
             image_urls = self.search_pexels_images(refined_keyword)
+            if not image_urls:
+                image_urls = self.search_pixabay_images(refined_keyword)
+                logging.info(f"No images found on Pexels, searching on Pixabay: {image_urls}")
+            if not image_urls:
+                logging.info(f"No images found on Pixabay")
         except Exception as e:
             logging.error(f"Error searching for images: {e}")
             image_paths.append(None)  # Add None for failed image search
```
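
The fallback chain is simply: try Pexels first, then Pixabay, then give up with an empty list. A hedged usage sketch of that first-hit-wins order (the constructor signature matches the diff; the import path for `ImageHandler` is assumed from the file location):

```python
import os
from dotenv import load_dotenv
from src.image_handler import ImageHandler  # assumed module path

load_dotenv()
handler = ImageHandler(
    pexels_api_key=os.getenv("PEXELS_API_KEY"),
    openai_api_key=os.getenv("OPENAI_API_KEY"),
)

# Same first-hit-wins order as get_images_from_subtitles:
urls = handler.search_pexels_images("sunset beach")
if not urls:
    urls = handler.search_pixabay_images("sunset beach")
print(urls[:3] if urls else "no images found")
```
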

src/json_2_video/__init__.py: whitespace-only changes.

Two further new files (names not captured in this extract), each a standard keep-directory ignore file:

```diff
@@ -0,0 +1,2 @@
+*
+!.gitignore
```
