vocodedev · HHousen · Jun 1, 2023 · Jun 1, 2023 · Jun 2, 2023 · Jun 3, 2023
diff --git a/apps/discord_bot/bot.py b/apps/discord_bot/bot.py
@@ -0,0 +1,215 @@
+import os
+import io
+import logging
+import asyncio
+from functools import partial
+import threading
+
+import discord
+from discord.sinks import Sink, Filters, default_filters, AudioData
+from discord.ext import tasks
+
+from vocode.streaming.transcriber import DeepgramTranscriber, GoogleTranscriber
+from vocode.streaming.models.audio_encoding import AudioEncoding
+from vocode.streaming.models.transcriber import DeepgramTranscriberConfig, GoogleTranscriberConfig
+from pydub import AudioSegment
+from vocode.streaming.models.agent import ChatGPTAgentConfig
+from vocode.streaming.agent import ChatGPTAgent
+from vocode.streaming.synthesizer import AzureSynthesizer, StreamElementsSynthesizer
+from vocode.streaming.models.synthesizer import AzureSynthesizerConfig, StreamElementsSynthesizerConfig
+from vocode.streaming.models.transcriber import PunctuationEndpointingConfig
+from discord_output_device import DiscordOutputDevice
+from discord_streaming_conversation import DiscordStreamingConversation
+
+bot = discord.Bot(debug_guilds=[1113610683737722913])
+connections = {}
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
+conversation = None
+discord_output = None
+IDLE_TIME_SECONDS = 10
+
+
+import wave
+
+# f = wave.open("receive.wav", "wb")
+# f.setnchannels(1)
+# f.setsampwidth(2)
+# f.setframerate(44100)
+
+import time
+class VocodeSink(Sink):
+    @Filters.container
+    def write(self, data, user):
+        # if user not in self.audio_data:
+        #     file = io.BytesIO()
+        #     self.audio_data.update({user: AudioData(file)})
+
+        # file = self.audio_data[user]
+        # file.write(data)
+        data = AudioSegment.from_raw(
+            io.BytesIO(data), sample_width=2, frame_rate=48000, channels=2
+        )
+        # convert data audio segment to wav with 1 channel and frame rate 44100
+        data = data.set_channels(1)
+        data = data.set_frame_rate(44100)
+        data = data.set_sample_width(2)
+        raw = io.BytesIO()
+        data.export(raw, format="raw")
+        # f.writeframes(raw.getvalue())
+        raw.seek(0)
+        # print(time.time())
+        conversation.receive_audio(raw.read())
+        # transcriber.send_audio(raw)
+        # write the data to a wave file
+
+    def format_audio(self, *args, **kwargs):
+        pass
+
+
+async def finished_callback(
+    sink, channel: discord.TextChannel, conversation=None, *args
+):
+    recorded_users = [f"<@{user_id}>" for user_id, audio in sink.audio_data.items()]
+    await sink.vc.disconnect()
+    # files = [
+    #     discord.File(audio.file, f"{user_id}.{sink.encoding}")
+    #     for user_id, audio in sink.audio_data.items()
+    # ]
+    if conversation.active:
+        conversation.terminate()
+    else:
+        await channel.send("Leaving voice channel due to lack of voice activity.")
+    await channel.send(
+        f"Finished! Recorded audio for {', '.join(recorded_users)}.",  # files=files
+    )
+
+def create_loop_in_thread(loop: asyncio.AbstractEventLoop, long_running_task=None):
+    asyncio.set_event_loop(loop)
+    if long_running_task:
+        loop.run_until_complete(long_running_task)
+    else:
+        loop.run_forever()
+
+async def start_conversation():
+    global conversation, discord_output
+    discord_output = DiscordOutputDevice(None)
+    # transcriber = GoogleTranscriber(
+    #     GoogleTranscriberConfig(
+    #         sampling_rate=44100,
+    #         audio_encoding=AudioEncoding.LINEAR16,
+    #         chunk_size=2048,
+    #     )
+    # )
+    transcriber = DeepgramTranscriber(
+        DeepgramTranscriberConfig(
+            sampling_rate=44100,
+            audio_encoding=AudioEncoding.LINEAR16,
+            chunk_size=2048,
+            endpointing_config=PunctuationEndpointingConfig(),
+        )
+    )
+    agent = ChatGPTAgent(
+        ChatGPTAgentConfig(
+            initial_message=None,
+            prompt_preamble="""The AI is having a pleasant conversation about life""",
+            allowed_idle_time_seconds=IDLE_TIME_SECONDS,
+            generate_responses=False,
+        )
+    )
+    # synthesizer = AzureSynthesizer(
+    #     AzureSynthesizerConfig.from_output_device(discord_output)
+    # )
+    synthesizer = StreamElementsSynthesizer(
+        StreamElementsSynthesizerConfig.from_output_device(discord_output)
+    )
+
+    conversation = DiscordStreamingConversation(
+        output_device=discord_output,
+        transcriber=transcriber,
+        agent=agent,
+        synthesizer=synthesizer,
+        logger=logger,
+    )
+    print("Conversation created")
+
+    await conversation.start()
+
+create_loop_in_thread(asyncio.new_event_loop(), start_conversation())
+
+@bot.command()
+async def start(ctx: discord.ApplicationContext):
+    """Record your voice!"""
+    global conversation, discord_output
+
+    voice = ctx.author.voice
+
+    if not voice:
+        return await ctx.respond("You're not in a vc right now")
+
+    vc = await voice.channel.connect()
+    discord_output.init(vc)
+    conversation.provide_discord_connection(vc, connections)
+    connections.update({ctx.guild.id: vc})
+
+    vc.start_recording(
+        VocodeSink(),
+        partial(finished_callback, conversation=conversation),
+        ctx.channel,
+    )
+    # asyncio.create_task(print_transcription_task())
+
+    await ctx.respond("The recording has started!")
+
+
+@bot.command()
+async def stop(ctx: discord.ApplicationContext):
+    """Stop recording."""
+    if ctx.guild.id in connections:
+        vc = connections[ctx.guild.id]
+        vc.stop_recording()
+        del connections[ctx.guild.id]
+        await ctx.delete()
+    else:
+        await ctx.respond("Not recording in this guild.")
+
+
+@bot.event
+async def on_ready():
+    pass
+
+
+# async def main():
+# transcriber.start()
+# async def print_transcription_task():
+#     while True:
+#         transcription = await transcriber.output_queue.get()
+#         print(transcription)
+
+# asyncio.create_task(print_transcription_task())
+
+bot.run(os.environ["DISCORD_BOT_TOKEN"])
+
+# if __name__ == "__main__":
+#     main()
+
+
+# @client.tree.command()
+# async def join(interaction: discord.Interaction):
+#     """Joins the voice channel you are in."""
+#     # get the name of the voice channel that the user is in
+#     voice_channel = interaction.user.voice
+#     if voice_channel is None:
+#         await interaction.response.send_message('You are not in a voice channel.')
+#     else:
+#         channel = interaction.user.voice.channel
+#         await interaction.response.send_message(f'You are in {channel.name}')
+#         voice_client = await channel.connect()
+#         source = discord.PCMVolumeTransformer(discord.FFmpegPCMAudio("test.wav"))
+#         voice_client.play(source, after=lambda e: print(f'Player error: {e}') if e else None)
+#         while voice_client.is_playing():
+#             await asyncio.sleep(1)
+#         await voice_client.disconnect()
diff --git a/apps/discord_bot/bot_rec.py b/apps/discord_bot/bot_rec.py
@@ -0,0 +1,65 @@
+from enum import Enum
+
+import discord
+
+bot = discord.Bot(debug_guilds=[1113610683737722913])
+connections = {}
+
+
+class Sinks(Enum):
+    mp3 = discord.sinks.MP3Sink()
+    wav = discord.sinks.WaveSink()
+    pcm = discord.sinks.PCMSink()
+    ogg = discord.sinks.OGGSink()
+    mka = discord.sinks.MKASink()
+    mkv = discord.sinks.MKVSink()
+    mp4 = discord.sinks.MP4Sink()
+    m4a = discord.sinks.M4ASink()
+
+
+async def finished_callback(sink, channel: discord.TextChannel, *args):
+    recorded_users = [f"<@{user_id}>" for user_id, audio in sink.audio_data.items()]
+    await sink.vc.disconnect()
+    files = [
+        discord.File(audio.file, f"{user_id}.{sink.encoding}")
+        for user_id, audio in sink.audio_data.items()
+    ]
+    await channel.send(
+        f"Finished! Recorded audio for {', '.join(recorded_users)}.", files=files
+    )
+
+
+@bot.command()
+async def start(ctx: discord.ApplicationContext, sink: Sinks):
+    """Record your voice!"""
+    voice = ctx.author.voice
+
+    if not voice:
+        return await ctx.respond("You're not in a vc right now")
+
+    vc = await voice.channel.connect()
+    connections.update({ctx.guild.id: vc})
+
+    vc.start_recording(
+        sink.value,
+        finished_callback,
+        ctx.channel,
+    )
+
+    await ctx.respond("The recording has started!")
+
+
+@bot.command()
+async def stop(ctx: discord.ApplicationContext):
+    """Stop recording."""
+    if ctx.guild.id in connections:
+        vc = connections[ctx.guild.id]
+        vc.stop_recording()
+        del connections[ctx.guild.id]
+        await ctx.delete()
+    else:
+        await ctx.respond("Not recording in this guild.")
+
+
+import os
+bot.run(os.environ["DISCORD_BOT_TOKEN"])
diff --git a/apps/discord_bot/discord_output_device.py b/apps/discord_bot/discord_output_device.py
@@ -0,0 +1,109 @@
+import io
+from pydub import AudioSegment
+import discord
+from janus import Queue
+import asyncio
+import numpy as np
+
+from vocode.streaming.output_device.base_output_device import BaseOutputDevice
+from vocode.streaming.models.audio_encoding import AudioEncoding
+from vocode.streaming.utils.worker import ThreadAsyncWorker
+
+
+class PlayFuncWriterWorker(ThreadAsyncWorker):
+    def __init__(self, input_queue: Queue, vc) -> None:
+        super().__init__(input_queue)
+        self.ready_to_send = True
+        self.vc = vc
+
+    def play_audio(self, buffer):
+        data = AudioSegment.from_raw(
+            io.BytesIO(buffer), sample_width=2, frame_rate=44100, channels=1
+        )
+        data = data.set_channels(2)
+        data = data.set_frame_rate(48000)
+        data = data.set_sample_width(2)
+        raw = io.BytesIO()
+        data.export(raw, format="raw")
+        audio = discord.PCMAudio(raw)
+
+        def after(error):
+            if error:
+                print(f"Player error: {error}")
+            self.ready_to_send = True
+
+        self.vc.play(discord.PCMVolumeTransformer(audio), after=after)
+
+    def _run_loop(self):
+        while True:
+            try:
+                if self.ready_to_send:
+                    self.ready_to_send = False
+                    block = self.input_janus_queue.sync_q.get()
+                    self.play_audio(block)
+            except asyncio.CancelledError:
+                return
+
+
+class DiscordOutputDevice(BaseOutputDevice):
+    DEFAULT_SAMPLING_RATE = 44100
+
+    def __init__(
+        self,
+        play_func,
+        sampling_rate: int = DEFAULT_SAMPLING_RATE,
+        audio_encoding: AudioEncoding = AudioEncoding.LINEAR16,
+    ):
+        super().__init__(sampling_rate, audio_encoding)
+        self.blocksize = self.sampling_rate
+        self.queue: Queue[np.ndarray] = Queue()
+        # self.thread_worker = None
+        self.process_task = asyncio.create_task(self.process())
+        self.ready_to_send = True
+
+    async def process(self):
+        while True:
+            if self.ready_to_send:
+                self.ready_to_send = False
+                block = await self.queue.async_q.get()
+                self.play_audio(block)
+
+    def play_audio(self, buffer):
+        data = AudioSegment.from_raw(
+            io.BytesIO(buffer), sample_width=2, frame_rate=44100, channels=1
+        )
+        data = data.set_channels(2)
+        data = data.set_frame_rate(48000)
+        data = data.set_sample_width(2)
+        raw = io.BytesIO()
+        data.export(raw, format="raw")
+        audio = discord.PCMAudio(raw)
+
+        def after(error):
+            if error:
+                print(f"Player error: {error}")
+            self.ready_to_send = True
+
+        self.vc.play(discord.PCMVolumeTransformer(audio), after=after)
+
+    def consume_nonblocking(self, chunk):
+        # if self.thread_worker is None:
+        #     raise RuntimeError(
+        #         "Output device not inited. Call discord_output.init(voice_channel) first."
+        #     )
+
+        chunk_arr = np.frombuffer(chunk, dtype=np.int16)
+        for i in range(0, chunk_arr.shape[0], self.blocksize):
+            block = np.zeros(self.blocksize, dtype=np.int16)
+            size = min(self.blocksize, chunk_arr.shape[0] - i)
+            block[:size] = chunk_arr[i : i + size]
+            self.queue.sync_q.put_nowait(block)
+
+    def init(self, vc):
+        self.vc = vc
+        # self.thread_worker = PlayFuncWriterWorker(self.queue, vc)
+        # self.thread_worker.start()
+        pass
+
+    def terminate(self):
+        self.thread_worker.terminate()