Moved protocol conformances

PreternaturalAI · Jan 15, 2025 · 114db0b · 114db0b
1 parent f74b0f5
commit 114db0b
Show file tree

Hide file tree

Showing 18 changed files with 300 additions and 4 deletions.
diff --git a/Sources/HumeAI/Intramodular/HumeAI.Client+SpeechSynthesisRequestHandling.swift b/Sources/HumeAI/Intramodular/HumeAI.Client+SpeechSynthesisRequestHandling.swift
@@ -0,0 +1,44 @@
+//
+//  HumeAI.Client+SpeechSynthesisRequestHandling.swift
+//  Voice
+//
+//  Created by Jared Davidson on 11/22/24.
+//
+
+import Foundation
+import SwiftUI
+import AVFoundation
+import LargeLanguageModels
+
+/// Conforms `HumeAI.Client` to `SpeechSynthesisRequestHandling`.
+///
+/// Only voice listing is implemented. Every other requirement throws
+/// `HumeAI.APIError.unknown` with a message naming the unsupported operation.
+extension HumeAI.Client: SpeechSynthesisRequestHandling {
+    /// Lists the voices returned by `getAllAvailableVoices()` as `AbstractVoice` values.
+    ///
+    /// Each mapped voice carries the source voice's `id` and `name`; its description is always `nil`.
+    ///
+    /// - Throws: Whatever `getAllAvailableVoices()` throws.
+    public func availableVoices() async throws -> [AbstractVoice] {
+        try await getAllAvailableVoices().map { voice in
+            AbstractVoice(
+                voiceID: voice.id,
+                name: voice.name,
+                description: nil
+            )
+        }
+    }
+
+    /// Not supported by this client.
+    ///
+    /// - Throws: Always `HumeAI.APIError.unknown`.
+    public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
+        throw HumeAI.APIError.unknown(message: "Text to speech not supported")
+    }
+
+    /// Not supported by this client.
+    ///
+    /// - Throws: Always `HumeAI.APIError.unknown`.
+    public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
+        throw HumeAI.APIError.unknown(message: "Speech to speech not supported")
+    }
+
+    /// Not supported by this client.
+    ///
+    /// - Throws: Always `HumeAI.APIError.unknown`.
+    public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID {
+        throw HumeAI.APIError.unknown(message: "Voice creation is not supported")
+    }
+
+    /// Not supported by this client.
+    ///
+    /// - Throws: Always `HumeAI.APIError.unknown`.
+    public func edit(voice: AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool {
+        // The message names the operation that was actually attempted (editing, not creation).
+        throw HumeAI.APIError.unknown(message: "Voice editing is not supported")
+    }
+
+    /// Not supported by this client.
+    ///
+    /// - Throws: Always `HumeAI.APIError.unknown`.
+    public func delete(voice: AbstractVoice.ID) async throws {
+        // The message names the operation that was actually attempted (deletion, not creation).
+        throw HumeAI.APIError.unknown(message: "Voice deletion is not supported")
+    }
+}
diff --git a/...- Move Somewhere Else/AbstractVoice.swift → ...r/AbstractVoice (WIP)/AbstractVoice.swift b/...- Move Somewhere Else/AbstractVoice.swift → ...r/AbstractVoice (WIP)/AbstractVoice.swift
diff --git a/...omewhere Else/AbstractVoiceSettings.swift → ...ctVoice (WIP)/AbstractVoiceSettings.swift b/...omewhere Else/AbstractVoiceSettings.swift → ...ctVoice (WIP)/AbstractVoiceSettings.swift
diff --git a/...Else/SpeechSynthesisRequestHandling.swift → ...WIP)/SpeechSynthesisRequestHandling.swift b/...Else/SpeechSynthesisRequestHandling.swift → ...WIP)/SpeechSynthesisRequestHandling.swift
diff --git a/...Else/VideoGenerationRequestHandling.swift → ...WIP)/VideoGenerationRequestHandling.swift b/...Else/VideoGenerationRequestHandling.swift → ...WIP)/VideoGenerationRequestHandling.swift
@@ -5,7 +5,6 @@
 import AVFoundation
 import Foundation
 import SwiftUI
-import LargeLanguageModels
 
 public protocol VideoGenerationRequestHandling {
     func availableModels() async throws -> [VideoModel]

diff --git a/...s/VideoGenerationSettings.FrameRate.swift → ...)/VideoGenerationSettings.FrameRate.swift b/...s/VideoGenerationSettings.FrameRate.swift → ...)/VideoGenerationSettings.FrameRate.swift
diff --git a/...eoGenerationSettings.MotionSettings.swift → ...eoGenerationSettings.MotionSettings.swift b/...eoGenerationSettings.MotionSettings.swift → ...eoGenerationSettings.MotionSettings.swift
diff --git a/...ngs/VideoGenerationSettings.Quality.swift → ...IP)/VideoGenerationSettings.Quality.swift b/...ngs/VideoGenerationSettings.Quality.swift → ...IP)/VideoGenerationSettings.Quality.swift
diff --git a/.../VideoGenerationSettings.Resolution.swift → .../VideoGenerationSettings.Resolution.swift b/.../VideoGenerationSettings.Resolution.swift → .../VideoGenerationSettings.Resolution.swift
diff --git a/...deoGenerationSettings.StyleStrength.swift → ...deoGenerationSettings.StyleStrength.swift b/...deoGenerationSettings.StyleStrength.swift → ...deoGenerationSettings.StyleStrength.swift
diff --git a/...n Setttings/VideoGenerationSettings.swift → ...ation (WIP)/VideoGenerationSettings.swift b/...n Setttings/VideoGenerationSettings.swift → ...ation (WIP)/VideoGenerationSettings.swift
diff --git a/...IP - Move Somewhere Else/VideoModel.swift → ...ar/VideoGeneration (WIP)/VideoModel.swift b/...IP - Move Somewhere Else/VideoModel.swift → ...ar/VideoGeneration (WIP)/VideoModel.swift
diff --git a/Sources/NeetsAI/Intramodular/Models/NeetsAI.Voice.swift b/Sources/NeetsAI/Intramodular/Models/NeetsAI.Voice.swift
@@ -6,6 +6,7 @@
 //
 
 import Foundation
+import LargeLanguageModels
 
 extension NeetsAI {
     public struct Voice: Codable, Hashable {
@@ -15,3 +16,24 @@ extension NeetsAI {
         public let supportedModels: [String]
     }
 }
+
+extension NeetsAI.Voice: AbstractVoiceConvertible {
+    /// Builds an `AbstractVoice` from this voice.
+    ///
+    /// The voice's `id` becomes the voice ID, `title` becomes the name (an empty
+    /// string when `title` is `nil`), and `aliasOf` becomes the description.
+    public func __conversion() throws -> AbstractVoice {
+        AbstractVoice(voiceID: id, name: title ?? "", description: aliasOf)
+    }
+}
+
+extension NeetsAI.Voice: AbstractVoiceInitiable {
+    /// Creates a `NeetsAI.Voice` from an `AbstractVoice`.
+    ///
+    /// `voiceID`, `name` and `description` are copied into `id`, `title` and
+    /// `aliasOf` respectively; `supportedModels` is always set to an empty array.
+    public init(voice: AbstractVoice) throws {
+        self.init(id: voice.voiceID, title: voice.name, aliasOf: voice.description, supportedModels: [])
+    }
+}
diff --git a/Sources/NeetsAI/Intramodular/NeetsAI.Client+SpeechSynthesisRequestHandling.swift b/Sources/NeetsAI/Intramodular/NeetsAI.Client+SpeechSynthesisRequestHandling.swift
@@ -0,0 +1,41 @@
+//
+//  NeetsAI.Client+SpeechSynthesisRequestHandling.swift
+//  Voice
+//
+
+import Foundation
+import SwiftUI
+import AVFoundation
+import LargeLanguageModels
+
+/// Conforms `NeetsAI.Client` to `SpeechSynthesisRequestHandling`.
+///
+/// Voice listing and text-to-speech are forwarded to the client; the remaining
+/// requirements throw `NeetsAI.APIError.unknown`.
+extension NeetsAI.Client: SpeechSynthesisRequestHandling {
+    /// Lists the voices returned by `getAllAvailableVoices()`, each converted via `__conversion()`.
+    public func availableVoices() async throws -> [AbstractVoice] {
+        try await getAllAvailableVoices().map { try $0.__conversion() }
+    }
+
+    /// Generates speech audio for `text` through `generateSpeech(text:voiceId:model:)`.
+    ///
+    /// `model` is resolved from its raw value and falls back to `.mistralai` when the
+    /// raw value is not recognised. `voiceSettings` is not used by this implementation.
+    public func speech(
+        for text: String,
+        voiceID: String,
+        voiceSettings: LargeLanguageModels.AbstractVoiceSettings,
+        model: String
+    ) async throws -> Data {
+        return try await generateSpeech(
+            text: text,
+            voiceId: voiceID,
+            model: .init(rawValue: model) ?? .mistralai
+        )
+    }
+
+    /// Not supported by this client; always throws `NeetsAI.APIError.unknown`.
+    public func speechToSpeech(
+        inputAudioURL: URL,
+        voiceID: String,
+        voiceSettings: LargeLanguageModels.AbstractVoiceSettings,
+        model: String
+    ) async throws -> Data {
+        throw NeetsAI.APIError.unknown(message: "Speech to speech not supported")
+    }
+
+    /// Not supported by this client; always throws `NeetsAI.APIError.unknown`.
+    public func upload(
+        voiceWithName name: String,
+        description: String,
+        fileURL: URL
+    ) async throws -> LargeLanguageModels.AbstractVoice.ID {
+        throw NeetsAI.APIError.unknown(message: "Uploading Voice is not supported")
+    }
+
+    /// Not supported by this client; always throws `NeetsAI.APIError.unknown`.
+    public func edit(
+        voice: LargeLanguageModels.AbstractVoice.ID,
+        name: String,
+        description: String,
+        fileURL: URL?
+    ) async throws -> Bool {
+        throw NeetsAI.APIError.unknown(message: "Editing Voice is not supported")
+    }
+
+    /// Not supported by this client; always throws `NeetsAI.APIError.unknown`.
+    public func delete(voice: LargeLanguageModels.AbstractVoice.ID) async throws {
+        throw NeetsAI.APIError.unknown(message: "Deleting Voice is not supported")
+    }
+}
diff --git a/Sources/PlayHT/Intramodular/PlayHT.Client+SpeechSynthesisRequestHandling.swift b/Sources/PlayHT/Intramodular/PlayHT.Client+SpeechSynthesisRequestHandling.swift
@@ -0,0 +1,56 @@
+//
+//  PlayHT.Client+SpeechSynthesisRequestHandling.swift
+//  Voice
+//
+//  Created by Jared Davidson on 11/20/24.
+//
+
+import Foundation
+import AI
+import ElevenLabs
+import SwiftUI
+import AVFoundation
+import LargeLanguageModels
+
+/// Conforms `PlayHT.Client` to `SpeechSynthesisRequestHandling`.
+///
+/// Voice listing, text-to-speech, voice upload (instant cloning) and deletion are
+/// forwarded to the client; speech-to-speech and editing throw `PlayHT.APIError.unknown`.
+extension PlayHT.Client: SpeechSynthesisRequestHandling {
+    /// Lists the voices returned by `getAllAvailableVoices()`, each converted via `__conversion()`.
+    public func availableVoices() async throws -> [AbstractVoice] {
+        let voices: [AbstractVoice] = try await getAllAvailableVoices().map { try $0.__conversion() }
+        return voices
+    }
+
+    /// Generates speech audio for `text` through `streamTextToSpeech`.
+    ///
+    /// This implementation does not use `voiceSettings` or `model`: default settings
+    /// (`.init()`) and the `.playHT2Turbo` model are always passed.
+    public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
+        let data: Data = try await streamTextToSpeech(
+            text: text,
+            voice: voiceID,
+            settings: .init(),
+            model: .playHT2Turbo
+        )
+
+        return data
+    }
+
+    /// Not supported by this client; always throws `PlayHT.APIError.unknown`.
+    public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: LargeLanguageModels.AbstractVoiceSettings, model: String) async throws -> Data {
+        throw PlayHT.APIError.unknown(message: "Speech to speech not supported")
+    }
+
+    /// Clones a voice from the audio at `fileURL` and returns the new voice's identifier.
+    ///
+    /// The audio is first converted with `convertAudioToMP4()`, and the converted file's
+    /// absolute URL string is handed to `instantCloneVoice`. `description` is not used.
+    ///
+    /// - Throws: Any error from the conversion or from `instantCloneVoice`.
+    public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID {
+        let mp4URL = try await fileURL.convertAudioToMP4()
+        // Remove the temporary conversion output on every exit path, so a failed
+        // clone request no longer leaves the file behind in the temp directory.
+        defer { try? FileManager.default.removeItem(at: mp4URL) }
+
+        let voiceID = try await instantCloneVoice(
+            sampleFileURL: mp4URL.absoluteString,
+            name: name
+        )
+
+        return .init(rawValue: voiceID.rawValue)
+    }
+
+    /// Not supported by this client; always throws `PlayHT.APIError.unknown`.
+    public func edit(voice: LargeLanguageModels.AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool {
+        throw PlayHT.APIError.unknown(message: "Voice editing not supported")
+    }
+
+    /// Deletes the cloned voice identified by `voice` through `deleteClonedVoice`.
+    public func delete(voice: LargeLanguageModels.AbstractVoice.ID) async throws {
+        try await deleteClonedVoice(voice: .init(rawValue: voice.rawValue))
+    }
+}
diff --git a/Sources/PlayHT/Intramodular/PlayHT.Client.swift b/Sources/PlayHT/Intramodular/PlayHT.Client.swift
@@ -59,14 +59,14 @@ extension PlayHT.Client: CoreMI._ServiceClientProtocol {
 extension PlayHT.Client {
 
     public func getAllAvailableVoices() async throws -> [PlayHT.Voice] {
-        async let htVoices = availableVoices()
-        async let clonedVoices = clonedVoices()
+        async let htVoices = self.getAvailableVoices()
+        async let clonedVoices = self.clonedVoices()
 
         let (available, cloned) = try await (htVoices, clonedVoices)
         return available + cloned
     }
 
-    public func availableVoices() async throws -> [PlayHT.Voice] {
+    public func getAvailableVoices() async throws -> [PlayHT.Voice] {
         try await run(\.listVoices).voices
     }
 

diff --git a/Sources/PlayHT/Intramodular/URL++.swift b/Sources/PlayHT/Intramodular/URL++.swift
@@ -0,0 +1,79 @@
+//
+//  URL++.swift
+//  AI
+//
+//  Created by Jared Davidson on 1/14/25.
+//
+
+import AVFoundation
+import AudioToolbox
+
+// FIXME: - This needs to be moved somewhere else (@archetapp)
+
+extension URL {
+    /// Builds an `NSError` in the `"AudioConversion"` domain with the given description.
+    private static func audioConversionError(_ message: String) -> NSError {
+        NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: message])
+    }
+
+    /// Exports the audio at this URL to a new `.mp4` file in the temporary directory.
+    ///
+    /// The first audio track of the asset is inserted into a composition four times,
+    /// back to back, and the composition is exported with the passthrough preset.
+    /// NOTE(review): the fourfold repetition is presumably there to reach a minimum
+    /// sample length required by the consumer — confirm.
+    ///
+    /// On success the caller owns the returned file and is responsible for deleting it.
+    /// On failure any partially written output file is removed before the error is rethrown.
+    ///
+    /// - Returns: The URL of the exported file.
+    /// - Throws: An `"AudioConversion"` `NSError` when the composition track or export
+    ///   session cannot be created, no audio track exists, the export does not complete,
+    ///   or the output is smaller than 5000 bytes; also any error thrown while loading
+    ///   the asset, inserting time ranges, or reading the output file's attributes.
+    func convertAudioToMP4() async throws -> URL {
+        let outputURL = FileManager.default.temporaryDirectory
+            .appendingPathComponent(UUID().uuidString)
+            .appendingPathExtension("mp4")
+
+        let asset = AVURLAsset(url: self)
+
+        let composition = AVMutableComposition()
+        guard let compositionTrack = composition.addMutableTrack(
+            withMediaType: .audio,
+            preferredTrackID: kCMPersistentTrackID_Invalid
+        ) else {
+            throw Self.audioConversionError("Could not create composition track")
+        }
+
+        guard let audioTrack = try await asset.loadTracks(withMediaType: .audio).first else {
+            throw Self.audioConversionError("No audio track found")
+        }
+
+        let sourceDuration = try await asset.load(.duration)
+        let timeRange = CMTimeRange(start: .zero, duration: sourceDuration)
+        let repetitions: Int32 = 4
+        for index in 0..<repetitions {
+            // Multiply the CMTime directly so every segment starts exactly where the
+            // previous one ends; going through Double seconds at a 600 timescale can
+            // round the offset and misalign adjacent segments.
+            try compositionTrack.insertTimeRange(
+                timeRange,
+                of: audioTrack,
+                at: CMTimeMultiply(sourceDuration, multiplier: index)
+            )
+        }
+
+        guard let exportSession = AVAssetExportSession(
+            asset: composition,
+            presetName: AVAssetExportPresetPassthrough
+        ) else {
+            throw Self.audioConversionError("Could not create export session")
+        }
+
+        exportSession.outputURL = outputURL
+        exportSession.outputFileType = AVFileType.mp4
+        exportSession.shouldOptimizeForNetworkUse = true
+
+        do {
+            // Bridge the callback-based export into async/await. The continuation is
+            // resumed directly from the completion handler (exactly once) instead of
+            // hopping through the main queue, so it cannot stall on a blocked main thread.
+            try await withCheckedThrowingContinuation { (continuation: CheckedContinuation<Void, Error>) in
+                exportSession.exportAsynchronously {
+                    switch exportSession.status {
+                    case .completed:
+                        continuation.resume()
+                    case .failed:
+                        continuation.resume(throwing: exportSession.error ?? Self.audioConversionError("Export failed"))
+                    case .cancelled:
+                        continuation.resume(throwing: Self.audioConversionError("Export cancelled"))
+                    default:
+                        continuation.resume(throwing: Self.audioConversionError("Unknown export error"))
+                    }
+                }
+            }
+
+            let fileSize = try FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] as? Int ?? 0
+            if fileSize < 5000 { // 5KB minimum
+                throw Self.audioConversionError("Converted file too small")
+            }
+        } catch {
+            // Best-effort cleanup: do not leave a partial or undersized file behind.
+            try? FileManager.default.removeItem(at: outputURL)
+            throw error
+        }
+
+        return outputURL
+    }
+}
diff --git a/Sources/Rime/Intramodular/Rime.Client+SpeechSynthesisRequestHandling.swift b/Sources/Rime/Intramodular/Rime.Client+SpeechSynthesisRequestHandling.swift
@@ -0,0 +1,55 @@
+//
+//  Rime.Client+SpeechSynthesisRequestHandling.swift
+//  Voice
+//
+//  Created by Jared Davidson on 11/21/24.
+//
+
+import Foundation
+import AI
+import ElevenLabs
+import SwiftUI
+import AVFoundation
+
+/// Conforms `Rime.Client` to `SpeechSynthesisRequestHandling`.
+///
+/// Voice listing and text-to-speech are forwarded to the client; the remaining
+/// requirements throw `Rime.APIError.unknown` with a message naming the unsupported operation.
+extension Rime.Client: SpeechSynthesisRequestHandling {
+    /// Lists the voices returned by `getAllAvailableVoiceDetails()`, each converted via `__conversion()`.
+    public func availableVoices() async throws -> [AbstractVoice] {
+        return try await getAllAvailableVoiceDetails().map { try $0.__conversion() }
+    }
+
+    /// Generates speech audio for `text` through `streamTextToSpeech`.
+    ///
+    /// This implementation does not use `voiceSettings` or `model`: MP3 output and the
+    /// `.mist` model are always requested.
+    public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
+        return try await streamTextToSpeech(
+            text: text,
+            voice: voiceID,
+            outputAudio: .MP3,
+            model: .mist
+        )
+    }
+
+    /// Not supported by this client; always throws `Rime.APIError.unknown`.
+    public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
+        throw Rime.APIError.unknown(message: "Speech to speech not supported")
+    }
+
+    /// Not supported by this client; always throws `Rime.APIError.unknown`.
+    public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID {
+        throw Rime.APIError.unknown(message: "Voice creation is not supported")
+    }
+
+    /// Not supported by this client; always throws `Rime.APIError.unknown`.
+    public func edit(voice: AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool {
+        // The message names the operation that was actually attempted (editing, not creation).
+        throw Rime.APIError.unknown(message: "Voice editing is not supported")
+    }
+
+    /// Not supported by this client; always throws `Rime.APIError.unknown`.
+    public func delete(voice: AbstractVoice.ID) async throws {
+        // The message names the operation that was actually attempted (deletion, not creation).
+        throw Rime.APIError.unknown(message: "Voice deletion is not supported")
+    }
+
+    /// Lists the voices returned by `getAllAvailableVoiceDetails()` as `ElevenLabs.Voice` values.
+    ///
+    /// This overload differs from the `[AbstractVoice]` variant only in its return type,
+    /// so call sites need a type context to select it. The voice's `name` is used for
+    /// both `voiceID` and `name`, `demographic` becomes the description, and `isOwner`
+    /// is always `false`.
+    public func availableVoices() async throws -> [ElevenLabs.Voice] {
+        return try await getAllAvailableVoiceDetails().map { voice in
+            ElevenLabs.Voice(
+                voiceID: voice.name,
+                name: voice.name,
+                description: voice.demographic,
+                isOwner: false
+            )
+        }
+    }
+
+}