From 114db0bd655f650447ad7841aabe89d9aa2229a7 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Tue, 14 Jan 2025 17:08:08 -0700 Subject: [PATCH] Moved protocol conformances --- ...lient+SpeechSynthesisRequestHandling.swift | 44 +++++++++++ .../AbstractVoice.swift | 0 .../AbstractVoiceSettings.swift | 0 .../SpeechSynthesisRequestHandling.swift | 0 .../VideoGenerationRequestHandling.swift | 1 - .../VideoGenerationSettings.FrameRate.swift | 0 ...deoGenerationSettings.MotionSettings.swift | 0 .../VideoGenerationSettings.Quality.swift | 0 .../VideoGenerationSettings.Resolution.swift | 0 ...ideoGenerationSettings.StyleStrength.swift | 0 .../VideoGenerationSettings.swift | 0 .../VideoModel.swift | 0 .../Intramodular/Models/NeetsAI.Voice.swift | 22 ++++++ ...lient+SpeechSynthesisRequestHandling.swift | 41 ++++++++++ ...lient+SpeechSynthesisRequestHandling.swift | 56 +++++++++++++ .../PlayHT/Intramodular/PlayHT.Client.swift | 6 +- Sources/PlayHT/Intramodular/URL++.swift | 79 +++++++++++++++++++ ...lient+SpeechSynthesisRequestHandling.swift | 55 +++++++++++++ 18 files changed, 300 insertions(+), 4 deletions(-) create mode 100644 Sources/HumeAI/Intramodular/HumeAI.Client+SpeechSynthesisRequestHandling.swift rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else => AbstractVoice (WIP)}/AbstractVoice.swift (100%) rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else => AbstractVoice (WIP)}/AbstractVoiceSettings.swift (100%) rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else => AbstractVoice (WIP)}/SpeechSynthesisRequestHandling.swift (100%) rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else => VideoGeneration (WIP)}/VideoGenerationRequestHandling.swift (98%) rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else/Video Generation Setttings => VideoGeneration (WIP)}/VideoGenerationSettings.FrameRate.swift (100%) rename 
Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else/Video Generation Setttings => VideoGeneration (WIP)}/VideoGenerationSettings.MotionSettings.swift (100%)
 rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else/Video Generation Setttings => VideoGeneration (WIP)}/VideoGenerationSettings.Quality.swift (100%)
 rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else/Video Generation Setttings => VideoGeneration (WIP)}/VideoGenerationSettings.Resolution.swift (100%)
 rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else/Video Generation Setttings => VideoGeneration (WIP)}/VideoGenerationSettings.StyleStrength.swift (100%)
 rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else/Video Generation Setttings => VideoGeneration (WIP)}/VideoGenerationSettings.swift (100%)
 rename Sources/LargeLanguageModels/Intramodular/{WIP - Move Somewhere Else => VideoGeneration (WIP)}/VideoModel.swift (100%)
 create mode 100644 Sources/NeetsAI/Intramodular/NeetsAI.Client+SpeechSynthesisRequestHandling.swift
 create mode 100644 Sources/PlayHT/Intramodular/PlayHT.Client+SpeechSynthesisRequestHandling.swift
 create mode 100644 Sources/PlayHT/Intramodular/URL++.swift
 create mode 100644 Sources/Rime/Intramodular/Rime.Client+SpeechSynthesisRequestHandling.swift

diff --git a/Sources/HumeAI/Intramodular/HumeAI.Client+SpeechSynthesisRequestHandling.swift b/Sources/HumeAI/Intramodular/HumeAI.Client+SpeechSynthesisRequestHandling.swift
new file mode 100644
index 00000000..cd5a4e8f
--- /dev/null
+++ b/Sources/HumeAI/Intramodular/HumeAI.Client+SpeechSynthesisRequestHandling.swift
@@ -0,0 +1,55 @@
+//
+//  HumeAI.Client+SpeechSynthesisRequestHandling.swift
+//  Voice
+//
+//  Created by Jared Davidson on 11/22/24.
+//
+
+import Foundation
+import SwiftUI
+import AVFoundation
+import LargeLanguageModels
+
+// MARK: - SpeechSynthesisRequestHandling
+
+// Only voice listing is implemented here; every other requirement throws
+// `HumeAI.APIError.unknown(message:)`.
+extension HumeAI.Client: SpeechSynthesisRequestHandling {
+    /// Returns every voice from `getAllAvailableVoices()` as an `AbstractVoice`.
+    ///
+    /// Each voice's `id` and `name` are carried over; `description` is always `nil`.
+    public func availableVoices() async throws -> [AbstractVoice] {
+        try await getAllAvailableVoices().map { voice in
+            AbstractVoice(
+                voiceID: voice.id,
+                name: voice.name,
+                description: nil
+            )
+        }
+    }
+
+    /// Not supported: always throws `HumeAI.APIError.unknown`.
+    public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
+        throw HumeAI.APIError.unknown(message: "Text to speech not supported")
+    }
+
+    /// Not supported: always throws `HumeAI.APIError.unknown`.
+    public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
+        throw HumeAI.APIError.unknown(message: "Speech to speech not supported")
+    }
+
+    /// Not supported: always throws `HumeAI.APIError.unknown`.
+    public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID {
+        throw HumeAI.APIError.unknown(message: "Voice creation is not supported")
+    }
+
+    /// Not supported: always throws `HumeAI.APIError.unknown`.
+    public func edit(voice: AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool {
+        throw HumeAI.APIError.unknown(message: "Voice editing is not supported")
+    }
+
+    /// Not supported: always throws `HumeAI.APIError.unknown`.
+    public func delete(voice: AbstractVoice.ID) async throws {
+        throw HumeAI.APIError.unknown(message: "Voice deletion is not supported")
+    }
+}
diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/AbstractVoice.swift b/Sources/LargeLanguageModels/Intramodular/AbstractVoice (WIP)/AbstractVoice.swift
similarity index 100%
rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/AbstractVoice.swift
rename to Sources/LargeLanguageModels/Intramodular/AbstractVoice (WIP)/AbstractVoice.swift
diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/AbstractVoiceSettings.swift b/Sources/LargeLanguageModels/Intramodular/AbstractVoice (WIP)/AbstractVoiceSettings.swift
similarity index 100%
rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/AbstractVoiceSettings.swift
rename to Sources/LargeLanguageModels/Intramodular/AbstractVoice (WIP)/AbstractVoiceSettings.swift
diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift b/Sources/LargeLanguageModels/Intramodular/AbstractVoice (WIP)/SpeechSynthesisRequestHandling.swift
similarity index 100%
rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/SpeechSynthesisRequestHandling.swift
rename to Sources/LargeLanguageModels/Intramodular/AbstractVoice (WIP)/SpeechSynthesisRequestHandling.swift
diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationRequestHandling.swift
similarity index 98%
rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift
rename to Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationRequestHandling.swift
index d9f9ab1c..bc82693e 100644 --- a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/VideoGenerationRequestHandling.swift +++ b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationRequestHandling.swift @@ -5,7 +5,6 @@ import AVFoundation import Foundation import SwiftUI -import LargeLanguageModels public protocol VideoGenerationRequestHandling { func availableModels() async throws -> [VideoModel] diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.FrameRate.swift b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.FrameRate.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.FrameRate.swift rename to Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.FrameRate.swift diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.MotionSettings.swift b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.MotionSettings.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.MotionSettings.swift rename to Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.MotionSettings.swift diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.Quality.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Quality.swift rename to 
Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.Quality.swift diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Resolution.swift b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.Resolution.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.Resolution.swift rename to Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.Resolution.swift diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.StyleStrength.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.StyleStrength.swift rename to Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.StyleStrength.swift diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/Video Generation Setttings/VideoGenerationSettings.swift rename to Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoGenerationSettings.swift diff --git a/Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/VideoModel.swift b/Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoModel.swift similarity index 100% rename from Sources/LargeLanguageModels/Intramodular/WIP - Move Somewhere Else/VideoModel.swift rename to 
Sources/LargeLanguageModels/Intramodular/VideoGeneration (WIP)/VideoModel.swift
diff --git a/Sources/NeetsAI/Intramodular/Models/NeetsAI.Voice.swift b/Sources/NeetsAI/Intramodular/Models/NeetsAI.Voice.swift
index 2f035154..4422720b 100644
--- a/Sources/NeetsAI/Intramodular/Models/NeetsAI.Voice.swift
+++ b/Sources/NeetsAI/Intramodular/Models/NeetsAI.Voice.swift
@@ -6,6 +6,7 @@
 //
 
 import Foundation
+import LargeLanguageModels
 
 extension NeetsAI {
     public struct Voice: Codable, Hashable {
@@ -15,3 +16,34 @@ extension NeetsAI {
         public let supportedModels: [String]
     }
 }
+
+// MARK: - AbstractVoiceConvertible
+
+extension NeetsAI.Voice: AbstractVoiceConvertible {
+    /// Converts this voice to an `AbstractVoice`.
+    ///
+    /// `title` falls back to an empty string when it is `nil`; `aliasOf` is used as the description.
+    public func __conversion() throws -> AbstractVoice {
+        AbstractVoice(
+            voiceID: id,
+            name: title ?? "",
+            description: aliasOf
+        )
+    }
+}
+
+// MARK: - AbstractVoiceInitiable
+
+extension NeetsAI.Voice: AbstractVoiceInitiable {
+    /// Creates a voice from an `AbstractVoice`.
+    ///
+    /// `voiceID`, `name` and `description` are copied over; `supportedModels` is set to an empty array.
+    public init(voice: AbstractVoice) throws {
+        self.init(
+            id: voice.voiceID,
+            title: voice.name,
+            aliasOf: voice.description,
+            supportedModels: []
+        )
+    }
+}
diff --git a/Sources/NeetsAI/Intramodular/NeetsAI.Client+SpeechSynthesisRequestHandling.swift b/Sources/NeetsAI/Intramodular/NeetsAI.Client+SpeechSynthesisRequestHandling.swift
new file mode 100644
index 00000000..3fa5844b
--- /dev/null
+++ b/Sources/NeetsAI/Intramodular/NeetsAI.Client+SpeechSynthesisRequestHandling.swift
@@ -0,0 +1,57 @@
+//
+//  NeetsAI.Client+SpeechSynthesisRequestHandling.swift
+//  Voice
+//
+
+import Foundation
+import SwiftUI
+import AVFoundation
+import LargeLanguageModels
+
+// MARK: - SpeechSynthesisRequestHandling
+
+// Voice listing and text-to-speech are implemented; the remaining
+// requirements throw `NeetsAI.APIError.unknown(message:)`.
+extension NeetsAI.Client: SpeechSynthesisRequestHandling {
+    /// Returns every voice from `getAllAvailableVoices()`, converted with `__conversion()`.
+    public func availableVoices() async throws -> [AbstractVoice] {
+        try await getAllAvailableVoices().map { try $0.__conversion() }
+    }
+
+    /// Generates speech audio for `text` through `generateSpeech`.
+    ///
+    /// - Parameters:
+    ///   - text: The text to synthesize.
+    ///   - voiceID: Forwarded to `generateSpeech` as `voiceId`.
+    ///   - voiceSettings: Not used by this implementation.
+    ///   - model: Raw value of the model to use; `.mistralai` is used when
+    ///     no model can be created from this raw value.
+    /// - Returns: The audio data returned by `generateSpeech`.
+    public func speech(for text: String, voiceID: String, voiceSettings: LargeLanguageModels.AbstractVoiceSettings, model: String) async throws -> Data {
+        try await generateSpeech(
+            text: text,
+            voiceId: voiceID,
+            model: .init(rawValue: model) ?? .mistralai
+        )
+    }
+
+    /// Not supported: always throws `NeetsAI.APIError.unknown`.
+    public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: LargeLanguageModels.AbstractVoiceSettings, model: String) async throws -> Data {
+        throw NeetsAI.APIError.unknown(message: "Speech to speech not supported")
+    }
+
+    /// Not supported: always throws `NeetsAI.APIError.unknown`.
+    public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> LargeLanguageModels.AbstractVoice.ID {
+        throw NeetsAI.APIError.unknown(message: "Uploading Voice is not supported")
+    }
+
+    /// Not supported: always throws `NeetsAI.APIError.unknown`.
+    public func edit(voice: LargeLanguageModels.AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool {
+        throw NeetsAI.APIError.unknown(message: "Editing Voice is not supported")
+    }
+
+    /// Not supported: always throws `NeetsAI.APIError.unknown`.
+    public func delete(voice: LargeLanguageModels.AbstractVoice.ID) async throws {
+        throw NeetsAI.APIError.unknown(message: "Deleting Voice is not supported")
+    }
+}
diff --git a/Sources/PlayHT/Intramodular/PlayHT.Client+SpeechSynthesisRequestHandling.swift b/Sources/PlayHT/Intramodular/PlayHT.Client+SpeechSynthesisRequestHandling.swift
new file mode 100644
index 00000000..c987b479
--- /dev/null
+++ b/Sources/PlayHT/Intramodular/PlayHT.Client+SpeechSynthesisRequestHandling.swift
@@ -0,0 +1,70 @@
+//
+//  PlayHT.Client+SpeechSynthesisRequestHandling.swift
+//  Voice
+//
+//  Created by Jared Davidson on 11/20/24.
+//
+
+import Foundation
+import AI
+import ElevenLabs
+import SwiftUI
+import AVFoundation
+import LargeLanguageModels
+
+// MARK: - SpeechSynthesisRequestHandling
+
+extension PlayHT.Client: SpeechSynthesisRequestHandling {
+    /// Returns every voice from `getAllAvailableVoices()`, converted with `__conversion()`.
+    public func availableVoices() async throws -> [AbstractVoice] {
+        try await getAllAvailableVoices().map { try $0.__conversion() }
+    }
+
+    /// Synthesizes `text` with the given voice through `streamTextToSpeech`.
+    ///
+    /// - Note: `voiceSettings` and `model` are not used; default settings (`.init()`)
+    ///   and the `.playHT2Turbo` model are always passed.
+    public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
+        try await streamTextToSpeech(
+            text: text,
+            voice: voiceID,
+            settings: .init(),
+            model: .playHT2Turbo
+        )
+    }
+
+    /// Not supported: always throws `PlayHT.APIError.unknown`.
+    public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: LargeLanguageModels.AbstractVoiceSettings, model: String) async throws -> Data {
+        throw PlayHT.APIError.unknown(message: "Speech to speech not supported")
+    }
+
+    /// Clones a voice from the audio at `fileURL` and returns the new voice's identifier.
+    ///
+    /// The audio is first converted with `convertAudioToMP4()`; the converted file's
+    /// absolute URL string is passed to `instantCloneVoice`.
+    ///
+    /// - Note: `description` is not used.
+    public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID {
+        let mp4URL = try await fileURL.convertAudioToMP4()
+        // Delete the temporary converted file on every exit path, including
+        // when cloning throws.
+        defer { try? FileManager.default.removeItem(at: mp4URL) }
+
+        let voiceID = try await instantCloneVoice(
+            sampleFileURL: mp4URL.absoluteString,
+            name: name
+        )
+
+        return .init(rawValue: voiceID.rawValue)
+    }
+
+    /// Not supported: always throws `PlayHT.APIError.unknown`.
+    public func edit(voice: LargeLanguageModels.AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool {
+        throw PlayHT.APIError.unknown(message: "Voice editing not supported")
+    }
+
+    /// Deletes the cloned voice identified by `voice` through `deleteClonedVoice`.
+    public func delete(voice: LargeLanguageModels.AbstractVoice.ID) async throws {
+        try await deleteClonedVoice(voice: .init(rawValue: voice.rawValue))
+    }
+}
diff --git a/Sources/PlayHT/Intramodular/PlayHT.Client.swift b/Sources/PlayHT/Intramodular/PlayHT.Client.swift
index 66e6e80f..eb63bfa8 100644
--- a/Sources/PlayHT/Intramodular/PlayHT.Client.swift
+++ b/Sources/PlayHT/Intramodular/PlayHT.Client.swift
@@ -59,14 +59,14 @@ extension PlayHT.Client: CoreMI._ServiceClientProtocol {
 
 extension PlayHT.Client {
     public func getAllAvailableVoices() async throws -> [PlayHT.Voice] {
-        async let htVoices = availableVoices()
-        async let clonedVoices = clonedVoices()
+        async let htVoices = self.getAvailableVoices()
+        async let clonedVoices = self.clonedVoices()
         
         let (available, cloned) = try await (htVoices, clonedVoices)
         return available + cloned
     }
     
-    public func availableVoices() async throws -> [PlayHT.Voice] {
+    public func getAvailableVoices() async throws -> [PlayHT.Voice] {
         try await run(\.listVoices).voices
     }
     
diff --git a/Sources/PlayHT/Intramodular/URL++.swift b/Sources/PlayHT/Intramodular/URL++.swift
new file mode 100644
index 00000000..f584da1f
--- /dev/null
+++ b/Sources/PlayHT/Intramodular/URL++.swift
@@ -0,0 +1,109 @@
+//
+//  URL++.swift
+//  AI
+//
+//  Created by Jared Davidson on 1/14/25.
+//
+
+import AVFoundation
+import AudioToolbox
+
+// FIXME: - This needs to be moved somewhere else (@archetapp)
+
+/// Builds an `NSError` in the `AudioConversion` domain (code `-1`) with `message` as its description.
+private func audioConversionError(_ message: String) -> NSError {
+    NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: message])
+}
+
+extension URL {
+    /// Exports the audio at this URL to a new `.mp4` file in the temporary directory.
+    ///
+    /// The first audio track is appended to a composition four times, back to back,
+    /// and the composition is exported with `AVAssetExportPresetPassthrough`. When the
+    /// export fails or the exported file is smaller than 5000 bytes, the output file is
+    /// removed before the error is thrown.
+    ///
+    /// - Returns: The URL of the exported file; it is left on disk for the caller.
+    /// - Throws: An `NSError` in the `AudioConversion` domain when the composition track or
+    ///   export session cannot be created, no audio track is found, the export does not
+    ///   complete, or the output is too small. Errors from loading the asset, inserting
+    ///   the track, and reading the output's attributes are rethrown.
+    func convertAudioToMP4() async throws -> URL {
+        // NOTE(review): the source audio is repeated this many times; presumably to
+        // lengthen short samples — confirm the intent.
+        let repetitions: Int32 = 4
+        // Exported files smaller than this many bytes are treated as failed conversions.
+        let minimumOutputSize = 5000
+
+        let outputURL = FileManager.default.temporaryDirectory
+            .appendingPathComponent(UUID().uuidString)
+            .appendingPathExtension("mp4")
+
+        let asset = AVURLAsset(url: self)
+        let composition = AVMutableComposition()
+
+        guard let compositionTrack = composition.addMutableTrack(
+            withMediaType: .audio,
+            preferredTrackID: kCMPersistentTrackID_Invalid
+        ) else {
+            throw audioConversionError("Could not create composition track")
+        }
+
+        guard let audioTrack = try await asset.loadTracks(withMediaType: .audio).first else {
+            throw audioConversionError("No audio track found")
+        }
+
+        let duration = try await asset.load(.duration)
+        let timeRange = CMTimeRange(start: .zero, duration: duration)
+        for index in 0..<repetitions {
+            // Scale the duration with CMTime arithmetic so each copy starts exactly
+            // where the previous one ends, without a round trip through seconds.
+            try compositionTrack.insertTimeRange(
+                timeRange,
+                of: audioTrack,
+                at: CMTimeMultiply(duration, multiplier: index)
+            )
+        }
+
+        guard let exportSession = AVAssetExportSession(
+            asset: composition,
+            presetName: AVAssetExportPresetPassthrough
+        ) else {
+            throw audioConversionError("Could not create export session")
+        }
+
+        exportSession.outputURL = outputURL
+        exportSession.outputFileType = AVFileType.mp4
+        exportSession.shouldOptimizeForNetworkUse = true
+
+        do {
+            // Bridge the callback-based export; every status resumes the continuation exactly once.
+            try await withCheckedThrowingContinuation { (continuation: CheckedContinuation<Void, Error>) in
+                exportSession.exportAsynchronously {
+                    switch exportSession.status {
+                    case .completed:
+                        continuation.resume()
+                    case .failed:
+                        continuation.resume(throwing: exportSession.error ?? audioConversionError("Export failed"))
+                    case .cancelled:
+                        continuation.resume(throwing: audioConversionError("Export cancelled"))
+                    default:
+                        continuation.resume(throwing: audioConversionError("Unknown export error"))
+                    }
+                }
+            }
+
+            let attributes = try FileManager.default.attributesOfItem(atPath: outputURL.path)
+            let fileSize = attributes[.size] as? Int ?? 0
+            guard fileSize >= minimumOutputSize else {
+                throw audioConversionError("Converted file too small")
+            }
+        } catch {
+            // Do not leave a failed or undersized export behind in the temporary directory.
+            try? FileManager.default.removeItem(at: outputURL)
+            throw error
+        }
+
+        return outputURL
+    }
+}
diff --git a/Sources/Rime/Intramodular/Rime.Client+SpeechSynthesisRequestHandling.swift b/Sources/Rime/Intramodular/Rime.Client+SpeechSynthesisRequestHandling.swift
new file mode 100644
index 00000000..93126293
--- /dev/null
+++ b/Sources/Rime/Intramodular/Rime.Client+SpeechSynthesisRequestHandling.swift
@@ -0,0 +1,78 @@
+//
+//  Rime.Client+SpeechSynthesisRequestHandling.swift
+//  Voice
+//
+//  Created by Jared Davidson on 11/21/24.
+//
+
+import Foundation
+import AI
+import ElevenLabs
+import SwiftUI
+import AVFoundation
+import LargeLanguageModels
+
+// MARK: - SpeechSynthesisRequestHandling
+
+extension Rime.Client: SpeechSynthesisRequestHandling {
+    /// Returns every voice from `getAllAvailableVoiceDetails()`, converted with `__conversion()`.
+    public func availableVoices() async throws -> [AbstractVoice] {
+        try await getAllAvailableVoiceDetails().map { try $0.__conversion() }
+    }
+
+    /// Synthesizes `text` with the given voice through `streamTextToSpeech`.
+    ///
+    /// - Note: `voiceSettings` and `model` are not used; `.MP3` output and the `.mist`
+    ///   model are always passed.
+    public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
+        try await streamTextToSpeech(
+            text: text,
+            voice: voiceID,
+            outputAudio: .MP3,
+            model: .mist
+        )
+    }
+
+    /// Not supported: always throws `Rime.APIError.unknown`.
+    public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
+        throw Rime.APIError.unknown(message: "Speech to speech not supported")
+    }
+
+    /// Not supported: always throws `Rime.APIError.unknown`.
+    public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID {
+        throw Rime.APIError.unknown(message: "Voice creation is not supported")
+    }
+
+    /// Not supported: always throws `Rime.APIError.unknown`.
+    public func edit(voice: AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool {
+        throw Rime.APIError.unknown(message: "Voice editing is not supported")
+    }
+
+    /// Not supported: always throws `Rime.APIError.unknown`.
+    public func delete(voice: AbstractVoice.ID) async throws {
+        throw Rime.APIError.unknown(message: "Voice deletion is not supported")
+    }
+}
+
+// MARK: - ElevenLabs interoperability
+
+extension Rime.Client {
+    // NOTE(review): looks like a leftover from an ElevenLabs-shaped API — confirm it is still needed.
+    /// Returns every voice from `getAllAvailableVoiceDetails()` as an `ElevenLabs.Voice`.
+    ///
+    /// The voice's `name` is used for both `voiceID` and `name`, `demographic` becomes the
+    /// description, and `isOwner` is always `false`.
+    ///
+    /// - Note: This differs from the `[AbstractVoice]` overload only by return type, so
+    ///   a call site without a contextual type is ambiguous.
+    public func availableVoices() async throws -> [ElevenLabs.Voice] {
+        try await getAllAvailableVoiceDetails().map { voice in
+            ElevenLabs.Voice(
+                voiceID: voice.name,
+                name: voice.name,
+                description: voice.demographic,
+                isOwner: false
+            )
+        }
+    }
+}