From 676ef05101efad6d5f253406df5563b7870ecd33 Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Thu, 2 Jan 2025 08:15:53 -0700 Subject: [PATCH 1/3] language support for playht --- Package.resolved | 191 +++++++++++++++++- ...layHT.APISpecification.RequestBodies.swift | 19 +- .../API/PlayHT.APISpecification.swift | 7 +- .../Intramodular/Models/PlayHT.Voice.swift | 21 ++ .../PlayHT/Intramodular/PlayHT.Client.swift | 32 +-- .../PlayHT/Intramodular/PlayHT.Model.swift | 4 + 6 files changed, 236 insertions(+), 38 deletions(-) diff --git a/Package.resolved b/Package.resolved index 44c6cdec..23ea1ed3 100644 --- a/Package.resolved +++ b/Package.resolved @@ -1,6 +1,33 @@ { - "originHash" : "094840915419b625ed8a43083bdf164ab8d3f6bbb7fda2dcec07cb5e55a2b736", + "originHash" : "a7afdc02c33e043aef77037e39006816570e8a6bd0c65d3499a410c01f230282", "pins" : [ + { + "identity" : "abseil-cpp-binary", + "kind" : "remoteSourceControl", + "location" : "https://github.com/google/abseil-cpp-binary.git", + "state" : { + "revision" : "194a6706acbd25e4ef639bcaddea16e8758a3e27", + "version" : "1.2024011602.0" + } + }, + { + "identity" : "app-check", + "kind" : "remoteSourceControl", + "location" : "https://github.com/google/app-check.git", + "state" : { + "revision" : "61b85103a1aeed8218f17c794687781505fbbef5", + "version" : "11.2.0" + } + }, + { + "identity" : "chatkit", + "kind" : "remoteSourceControl", + "location" : "https://github.com/PreternaturalAI/ChatKit.git", + "state" : { + "branch" : "main", + "revision" : "8b19deb1b0f74f091fec9a8c4755998a00f2b4cb" + } + }, { "identity" : "corepersistence", "kind" : "remoteSourceControl", @@ -10,6 +37,87 @@ "revision" : "3fc10b8e55c3be60ca4695200cecfc046c0ba29a" } }, + { + "identity" : "fal", + "kind" : "remoteSourceControl", + "location" : "https://github.com/preternatural-fork/Fal", + "state" : { + "revision" : "a58ca8a926a56a69ba3c454583f626b3629a4223", + "version" : "0.5.6" + } + }, + { + "identity" : "firebase-ios-sdk", + "kind" : "remoteSourceControl", + "location" : "https://github.com/firebase/firebase-ios-sdk.git", + "state" : { + "revision" : "2e02253fd1ce99145bcbf1bb367ccf61bd0ca46b", + "version" : "11.6.0" + } + }, + { + "identity" : "googleappmeasurement", + "kind" : "remoteSourceControl", + "location" : "https://github.com/google/GoogleAppMeasurement.git", + "state" : { + "revision" : "4f234bcbdae841d7015258fbbf8e7743a39b8200", + "version" : "11.4.0" + } + }, + { + "identity" : "googledatatransport", + "kind" : "remoteSourceControl", + "location" : "https://github.com/google/GoogleDataTransport.git", + "state" : { + "revision" : "617af071af9aa1d6a091d59a202910ac482128f9", + "version" : "10.1.0" + } + }, + { + "identity" : "googleutilities", + "kind" : "remoteSourceControl", + "location" : "https://github.com/google/GoogleUtilities.git", + "state" : { + "revision" : "53156c7ec267db846e6b64c9f4c4e31ba4cf75eb", + "version" : "8.0.2" + } + }, + { + "identity" : "grpc-binary", + "kind" : "remoteSourceControl", + "location" : "https://github.com/google/grpc-binary.git", + "state" : { + "revision" : "f56d8fc3162de9a498377c7b6cea43431f4f5083", + "version" : "1.65.1" + } + }, + { + "identity" : "gtm-session-fetcher", + "kind" : "remoteSourceControl", + "location" : "https://github.com/google/gtm-session-fetcher.git", + "state" : { + "revision" : "5cfe5f090c982de9c58605d2a82a4fc77b774fbd", + "version" : "4.1.0" + } + }, + { + "identity" : "interop-ios-for-google-sdks", + "kind" : "remoteSourceControl", + "location" : "https://github.com/google/interop-ios-for-google-sdks.git", + "state" : { + "revision" : "2d12673670417654f08f5f90fdd62926dc3a2648", + "version" : "100.0.0" + } + }, + { + "identity" : "leveldb", + "kind" : "remoteSourceControl", + "location" : "https://github.com/firebase/leveldb.git", + "state" : { + "revision" : "a0bc79961d7be727d258d33d5a6b2f1023270ba1", + "version" : "1.22.5" + } + }, { "identity" : "merge", "kind" : "remoteSourceControl", @@ -19,6 +127,15 @@ "revision" : "4bc71ce650b79b3dbe1a26acf7e54b29d750e0b6" } }, + { + "identity" : "nanopb", + "kind" : "remoteSourceControl", + "location" : "https://github.com/firebase/nanopb.git", + "state" : { + "revision" : "b7e1104502eca3a213b46303391ca4d3bc8ddec1", + "version" : "2.30910.0" + } + }, { "identity" : "networkkit", "kind" : "remoteSourceControl", @@ -28,6 +145,24 @@ "revision" : "8daa1ba22e5d18e1b8e469d5a0dd0c58b675eb87" } }, + { + "identity" : "promises", + "kind" : "remoteSourceControl", + "location" : "https://github.com/google/promises.git", + "state" : { + "revision" : "540318ecedd63d883069ae7f1ed811a2df00b6ac", + "version" : "2.4.0" + } + }, + { + "identity" : "sideproject", + "kind" : "remoteSourceControl", + "location" : "https://github.com/PreternaturalAI/Sideproject", + "state" : { + "branch" : "main", + "revision" : "82a434ea0c586612c0facb18108948547e24b9ee" + } + }, { "identity" : "swallow", "kind" : "remoteSourceControl", @@ -46,6 +181,24 @@ "version" : "1.1.4" } }, + { + "identity" : "swift-msgpack", + "kind" : "remoteSourceControl", + "location" : "https://github.com/nnabeyang/swift-msgpack.git", + "state" : { + "revision" : "7843723ab63aae2d7fa3b30a86cd1da578a441a3", + "version" : "0.6.0" + } + }, + { + "identity" : "swift-protobuf", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-protobuf.git", + "state" : { + "revision" : "ebc7251dd5b37f627c93698e4374084d98409633", + "version" : "1.28.2" + } + }, { "identity" : "swift-syntax", "kind" : "remoteSourceControl", @@ -64,6 +217,15 @@ "revision" : "3e47cc5f9b0cefe9ed1d0971aff22583bd9ac7b0" } }, + { + "identity" : "swiftui-introspect", + "kind" : "remoteSourceControl", + "location" : "https://github.com/siteline/SwiftUI-Introspect", + "state" : { + "revision" : "121c146fe591b1320238d054ae35c81ffa45f45a", + "version" : "0.12.0" + } + }, { "identity" : "swiftuix", "kind" : "remoteSourceControl", @@ -72,6 +234,33 @@ "branch" : "master", "revision" : "50e2aacd7b124ffb5d06b4bfa5a4f255052a559b" } + }, + { + "identity" : "swiftuiz", + "kind" : "remoteSourceControl", + "location" : "https://github.com/SwiftUIX/SwiftUIZ.git", + "state" : { + "branch" : "main", + "revision" : "194190e802249ba05e02903d06471eaff024caa0" + } + }, + { + "identity" : "swipeactions", + "kind" : "remoteSourceControl", + "location" : "https://github.com/aheze/SwipeActions", + "state" : { + "revision" : "41e6f6dce02d8cfa164f8c5461a41340850ca3ab", + "version" : "1.1.0" + } + }, + { + "identity" : "zipfoundation", + "kind" : "remoteSourceControl", + "location" : "https://github.com/weichsel/ZIPFoundation", + "state" : { + "revision" : "02b6abe5f6eef7e3cbd5f247c5cc24e246efcfe0", + "version" : "0.9.19" + } } ], "version" : 3 diff --git a/Sources/PlayHT/Intramodular/API/PlayHT.APISpecification.RequestBodies.swift b/Sources/PlayHT/Intramodular/API/PlayHT.APISpecification.RequestBodies.swift index 79ddf26e..563b91c1 100644 --- a/Sources/PlayHT/Intramodular/API/PlayHT.APISpecification.RequestBodies.swift +++ b/Sources/PlayHT/Intramodular/API/PlayHT.APISpecification.RequestBodies.swift @@ -15,7 +15,7 @@ extension PlayHT.APISpecification { public let text: String public let voice: String public let voiceEngine: PlayHT.Model - public let quality: String +// public let quality: String public let outputFormat: String // public let speed: Double? @@ -26,10 +26,11 @@ extension PlayHT.APISpecification { // public let voiceGuidance: Double? // public let styleGuidance: Double? // public let textGuidance: Double? - // public let language: String? + public let language: String? // private enum CodingKeys: String, CodingKey { - case text, voice, quality + case text, voice +// case quality case voiceEngine = "voice_engine" case outputFormat = "output_format" // case speed @@ -38,15 +39,15 @@ extension PlayHT.APISpecification { // case voiceGuidance = "voice_guidance" // case styleGuidance = "style_guidance" // case textGuidance = "text_guidance" - // case language + case language } public init( text: String, voice: String, voiceEngine: PlayHT.Model = .playHT2, - quality: String = "medium", - outputFormat: String = "mp3" +// quality: String = "medium", + outputFormat: String = "mp3", // speed: Double? = nil, // sampleRate: Int? = 48000, // seed: Int? = nil, @@ -55,12 +56,12 @@ extension PlayHT.APISpecification { // voiceGuidance: Double? = nil, // styleGuidance: Double? = nil, // textGuidance: Double? = nil, - // language: String? = nil + language: String? = nil ) { self.text = text self.voice = voice self.voiceEngine = voiceEngine - self.quality = quality +// self.quality = quality self.outputFormat = outputFormat // self.speed = speed // self.sampleRate = sampleRate @@ -70,7 +71,7 @@ extension PlayHT.APISpecification { // self.voiceGuidance = voiceGuidance // self.styleGuidance = styleGuidance // self.textGuidance = textGuidance - // self.language = language + self.language = language } } diff --git a/Sources/PlayHT/Intramodular/API/PlayHT.APISpecification.swift b/Sources/PlayHT/Intramodular/API/PlayHT.APISpecification.swift index 525ef06d..bbe1e997 100644 --- a/Sources/PlayHT/Intramodular/API/PlayHT.APISpecification.swift +++ b/Sources/PlayHT/Intramodular/API/PlayHT.APISpecification.swift @@ -71,7 +71,7 @@ extension PlayHT { @POST @Path("/tts/stream") @Body(json: \.input, keyEncodingStrategy: .convertToSnakeCase) - var streamTextToSpeech = Endpoint() + var streamTextToSpeech = Endpoint() @GET @Path("/cloned-voices") @@ -107,10 +107,10 @@ extension PlayHT.APISpecification { from: input, context: context ) - + request = request .header("X-USER-ID", context.root.configuration.userId) - .header("accept", "application/json") + .header(.accept(.mpeg)) .header("AUTHORIZATION", context.root.configuration.apiKey) .header(.contentType(.json)) @@ -122,6 +122,7 @@ extension PlayHT.APISpecification { context: DecodeOutputContext ) throws -> Output { do { + dump(response) try response.validate() } catch { let apiError: Error diff --git a/Sources/PlayHT/Intramodular/Models/PlayHT.Voice.swift b/Sources/PlayHT/Intramodular/Models/PlayHT.Voice.swift index 3ac8907f..c9708c13 100644 --- a/Sources/PlayHT/Intramodular/Models/PlayHT.Voice.swift +++ b/Sources/PlayHT/Intramodular/Models/PlayHT.Voice.swift @@ -31,6 +31,27 @@ extension PlayHT { case id, name, language, languageCode, voiceEngine, isCloned case gender, accent, age, style, sample, texture, loudness, tempo } + + public init( + id: String, + name: String, + language: String + ) { + self.id = .init(rawValue: id) + self.name = name + self.language = language + self.languageCode = nil + self.voiceEngine = "" + self.isCloned = nil + self.gender = nil + self.accent = nil + self.age = nil + self.style = nil + self.sample = nil + self.texture = nil + self.loudness = nil + self.tempo = nil + } // Add custom decoding if needed to handle any special cases public init(from decoder: Decoder) throws { diff --git a/Sources/PlayHT/Intramodular/PlayHT.Client.swift b/Sources/PlayHT/Intramodular/PlayHT.Client.swift index 66e6e80f..e98849ee 100644 --- a/Sources/PlayHT/Intramodular/PlayHT.Client.swift +++ b/Sources/PlayHT/Intramodular/PlayHT.Client.swift @@ -63,7 +63,7 @@ extension PlayHT.Client { async let clonedVoices = clonedVoices() let (available, cloned) = try await (htVoices, clonedVoices) - return available + cloned + return cloned } public func availableVoices() async throws -> [PlayHT.Voice] { @@ -78,6 +78,7 @@ extension PlayHT.Client { text: String, voice: String, settings: PlayHT.VoiceSettings, + language: String, outputSettings: PlayHT.OutputSettings = .default, model: PlayHT.Model ) async throws -> Data { @@ -86,33 +87,14 @@ extension PlayHT.Client { text: text, voice: voice, voiceEngine: model, - quality: outputSettings.quality.rawValue, - outputFormat: outputSettings.format.rawValue +// quality: outputSettings.quality.rawValue, + outputFormat: outputSettings.format.rawValue, + language: language ) let responseData = try await run(\.streamTextToSpeech, with: input) - - guard let url = URL(string: responseData.href) else { - throw PlayHTError.invalidURL - } - - var request = URLRequest(url: url) - request.httpMethod = "GET" - request.addValue("application/json", forHTTPHeaderField: "Content-Type") - request.addValue(interface.configuration.userId ?? "", forHTTPHeaderField: "X-USER-ID") - request.addValue(interface.configuration.apiKey ?? "", forHTTPHeaderField: "AUTHORIZATION") - - let (audioData, response) = try await URLSession.shared.data(for: request) - - guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 200 else { - throw PlayHTError.audioFetchFailed - } - - guard !audioData.isEmpty else { - throw PlayHTError.audioFetchFailed - } - - return audioData + + return responseData } diff --git a/Sources/PlayHT/Intramodular/PlayHT.Model.swift b/Sources/PlayHT/Intramodular/PlayHT.Model.swift index 9eeb0a28..20579363 100644 --- a/Sources/PlayHT/Intramodular/PlayHT.Model.swift +++ b/Sources/PlayHT/Intramodular/PlayHT.Model.swift @@ -18,6 +18,10 @@ extension PlayHT { case playHT1 = "PlayHT1.0" case playHT2Turbo = "PlayHT2.0-turbo" + + case play3_0Mini = "Play3.0-mini" + + case playDialog = "PlayDialog" } } From 520073a4811c58ce98baf1d8c038e454a0f6172c Mon Sep 17 00:00:00 2001 From: "Jared Davidson (Archetapp)" Date: Wed, 8 Jan 2025 18:11:17 -0700 Subject: [PATCH 2/3] ElevenLabs+Dubbing --- ...nLabs.APISpecification.RequestBodies.swift | 185 ++++++++++++++++++ ...Labs.APISpecification.ResponseBodies.swift | 23 +++ .../API/ElevenLabs.APISpecification.swift | 18 ++ .../ElevenLabs.Client+Dubbing.swift | 83 ++++++++ .../Intramodular/ElevenLabs.Model.swift | 2 + .../Models/ElevenLabs.DubbingOptions.swift | 33 ++++ .../Models/ElevenLabs.DubbingProgress.swift | 16 ++ .../Models/ElevenLabs.DubbingResult.swift | 16 ++ .../{ => Models}/ElevenLabs.Voice.swift | 0 .../ElevenLabs.VoiceSettings.swift | 0 SwallowUI | 1 + 11 files changed, 377 insertions(+) create mode 100644 Sources/ElevenLabs/Intramodular/ElevenLabs.Client+Dubbing.swift create mode 100644 Sources/ElevenLabs/Intramodular/Models/ElevenLabs.DubbingOptions.swift create mode 100644 Sources/ElevenLabs/Intramodular/Models/ElevenLabs.DubbingProgress.swift create mode 100644 Sources/ElevenLabs/Intramodular/Models/ElevenLabs.DubbingResult.swift rename Sources/ElevenLabs/Intramodular/{ => Models}/ElevenLabs.Voice.swift (100%) rename Sources/ElevenLabs/Intramodular/{ => Models}/ElevenLabs.VoiceSettings.swift (100%) create mode 160000 SwallowUI diff --git a/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.RequestBodies.swift b/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.RequestBodies.swift index 3404e48e..1b72b81b 100644 --- a/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.RequestBodies.swift +++ b/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.RequestBodies.swift @@ -14,6 +14,7 @@ extension ElevenLabs.APISpecification { enum RequestBodies { public struct SpeechRequest: Codable, Hashable, Equatable { public let text: String + public let languageCode: String? public let voiceSettings: ElevenLabs.VoiceSettings public let model: ElevenLabs.Model @@ -21,14 +22,17 @@ extension ElevenLabs.APISpecification { case text case voiceSettings = "voice_settings" case model = "model_id" + case languageCode = "language_code" } public init( text: String, + languageCode: String?, voiceSettings: ElevenLabs.VoiceSettings, model: ElevenLabs.Model ) { self.text = text + self.languageCode = languageCode self.voiceSettings = voiceSettings self.model = model } @@ -47,17 +51,20 @@ extension ElevenLabs.APISpecification { public struct SpeechToSpeechInput: Codable, Hashable, HTTPRequest.Multipart.ContentConvertible, Equatable { public let voiceId: String public let audioURL: URL + public let languageCode: String? public let model: ElevenLabs.Model public let voiceSettings: ElevenLabs.VoiceSettings public init( voiceId: String, audioURL: URL, + languageCode: String?, model: ElevenLabs.Model, voiceSettings: ElevenLabs.VoiceSettings ) { self.voiceId = voiceId self.audioURL = audioURL + self.languageCode = languageCode self.model = model self.voiceSettings = voiceSettings } @@ -72,6 +79,15 @@ extension ElevenLabs.APISpecification { ) ) + if let languageCode { + result.append( + .text( + named: "language_code", + value: languageCode + ) + ) + } + let encoder = JSONEncoder() encoder.keyEncodingStrategy = .convertToSnakeCase if let voiceSettingsData = try? encoder.encode(voiceSettings), @@ -201,5 +217,174 @@ extension ElevenLabs.APISpecification { return result } } + + public struct DubbingRequest: Codable, Hashable, HTTPRequest.Multipart.ContentConvertible { + public let name: String? + public let sourceURL: URL? + public let sourceLang: String? + public let targetLang: String + public let numSpeakers: Int? + public let watermark: Bool? + public let startTime: Int? + public let endTime: Int? + public let highestResolution: Bool? + public let dropBackgroundAudio: Bool? + public let useProfanityFilter: Bool? + public let fileData: Data? + + public init( + name: String? = nil, + sourceURL: URL? = nil, + sourceLang: String? = nil, + targetLang: String, + numSpeakers: Int? = nil, + watermark: Bool? = nil, + startTime: Int? = nil, + endTime: Int? = nil, + highestResolution: Bool? = nil, + dropBackgroundAudio: Bool? = nil, + useProfanityFilter: Bool? = nil, + fileData: Data? = nil + ) { + self.name = name + self.sourceURL = sourceURL + self.sourceLang = sourceLang + self.targetLang = targetLang + self.numSpeakers = numSpeakers + self.watermark = watermark + self.startTime = startTime + self.endTime = endTime + self.highestResolution = highestResolution + self.dropBackgroundAudio = dropBackgroundAudio + self.useProfanityFilter = useProfanityFilter + self.fileData = fileData + } + + public func __conversion() throws -> HTTPRequest.Multipart.Content { + var result = HTTPRequest.Multipart.Content() + + if let name { + result.append(.text(named: "name", value: name)) + } + + if let sourceURL { + result.append(.text(named: "source_url", value: sourceURL.absoluteString)) + } + + if let sourceLang { + result.append(.text(named: "source_lang", value: sourceLang)) + } + + result.append(.text(named: "target_lang", value: targetLang)) + + if let numSpeakers { + result.append(.text(named: "num_speakers", value: String(numSpeakers))) + } + + if let watermark { + result.append(.text(named: "watermark", value: String(watermark))) + } + + if let startTime { + result.append(.text(named: "start_time", value: String(startTime))) + } + + if let endTime { + result.append(.text(named: "end_time", value: String(endTime))) + } + + if let highestResolution { + result.append(.text(named: "highest_resolution", value: String(highestResolution))) + } + + if let dropBackgroundAudio { + result.append(.text(named: "drop_background_audio", value: String(dropBackgroundAudio))) + } + + if let useProfanityFilter { + result.append(.text(named: "use_profanity_filter", value: String(useProfanityFilter))) + } + + if let fileData { + result.append( + .file( + named: "file", + data: fileData, + filename: "input.mp4", + contentType: .mp4 + ) + ) + } + + return result + } + } + public struct DubbingInput: Codable, Hashable, HTTPRequest.Multipart.ContentConvertible { + public let voiceId: String + public let audioURL: URL + public let languageCode: String + public let model: ElevenLabs.Model + public let voiceSettings: ElevenLabs.VoiceSettings + + public init( + voiceId: String, + audioURL: URL, + languageCode: String, + model: ElevenLabs.Model, + voiceSettings: ElevenLabs.VoiceSettings + ) { + self.voiceId = voiceId + self.audioURL = audioURL + self.languageCode = languageCode + self.model = model + self.voiceSettings = voiceSettings + } + + public func __conversion() throws -> HTTPRequest.Multipart.Content { + var result = HTTPRequest.Multipart.Content() + + result.append( + .text( + named: "model_id", + value: model.rawValue + ) + ) + + result.append( + .text( + named: "language_code", + value: languageCode + ) + ) + + let encoder = JSONEncoder() + encoder.keyEncodingStrategy = .convertToSnakeCase + if let voiceSettingsData = try? encoder.encode(voiceSettings), + let voiceSettingsString = String( + data: voiceSettingsData, + encoding: .utf8 + ) { + result.append( + .text( + named: "voice_settings", + value: voiceSettingsString + ) + ) + } + + if let fileData = try? Data(contentsOf: audioURL) { + result.append( + .file( + named: "audio", + data: fileData, + filename: audioURL.lastPathComponent, + contentType: .mpeg + ) + ) + } + + return result + } + } } } diff --git a/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.ResponseBodies.swift b/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.ResponseBodies.swift index b80abbd5..658d8de4 100644 --- a/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.ResponseBodies.swift +++ b/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.ResponseBodies.swift @@ -16,5 +16,28 @@ extension ElevenLabs.APISpecification { public struct VoiceID: Codable { public let voiceId: String } + + public struct DubbingResponse: Codable { + public let dubbingId: String + public let expectedDurationSec: Double + } + + public struct DubbingStatus: Codable { + public enum State: String, Codable { + case processing + case completed + case failed + } + + public let state: State + public let failure_reason: String? + public let progress: Double? + } + + public struct DubbingProgress: Codable { + public let status: DubbingStatus + public let expectedDuration: TimeInterval + public let dubbingId: String + } } } diff --git a/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.swift b/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.swift index fa442c34..c520694a 100644 --- a/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.swift +++ b/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.swift @@ -94,6 +94,24 @@ extension ElevenLabs { "/v1/voices/\(context.input)" }) var deleteVoice = Endpoint() + + // Dubbing + @POST + @Path("/v1/dubbing") + @Body(multipart: .input) + var initiateDubbing = Endpoint() + + @GET + @Path({ context -> String in + "/v1/dubbing/\(context.input)/status" + }) + var getDubbingStatus = Endpoint() + + @GET + @Path({ context -> String in + "/v1/dubbing/\(context.input)" + }) + var getDubbingResult = Endpoint() } } diff --git a/Sources/ElevenLabs/Intramodular/ElevenLabs.Client+Dubbing.swift b/Sources/ElevenLabs/Intramodular/ElevenLabs.Client+Dubbing.swift new file mode 100644 index 00000000..35ca928c --- /dev/null +++ b/Sources/ElevenLabs/Intramodular/ElevenLabs.Client+Dubbing.swift @@ -0,0 +1,83 @@ +// +// ElevenLabs.Client+Dubbing.swift +// AI +// +// Created by Jared Davidson on 1/7/25. +// + +import Foundation + +extension ElevenLabs.Client { + public func dub( + fileData: Data? = nil, + sourceURL: URL? = nil, + name: String? = nil, + sourceLang: String? = nil, + targetLang: String,app + numSpeakers: Int? = nil, + options: DubbingOptions = .init(), + progress: @escaping (DubbingProgress) async -> Void + ) async throws -> DubbingResult { + guard fileData != nil || sourceURL != nil else { + throw NSError(domain: "ElevenLabs", code: -1, userInfo: [ + NSLocalizedDescriptionKey: "Either fileData or sourceURL must be provided" + ]) + } + + let request = ElevenLabs.APISpecification.RequestBodies.DubbingRequest( + name: name, + sourceURL: sourceURL, + sourceLang: sourceLang, + targetLang: targetLang, + numSpeakers: numSpeakers, + watermark: options.watermark, + startTime: options.startTime, + endTime: options.endTime, + highestResolution: options.highestResolution, + dropBackgroundAudio: options.dropBackgroundAudio, + useProfanityFilter: options.useProfanityFilter, + fileData: fileData + ) + + // Start dubbing process + let response = try await run(\.initiateDubbing, with: request) + let dubbingId = response.dubbingId + let expectedDuration = response.expectedDurationSec + + // Poll for status + let pollingInterval: TimeInterval = 5 // seconds + let maxAttempts = Int(ceil(expectedDuration / pollingInterval)) + 10 // Add some buffer attempts + + for _ in 0.. Date: Wed, 8 Jan 2025 18:19:50 -0700 Subject: [PATCH 3/3] update --- Sources/ElevenLabs/Intramodular/ElevenLabs.Client.swift | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Sources/ElevenLabs/Intramodular/ElevenLabs.Client.swift b/Sources/ElevenLabs/Intramodular/ElevenLabs.Client.swift index 2093a3ea..b33cb1e4 100644 --- a/Sources/ElevenLabs/Intramodular/ElevenLabs.Client.swift +++ b/Sources/ElevenLabs/Intramodular/ElevenLabs.Client.swift @@ -42,11 +42,13 @@ extension ElevenLabs.Client { public func speech( for text: String, voiceID: String, + languageCode: String?, voiceSettings: ElevenLabs.VoiceSettings, model: ElevenLabs.Model ) async throws -> Data { let requestBody = ElevenLabs.APISpecification.RequestBodies.SpeechRequest( text: text, + languageCode: languageCode, voiceSettings: voiceSettings, model: model ) @@ -57,12 +59,14 @@ extension ElevenLabs.Client { public func speechToSpeech( inputAudioURL: URL, voiceID: String, + languageCode: String?, voiceSettings: ElevenLabs.VoiceSettings, model: ElevenLabs.Model ) async throws -> Data { let input = ElevenLabs.APISpecification.RequestBodies.SpeechToSpeechInput( voiceId: voiceID, audioURL: inputAudioURL, + languageCode: languageCode, model: model, voiceSettings: voiceSettings )