Skip to content

Commit

Permalink
Moved protocol conformances
Browse files Browse the repository at this point in the history
  • Loading branch information
Archetapp committed Jan 15, 2025
1 parent f74b0f5 commit 114db0b
Show file tree
Hide file tree
Showing 18 changed files with 300 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//
// HumeAI+ElevenLabsClientProtocol.swift
// Voice
//
// Created by Jared Davidson on 11/22/24.
//

import Foundation
import SwiftUI
import AVFoundation
import LargeLanguageModels

// MARK: - SpeechSynthesisRequestHandling

extension HumeAI.Client: SpeechSynthesisRequestHandling {
    /// Returns every Hume AI voice mapped into the provider-agnostic `AbstractVoice` model.
    ///
    /// Hume voices carry no description, so `description` is always `nil`.
    public func availableVoices() async throws -> [AbstractVoice] {
        return try await getAllAvailableVoices().map { voice in
            AbstractVoice(
                voiceID: voice.id,
                name: voice.name,
                description: nil
            )
        }
    }
    
    /// Text-to-speech is not implemented for Hume AI; always throws.
    public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
        throw HumeAI.APIError.unknown(message: "Text to speech not supported")
    }
    
    /// Speech-to-speech conversion is not implemented for Hume AI; always throws.
    public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
        throw HumeAI.APIError.unknown(message: "Speech to speech not supported")
    }
    
    /// Voice upload (cloning) is not implemented for Hume AI; always throws.
    public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID {
        throw HumeAI.APIError.unknown(message: "Voice creation is not supported")
    }
    
    /// Voice editing is not implemented for Hume AI; always throws.
    public func edit(voice: AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool {
        // Fixed copy/paste error: message previously said "Voice creation".
        throw HumeAI.APIError.unknown(message: "Voice editing is not supported")
    }
    
    /// Voice deletion is not implemented for Hume AI; always throws.
    public func delete(voice: AbstractVoice.ID) async throws {
        // Fixed copy/paste error: message previously said "Voice creation".
        throw HumeAI.APIError.unknown(message: "Voice deletion is not supported")
    }
}
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import AVFoundation
import Foundation
import SwiftUI
import LargeLanguageModels

public protocol VideoGenerationRequestHandling {
func availableModels() async throws -> [VideoModel]
Expand Down
22 changes: 22 additions & 0 deletions Sources/NeetsAI/Intramodular/Models/NeetsAI.Voice.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//

import Foundation
import LargeLanguageModels

extension NeetsAI {
public struct Voice: Codable, Hashable {
Expand All @@ -15,3 +16,24 @@ extension NeetsAI {
public let supportedModels: [String]
}
}

// MARK: - AbstractVoiceConvertible

extension NeetsAI.Voice: AbstractVoiceConvertible {
    /// Converts this NeetsAI voice into the provider-agnostic `AbstractVoice` model.
    public func __conversion() throws -> AbstractVoice {
        // A voice may have no title; fall back to an empty display name.
        let displayName = self.title ?? ""
        
        return AbstractVoice(
            voiceID: self.id,
            name: displayName,
            description: self.aliasOf
        )
    }
}

// MARK: - AbstractVoiceInitiable

extension NeetsAI.Voice: AbstractVoiceInitiable {
    /// Creates a NeetsAI voice from the provider-agnostic `AbstractVoice` model.
    ///
    /// `supportedModels` cannot be derived from an `AbstractVoice`, so it is left empty.
    public init(voice: AbstractVoice) throws {
        self.init(
            id: voice.voiceID,
            title: voice.name,
            aliasOf: voice.description,
            supportedModels: []
        )
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
//
// NeetsAI.Client+SpeechSynthesisRequestHandling.swift
// Voice
//

import Foundation
import SwiftUI
import AVFoundation
import LargeLanguageModels

// MARK: - SpeechSynthesisRequestHandling

extension NeetsAI.Client: SpeechSynthesisRequestHandling {
    /// Returns every NeetsAI voice mapped into the provider-agnostic `AbstractVoice` model.
    public func availableVoices() async throws -> [AbstractVoice] {
        return try await getAllAvailableVoices().map { try $0.__conversion() }
    }
    
    /// Generates speech audio for `text` using the given voice and model.
    ///
    /// - Note: `voiceSettings` is currently ignored by this provider.
    /// NOTE(review): an unrecognized `model` string silently falls back to
    /// `.mistralai` instead of throwing — confirm this is intended.
    public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
        let audio = try await generateSpeech(
            text: text,
            voiceId: voiceID,
            model: .init(rawValue: model) ?? .mistralai
        )
        return audio
    }
    
    /// Speech-to-speech conversion is not implemented for NeetsAI; always throws.
    public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
        throw NeetsAI.APIError.unknown(message: "Speech to speech not supported")
    }
    
    /// Voice upload (cloning) is not implemented for NeetsAI; always throws.
    public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID {
        throw NeetsAI.APIError.unknown(message: "Uploading Voice is not supported")
    }
    
    /// Voice editing is not implemented for NeetsAI; always throws.
    public func edit(voice: AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool {
        throw NeetsAI.APIError.unknown(message: "Editing Voice is not supported")
    }
    
    /// Voice deletion is not implemented for NeetsAI; always throws.
    public func delete(voice: AbstractVoice.ID) async throws {
        throw NeetsAI.APIError.unknown(message: "Deleting Voice is not supported")
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
//
// PlayHT+SpeechSynthesisRequestHandling.swift
// Voice
//
// Created by Jared Davidson on 11/20/24.
//

import Foundation
import AI
import ElevenLabs
import SwiftUI
import AVFoundation
import LargeLanguageModels

// MARK: - SpeechSynthesisRequestHandling

extension PlayHT.Client: SpeechSynthesisRequestHandling {
    /// Returns every PlayHT voice (stock + cloned) mapped into the
    /// provider-agnostic `AbstractVoice` model.
    public func availableVoices() async throws -> [AbstractVoice] {
        let voices: [AbstractVoice] = try await getAllAvailableVoices().map { try $0.__conversion() }
        return voices
    }
    
    /// Generates speech audio for `text` by streaming PlayHT text-to-speech.
    ///
    /// NOTE(review): `voiceSettings` and `model` are currently ignored —
    /// default settings and `.playHT2Turbo` are always used. Confirm intent.
    public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
        let data: Data = try await streamTextToSpeech(
            text: text,
            voice: voiceID,
            settings: .init(),
            model: .playHT2Turbo
        )
        
        return data
    }
    
    /// Speech-to-speech conversion is not implemented for PlayHT; always throws.
    public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: LargeLanguageModels.AbstractVoiceSettings, model: String) async throws -> Data {
        throw PlayHT.APIError.unknown(message: "Speech to speech not supported")
    }
    
    /// Clones a voice from an audio sample.
    ///
    /// The sample is first converted to MP4 (PlayHT instant cloning expects it)
    /// into a temporary file, which is always cleaned up afterwards.
    public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID {
        let mp4URL = try await fileURL.convertAudioToMP4()
        
        // FIX: cleanup moved into `defer` — previously the temporary file was
        // leaked whenever `instantCloneVoice` threw.
        defer {
            try? FileManager.default.removeItem(at: mp4URL)
        }
        
        let fileURLString = mp4URL.absoluteString
        let voiceID = try await instantCloneVoice(
            sampleFileURL: fileURLString,
            name: name
        )
        
        return .init(rawValue: voiceID.rawValue)
    }
    
    /// Voice editing is not implemented for PlayHT; always throws.
    public func edit(voice: LargeLanguageModels.AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool {
        throw PlayHT.APIError.unknown(message: "Voice editing not supported")
    }
    
    /// Deletes a previously cloned voice by its identifier.
    public func delete(voice: LargeLanguageModels.AbstractVoice.ID) async throws {
        try await deleteClonedVoice(voice: .init(rawValue: voice.rawValue))
    }
}
6 changes: 3 additions & 3 deletions Sources/PlayHT/Intramodular/PlayHT.Client.swift
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@ extension PlayHT.Client: CoreMI._ServiceClientProtocol {
extension PlayHT.Client {

public func getAllAvailableVoices() async throws -> [PlayHT.Voice] {
async let htVoices = availableVoices()
async let clonedVoices = clonedVoices()
async let htVoices = self.getAvailableVoices()
async let clonedVoices = self.clonedVoices()

let (available, cloned) = try await (htVoices, clonedVoices)
return available + cloned
}

public func availableVoices() async throws -> [PlayHT.Voice] {
public func getAvailableVoices() async throws -> [PlayHT.Voice] {
try await run(\.listVoices).voices
}

Expand Down
79 changes: 79 additions & 0 deletions Sources/PlayHT/Intramodular/URL++.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
//
// URL++.swift
// AI
//
// Created by Jared Davidson on 1/14/25.
//

import AVFoundation
import AudioToolbox

// FIXME: - This needs to be moved somewhere else (@archetapp)

extension URL {
    /// Converts the audio file at this URL into a temporary MP4 (AAC container)
    /// file and returns the new file's URL.
    ///
    /// The caller owns the returned temporary file and is responsible for
    /// deleting it when finished.
    ///
    /// - Throws: An `NSError` in domain `"AudioConversion"` if the source has
    ///   no audio track, the export fails or is cancelled, or the result is
    ///   suspiciously small.
    func convertAudioToMP4() async throws -> URL {
        // Unique destination in the temporary directory; never collides.
        let outputURL = FileManager.default.temporaryDirectory
            .appendingPathComponent(UUID().uuidString)
            .appendingPathExtension("mp4")
        
        let asset = AVURLAsset(url: self)
        
        // Build a single-track audio composition to export from.
        let composition = AVMutableComposition()
        guard let compositionTrack = composition.addMutableTrack(
            withMediaType: .audio,
            preferredTrackID: kCMPersistentTrackID_Invalid
        ) else {
            throw NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Could not create composition track"])
        }
        
        // Only the first audio track of the source is used.
        guard let audioTrack = try await asset.loadTracks(withMediaType: .audio).first else {
            throw NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "No audio track found"])
        }
        
        let timeRange = CMTimeRange(start: .zero, duration: try await asset.load(.duration))
        // NOTE(review): the source audio is inserted 4 times back-to-back,
        // quadrupling the output duration — presumably to satisfy a minimum
        // sample-length requirement for voice cloning. Confirm this is intended.
        for i in 0..<4 {
            try compositionTrack.insertTimeRange(
                timeRange,
                of: audioTrack,
                at: CMTime(seconds: Double(i) * timeRange.duration.seconds, preferredTimescale: 600)
            )
        }
        
        // Passthrough preset: no re-encode, only the container changes.
        guard let exportSession = AVAssetExportSession(
            asset: composition,
            presetName: AVAssetExportPresetPassthrough
        ) else {
            throw NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Could not create export session"])
        }
        
        exportSession.outputURL = outputURL
        exportSession.outputFileType = AVFileType.mp4
        exportSession.shouldOptimizeForNetworkUse = true
        
        // Bridge the callback-based export into async/await. The switch below
        // resumes the continuation exactly once per export outcome.
        // Create a tuple of values we need to check after export
        try await withCheckedThrowingContinuation { continuation in
            let mainQueue = DispatchQueue.main
            exportSession.exportAsynchronously {
                // Status is read on the main queue after completion.
                mainQueue.async {
                    switch exportSession.status {
                    case .completed:
                        continuation.resume()
                    case .failed:
                        continuation.resume(throwing: exportSession.error ?? NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Export failed"]))
                    case .cancelled:
                        continuation.resume(throwing: NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Export cancelled"]))
                    default:
                        continuation.resume(throwing: NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Unknown export error"]))
                    }
                }
            }
        }
        
        // Heuristic sanity check: a valid export should exceed 5KB.
        // NOTE(review): a missing size attribute also yields 0 and trips this
        // check — confirm that is the desired behavior.
        let fileSize = try FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] as? Int ?? 0
        if fileSize < 5000 { // 5KB minimum
            throw NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Converted file too small"])
        }
        
        return outputURL
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
//
// Rime+SpeechSynthesisRequestHandling.swift
// Voice
//
// Created by Jared Davidson on 11/21/24.
//

import Foundation
import AI
import ElevenLabs
import SwiftUI
import AVFoundation

// MARK: - SpeechSynthesisRequestHandling

extension Rime.Client: SpeechSynthesisRequestHandling {
    /// Returns every Rime voice mapped into the provider-agnostic `AbstractVoice` model.
    public func availableVoices() async throws -> [AbstractVoice] {
        return try await getAllAvailableVoiceDetails().map { try $0.__conversion() }
    }
    
    /// Generates speech audio for `text` by streaming Rime text-to-speech.
    ///
    /// NOTE(review): `voiceSettings` and `model` are currently ignored —
    /// MP3 output and the `.mist` model are always used. Confirm intent.
    public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
        return try await streamTextToSpeech(
            text: text,
            voice: voiceID,
            outputAudio: .MP3,
            model: .mist
        )
    }
    
    /// Speech-to-speech conversion is not implemented for Rime; always throws.
    public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
        throw Rime.APIError.unknown(message: "Speech to speech not supported")
    }
    
    /// Voice upload (cloning) is not implemented for Rime; always throws.
    public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID {
        throw Rime.APIError.unknown(message: "Voice creation is not supported")
    }
    
    /// Voice editing is not implemented for Rime; always throws.
    public func edit(voice: AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool {
        // Fixed copy/paste error: message previously said "Voice creation".
        throw Rime.APIError.unknown(message: "Voice editing is not supported")
    }
    
    /// Voice deletion is not implemented for Rime; always throws.
    public func delete(voice: AbstractVoice.ID) async throws {
        // Fixed copy/paste error: message previously said "Voice creation".
        throw Rime.APIError.unknown(message: "Voice deletion is not supported")
    }
    
    /// Return-type overload mapping Rime voices into the ElevenLabs voice model.
    ///
    /// NOTE(review): `voiceID` is populated from `voice.name`, not an
    /// identifier field — verify this is correct and not a typo.
    public func availableVoices() async throws -> [ElevenLabs.Voice] {
        return try await getAllAvailableVoiceDetails().map { voice in
            ElevenLabs.Voice(
                voiceID: voice.name,
                name: voice.name,
                description: voice.demographic,
                isOwner: false
            )
        }
    }
}

0 comments on commit 114db0b

Please sign in to comment.