-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
18 changed files
with
300 additions
and
4 deletions.
There are no files selected for viewing
44 changes: 44 additions & 0 deletions
44
Sources/HumeAI/Intramodular/HumeAI.Client+SpeechSynthesisRequestHandling.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
// | ||
// HumeAI.Client+SpeechSynthesisRequestHandling.swift | ||
// Voice | ||
// | ||
// Created by Jared Davidson on 11/22/24. | ||
// | ||
|
||
import Foundation | ||
import SwiftUI | ||
import AVFoundation | ||
import LargeLanguageModels | ||
|
||
// MARK: - SpeechSynthesisRequestHandling

/// Conforms `HumeAI.Client` to `SpeechSynthesisRequestHandling`.
///
/// Only voice listing is implemented. Every other requirement throws
/// `HumeAI.APIError.unknown` with a message naming the unsupported operation.
extension HumeAI.Client: SpeechSynthesisRequestHandling {
    /// Returns the voices from `getAllAvailableVoices()` mapped to `AbstractVoice`.
    ///
    /// Only the identifier and name are carried over; `description` is always `nil`.
    public func availableVoices() async throws -> [AbstractVoice] {
        try await getAllAvailableVoices().map { voice in
            AbstractVoice(
                voiceID: voice.id,
                name: voice.name,
                description: nil
            )
        }
    }

    /// Not supported by this conformance.
    /// - Throws: `HumeAI.APIError.unknown` unconditionally.
    public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
        throw HumeAI.APIError.unknown(message: "Text to speech not supported")
    }

    /// Not supported by this conformance.
    /// - Throws: `HumeAI.APIError.unknown` unconditionally.
    public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
        throw HumeAI.APIError.unknown(message: "Speech to speech not supported")
    }

    /// Not supported by this conformance.
    /// - Throws: `HumeAI.APIError.unknown` unconditionally.
    public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID {
        throw HumeAI.APIError.unknown(message: "Voice creation is not supported")
    }

    /// Not supported by this conformance.
    /// - Throws: `HumeAI.APIError.unknown` unconditionally.
    public func edit(voice: AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool {
        // The message names the operation that actually failed (previously reported "creation").
        throw HumeAI.APIError.unknown(message: "Voice editing is not supported")
    }

    /// Not supported by this conformance.
    /// - Throws: `HumeAI.APIError.unknown` unconditionally.
    public func delete(voice: AbstractVoice.ID) async throws {
        // The message names the operation that actually failed (previously reported "creation").
        throw HumeAI.APIError.unknown(message: "Voice deletion is not supported")
    }
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
41 changes: 41 additions & 0 deletions
41
Sources/NeetsAI/Intramodular/NeetsAI.Client+SpeechSynthesisRequestHandling.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
// | ||
// NeetsAI.Client+SpeechSynthesisRequestHandling.swift | ||
// Voice | ||
// | ||
|
||
import Foundation | ||
import SwiftUI | ||
import AVFoundation | ||
import LargeLanguageModels | ||
|
||
// MARK: - SpeechSynthesisRequestHandling

/// Conforms `NeetsAI.Client` to `SpeechSynthesisRequestHandling`.
///
/// Voice listing and text-to-speech are forwarded to the client; the remaining
/// requirements throw `NeetsAI.APIError.unknown`.
extension NeetsAI.Client: SpeechSynthesisRequestHandling {
    /// Returns every voice from `getAllAvailableVoices()`, converted via `__conversion()`.
    public func availableVoices() async throws -> [AbstractVoice] {
        let neetsVoices = try await getAllAvailableVoices()
        return try neetsVoices.map { try $0.__conversion() }
    }

    /// Generates speech for `text` with the given voice.
    ///
    /// `model` is interpreted as a raw value; when it does not match a known
    /// model, `.mistralai` is used instead. `voiceSettings` is not forwarded.
    public func speech(
        for text: String,
        voiceID: String,
        voiceSettings: LargeLanguageModels.AbstractVoiceSettings,
        model: String
    ) async throws -> Data {
        return try await generateSpeech(
            text: text,
            voiceId: voiceID,
            model: .init(rawValue: model) ?? .mistralai
        )
    }

    /// Not supported by this conformance.
    /// - Throws: `NeetsAI.APIError.unknown` unconditionally.
    public func speechToSpeech(
        inputAudioURL: URL,
        voiceID: String,
        voiceSettings: LargeLanguageModels.AbstractVoiceSettings,
        model: String
    ) async throws -> Data {
        throw NeetsAI.APIError.unknown(message: "Speech to speech not supported")
    }

    /// Not supported by this conformance.
    /// - Throws: `NeetsAI.APIError.unknown` unconditionally.
    public func upload(
        voiceWithName name: String,
        description: String,
        fileURL: URL
    ) async throws -> LargeLanguageModels.AbstractVoice.ID {
        throw NeetsAI.APIError.unknown(message: "Uploading Voice is not supported")
    }

    /// Not supported by this conformance.
    /// - Throws: `NeetsAI.APIError.unknown` unconditionally.
    public func edit(
        voice: LargeLanguageModels.AbstractVoice.ID,
        name: String,
        description: String,
        fileURL: URL?
    ) async throws -> Bool {
        throw NeetsAI.APIError.unknown(message: "Editing Voice is not supported")
    }

    /// Not supported by this conformance.
    /// - Throws: `NeetsAI.APIError.unknown` unconditionally.
    public func delete(voice: LargeLanguageModels.AbstractVoice.ID) async throws {
        throw NeetsAI.APIError.unknown(message: "Deleting Voice is not supported")
    }
}
56 changes: 56 additions & 0 deletions
56
Sources/PlayHT/Intramodular/PlayHT.Client+SpeechSynthesisRequestHandling.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
// | ||
// PlayHT+SpeechSynthesisRequestHandling.swift | ||
// Voice | ||
// | ||
// Created by Jared Davidson on 11/20/24. | ||
// | ||
|
||
import Foundation | ||
import AI | ||
import ElevenLabs | ||
import SwiftUI | ||
import AVFoundation | ||
import LargeLanguageModels | ||
|
||
// MARK: - SpeechSynthesisRequestHandling

/// Conforms `PlayHT.Client` to `SpeechSynthesisRequestHandling`.
///
/// Voice listing, text-to-speech, voice upload (cloning) and deletion are
/// forwarded to the client; speech-to-speech and editing throw
/// `PlayHT.APIError.unknown`.
extension PlayHT.Client: SpeechSynthesisRequestHandling {
    /// Returns every voice from `getAllAvailableVoices()`, converted via `__conversion()`.
    public func availableVoices() async throws -> [AbstractVoice] {
        let voices: [AbstractVoice] = try await getAllAvailableVoices().map { try $0.__conversion() }
        return voices
    }

    /// Synthesizes `text` with the given voice via `streamTextToSpeech`.
    ///
    /// NOTE(review): `voiceSettings` and `model` are currently ignored — default
    /// settings and `.playHT2Turbo` are always used. Confirm this is intended.
    public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
        let data: Data = try await streamTextToSpeech(
            text: text,
            voice: voiceID,
            settings: .init(),
            model: .playHT2Turbo
        )

        return data
    }

    /// Not supported by this conformance.
    /// - Throws: `PlayHT.APIError.unknown` unconditionally.
    public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: LargeLanguageModels.AbstractVoiceSettings, model: String) async throws -> Data {
        throw PlayHT.APIError.unknown(message: "Speech to speech not supported")
    }

    /// Clones a voice from the audio at `fileURL` and returns the new voice's identifier.
    ///
    /// The audio is first converted to a temporary MP4 with `convertAudioToMP4()`,
    /// then handed to `instantCloneVoice`. `description` is not forwarded.
    ///
    /// - Throws: Any error from the conversion or from `instantCloneVoice`.
    public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID {
        let mp4URL = try await fileURL.convertAudioToMP4()
        // Remove the temporary file on every exit path, including when cloning throws.
        // Removal is best-effort: a failure to delete must not mask the real outcome.
        defer { try? FileManager.default.removeItem(at: mp4URL) }

        let voiceID = try await instantCloneVoice(
            sampleFileURL: mp4URL.absoluteString,
            name: name
        )

        return .init(rawValue: voiceID.rawValue)
    }

    /// Not supported by this conformance.
    /// - Throws: `PlayHT.APIError.unknown` unconditionally.
    public func edit(voice: LargeLanguageModels.AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool {
        throw PlayHT.APIError.unknown(message: "Voice editing not supported")
    }

    /// Deletes the cloned voice identified by `voice` via `deleteClonedVoice`.
    public func delete(voice: LargeLanguageModels.AbstractVoice.ID) async throws {
        try await deleteClonedVoice(voice: .init(rawValue: voice.rawValue))
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
// | ||
// URL++.swift | ||
// AI | ||
// | ||
// Created by Jared Davidson on 1/14/25. | ||
// | ||
|
||
import AVFoundation | ||
import AudioToolbox | ||
|
||
// FIXME: - This needs to be moved somewhere else (@archetapp) | ||
|
||
extension URL {
    /// Builds an `NSError` in the `AudioConversion` domain carrying `message`
    /// as its localized description.
    private static func audioConversionError(_ message: String) -> NSError {
        NSError(
            domain: "AudioConversion",
            code: -1,
            userInfo: [NSLocalizedDescriptionKey: message]
        )
    }

    /// Exports the audio at this URL into a new `.mp4` file in the temporary
    /// directory and returns that file's URL.
    ///
    /// The first audio track of the asset is appended to a composition four
    /// times back-to-back before exporting with the passthrough preset.
    /// NOTE(review): the 4x repetition presumably pads short samples to satisfy
    /// a minimum length downstream — confirm the intent with the author.
    ///
    /// The caller owns the returned file and is responsible for deleting it.
    /// If this method throws after the output location was chosen, any partially
    /// written output is removed before the error propagates.
    ///
    /// - Returns: The location of the exported `.mp4` file.
    /// - Throws: An `NSError` in the `AudioConversion` domain when the
    ///   composition track or export session cannot be created, no audio track
    ///   exists, the export does not complete, or the result is smaller than
    ///   5000 bytes; also rethrows AVFoundation and `FileManager` errors.
    func convertAudioToMP4() async throws -> URL {
        let outputURL = FileManager.default.temporaryDirectory
            .appendingPathComponent(UUID().uuidString)
            .appendingPathExtension("mp4")

        let asset = AVURLAsset(url: self)

        let composition = AVMutableComposition()
        guard let compositionTrack = composition.addMutableTrack(
            withMediaType: .audio,
            preferredTrackID: kCMPersistentTrackID_Invalid
        ) else {
            throw URL.audioConversionError("Could not create composition track")
        }

        guard let audioTrack = try await asset.loadTracks(withMediaType: .audio).first else {
            throw URL.audioConversionError("No audio track found")
        }

        let duration = try await asset.load(.duration)
        let timeRange = CMTimeRange(start: .zero, duration: duration)
        for repetition in 0..<4 {
            // Multiply in CMTime space so each copy starts exactly where the
            // previous one ends; going through Double seconds and a fixed
            // timescale of 600 can round the offset off the true boundary.
            try compositionTrack.insertTimeRange(
                timeRange,
                of: audioTrack,
                at: CMTimeMultiply(duration, multiplier: Int32(repetition))
            )
        }

        guard let exportSession = AVAssetExportSession(
            asset: composition,
            presetName: AVAssetExportPresetPassthrough
        ) else {
            throw URL.audioConversionError("Could not create export session")
        }

        exportSession.outputURL = outputURL
        exportSession.outputFileType = AVFileType.mp4
        exportSession.shouldOptimizeForNetworkUse = true

        do {
            // Bridge the callback-based export into async/await. The continuation
            // is resumed exactly once, directly from the completion handler; no
            // hop to the main queue is needed to resume a continuation.
            try await withCheckedThrowingContinuation { (continuation: CheckedContinuation<Void, Error>) in
                exportSession.exportAsynchronously {
                    switch exportSession.status {
                    case .completed:
                        continuation.resume()
                    case .failed:
                        continuation.resume(
                            throwing: exportSession.error ?? URL.audioConversionError("Export failed")
                        )
                    case .cancelled:
                        continuation.resume(throwing: URL.audioConversionError("Export cancelled"))
                    default:
                        continuation.resume(throwing: URL.audioConversionError("Unknown export error"))
                    }
                }
            }

            // Reject suspiciously small outputs (5KB minimum).
            let attributes = try FileManager.default.attributesOfItem(atPath: outputURL.path)
            let fileSize = attributes[.size] as? Int ?? 0
            if fileSize < 5000 {
                throw URL.audioConversionError("Converted file too small")
            }
        } catch {
            // Best-effort cleanup so failed conversions do not accumulate temp files.
            try? FileManager.default.removeItem(at: outputURL)
            throw error
        }

        return outputURL
    }
}
55 changes: 55 additions & 0 deletions
55
Sources/Rime/Intramodular/Rime.Client+SpeechSynthesisRequestHandling.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
// | ||
// Rime+SpeechSynthesisRequestHandling.swift | ||
// Voice | ||
// | ||
// Created by Jared Davidson on 11/21/24. | ||
// | ||
|
||
import Foundation | ||
import AI | ||
import ElevenLabs | ||
import SwiftUI | ||
import AVFoundation | ||
|
||
// MARK: - SpeechSynthesisRequestHandling

/// Conforms `Rime.Client` to `SpeechSynthesisRequestHandling`.
///
/// Voice listing and text-to-speech are forwarded to the client; the remaining
/// requirements throw `Rime.APIError.unknown` with a message naming the
/// unsupported operation.
extension Rime.Client: SpeechSynthesisRequestHandling {
    /// Returns every voice from `getAllAvailableVoiceDetails()`, converted via `__conversion()`.
    public func availableVoices() async throws -> [AbstractVoice] {
        return try await getAllAvailableVoiceDetails().map { try $0.__conversion() }
    }

    /// Synthesizes `text` with the given voice via `streamTextToSpeech`, as MP3.
    ///
    /// NOTE(review): `voiceSettings` and `model` are currently ignored — the
    /// `.mist` model is always used. Confirm this is intended.
    public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
        return try await streamTextToSpeech(
            text: text,
            voice: voiceID,
            outputAudio: .MP3,
            model: .mist
        )
    }

    /// Not supported by this conformance.
    /// - Throws: `Rime.APIError.unknown` unconditionally.
    public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data {
        throw Rime.APIError.unknown(message: "Speech to speech not supported")
    }

    /// Not supported by this conformance.
    /// - Throws: `Rime.APIError.unknown` unconditionally.
    public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID {
        throw Rime.APIError.unknown(message: "Voice creation is not supported")
    }

    /// Not supported by this conformance.
    /// - Throws: `Rime.APIError.unknown` unconditionally.
    public func edit(voice: AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool {
        // The message names the operation that actually failed (previously reported "creation").
        throw Rime.APIError.unknown(message: "Voice editing is not supported")
    }

    /// Not supported by this conformance.
    /// - Throws: `Rime.APIError.unknown` unconditionally.
    public func delete(voice: AbstractVoice.ID) async throws {
        // The message names the operation that actually failed (previously reported "creation").
        throw Rime.APIError.unknown(message: "Voice deletion is not supported")
    }

    /// Returns the available voices as `ElevenLabs.Voice` values.
    ///
    /// This overload differs from the `[AbstractVoice]` variant only by return
    /// type, so call sites need a type annotation to select it. The voice's
    /// `name` is used for both `voiceID` and `name`, and `demographic` becomes
    /// the description.
    public func availableVoices() async throws -> [ElevenLabs.Voice] {
        return try await getAllAvailableVoiceDetails().map { voice in
            ElevenLabs.Voice(
                voiceID: voice.name,
                name: voice.name,
                description: voice.demographic,
                isOwner: false
            )
        }
    }
}