Skip to content

Commit fb88e33

Browse files
refactored elevenlabs voice settings and added tests
1 parent cd9c6f7 commit fb88e33

File tree

6 files changed

+162
-7
lines changed

6 files changed

+162
-7
lines changed

Package.swift

+11
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,17 @@ let package = Package(
231231
swiftSettings: [
232232
.enableExperimentalFeature("AccessLevelOnImport")
233233
]
234+
),
235+
.testTarget(
236+
name: "ElevenLabsTests",
237+
dependencies: [
238+
"AI",
239+
"Swallow"
240+
],
241+
path: "Tests/ElevenLabs",
242+
swiftSettings: [
243+
.enableExperimentalFeature("AccessLevelOnImport")
244+
]
234245
)
235246
]
236247
)

Sources/ElevenLabs/Intramodular/ElevenLabs.APISpecification.swift

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,12 @@ extension ElevenLabs.APISpecification {
2323
}
2424

2525
let text: String
26-
let voiceSettings: [String: JSON]
26+
let voiceSettings: ElevenLabs.VoiceSettings
2727
let model: ElevenLabs.Model
2828

2929
init(
3030
text: String,
31-
voiceSettings: [String: JSON],
31+
voiceSettings: ElevenLabs.VoiceSettings,
3232
model: ElevenLabs.Model
3333
) {
3434
self.text = text

Sources/ElevenLabs/Intramodular/ElevenLabs.Client.swift

+2-5
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ extension ElevenLabs.Client {
7575
public func speech(
7676
for text: String,
7777
voiceID: String,
78-
voiceSettings: [String: JSON]? = nil,
78+
voiceSettings: ElevenLabs.VoiceSettings,
7979
model: ElevenLabs.Model
8080
) async throws -> Data {
8181
let request = try HTTPRequest(url: URL(string: "\(apiSpecification.host)/v1/text-to-speech/\(voiceID)")!)
@@ -86,10 +86,7 @@ extension ElevenLabs.Client {
8686
.jsonBody(
8787
ElevenLabs.APISpecification.RequestBodies.SpeechRequest(
8888
text: text,
89-
voiceSettings: voiceSettings ?? [
90-
"stability" : 0,
91-
"similarity_boost": 0
92-
],
89+
voiceSettings: voiceSettings,
9390
model: model
9491
),
9592
keyEncodingStrategy: .convertToSnakeCase
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
//
2+
// Copyright (c) Vatsal Manot
3+
//
4+
5+
import Foundation
6+
7+
extension ElevenLabs {
    /// Voice-generation parameters passed along with a text-to-speech request.
    ///
    /// Every `Double` setting is clamped into the closed range `0...1` at
    /// initialization, so an instance created through these initializers
    /// never carries an out-of-range value.
    public final class VoiceSettings: Codable, Sendable {
        
        /// Names of the individual settings. The raw values are the
        /// snake_case identifiers for each setting.
        public enum Setting: String, Codable, Sendable {
            case stability
            case similarityBoost = "similarity_boost"
            case styleExaggeration = "style"
            case speakerBoost = "use_speaker_boost"
        }
        
        /// Explicit coding keys whose raw values match `Setting`.
        ///
        /// Without these, the synthesized keys are derived from the property
        /// names, so `styleExaggeration` and `speakerBoost` would be written
        /// as `style_exaggeration` / `speaker_boost` under a
        /// `.convertToSnakeCase` strategy rather than `style` /
        /// `use_speaker_boost`. The raw values below contain no uppercase
        /// letters, so they pass through snake-case conversion unchanged.
        ///
        /// NOTE(review): a decoder configured with `.convertFromSnakeCase`
        /// will not match snake_case raw values — confirm how (and whether)
        /// this type is decoded elsewhere.
        private enum CodingKeys: String, CodingKey {
            case stability
            case similarityBoost = "similarity_boost"
            case styleExaggeration = "style"
            case speakerBoost = "use_speaker_boost"
        }
        
        /// Increasing stability will make the voice more consistent between re-generations, but it can also make it sound a bit monotone. On longer text fragments it is recommended to lower this value.
        /// This is a double between 0 (more variable) and 1 (more stable).
        public let stability: Double
        
        /// Increasing the Similarity Boost setting enhances the overall voice clarity and targets speaker similarity. However, very high values can cause artifacts, so it is recommended to adjust this setting to find the optimal value.
        /// This is a double between 0 (Low) and 1 (High).
        public let similarityBoost: Double
        
        /// High values are recommended if the style of the speech should be exaggerated compared to the selected voice. Higher values can lead to more instability in the generated speech. Setting this to 0 will greatly increase generation speed and is the default setting.
        public let styleExaggeration: Double
        
        /// Boost the similarity of the synthesized speech and the voice at the cost of some generation speed.
        public let speakerBoost: Bool
        
        /// Clamps `value` into `0...1`.
        private static func clampedToUnitInterval(_ value: Double) -> Double {
            max(0, min(1, value))
        }
        
        /// Creates settings from explicit values.
        ///
        /// - Parameters:
        ///   - stability: Clamped to `0...1`.
        ///   - similarityBoost: Clamped to `0...1`.
        ///   - styleExaggeration: Clamped to `0...1`.
        ///   - speakerBoost: Stored as given.
        public init(stability: Double,
                    similarityBoost: Double,
                    styleExaggeration: Double,
                    speakerBoost: Bool) {
            // The type is named explicitly (not `Self`) because the helper is
            // called before all stored properties are initialized.
            self.stability = VoiceSettings.clampedToUnitInterval(stability)
            self.similarityBoost = VoiceSettings.clampedToUnitInterval(similarityBoost)
            self.styleExaggeration = VoiceSettings.clampedToUnitInterval(styleExaggeration)
            self.speakerBoost = speakerBoost
        }
        
        /// Creates settings, substituting a default for every `nil` argument.
        ///
        /// - Parameters:
        ///   - stability: Clamped to `0...1`; defaults to `0.5` when `nil`.
        ///   - similarityBoost: Clamped to `0...1`; defaults to `0.75` when `nil`.
        ///   - styleExaggeration: Clamped to `0...1`; defaults to `0` when `nil`.
        ///   - speakerBoost: Defaults to `true` when `nil`.
        public init(stability: Double? = nil,
                    similarityBoost: Double? = nil,
                    styleExaggeration: Double? = nil,
                    speakerBoost: Bool? = nil) {
            self.stability = stability.map(VoiceSettings.clampedToUnitInterval) ?? 0.5
            self.similarityBoost = similarityBoost.map(VoiceSettings.clampedToUnitInterval) ?? 0.75
            self.styleExaggeration = styleExaggeration.map(VoiceSettings.clampedToUnitInterval) ?? 0
            self.speakerBoost = speakerBoost ?? true
        }
        
        /// Creates settings with the given stability and default values
        /// (`0.75`, `0`, `true`) for the remaining settings.
        public convenience init(stability: Double) {
            self.init(
                stability: stability,
                similarityBoost: 0.75,
                styleExaggeration: 0,
                speakerBoost: true
            )
        }
        
        /// Creates settings with the given similarity boost and default values
        /// (`0.5`, `0`, `true`) for the remaining settings.
        public convenience init(similarityBoost: Double) {
            self.init(
                stability: 0.5,
                similarityBoost: similarityBoost,
                styleExaggeration: 0,
                speakerBoost: true
            )
        }
        
        /// Creates settings with the given style exaggeration and default
        /// values (`0.5`, `0.75`, `true`) for the remaining settings.
        public convenience init(styleExaggeration: Double) {
            self.init(
                stability: 0.5,
                similarityBoost: 0.75,
                styleExaggeration: styleExaggeration,
                speakerBoost: true
            )
        }
        
        /// Creates settings with the given speaker-boost flag and default
        /// values (`0.5`, `0.75`, `0`) for the remaining settings.
        public convenience init(speakerBoost: Bool) {
            self.init(
                stability: 0.5,
                similarityBoost: 0.75,
                styleExaggeration: 0,
                speakerBoost: speakerBoost
            )
        }
        
        /// Encodes all four settings into a keyed container using `CodingKeys`.
        ///
        /// - Throws: Any error thrown by the encoder's keyed container while
        ///   encoding a value.
        public func encode(to encoder: Encoder) throws {
            var container = encoder.container(keyedBy: CodingKeys.self)
            
            try container.encode(stability, forKey: .stability)
            try container.encode(similarityBoost, forKey: .similarityBoost)
            try container.encode(styleExaggeration, forKey: .styleExaggeration)
            try container.encode(speakerBoost, forKey: .speakerBoost)
        }
    }
}
97+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
//
2+
// Copyright (c) Vatsal Manot
3+
//
4+
5+
import ElevenLabs
6+
import XCTest
7+
8+
/// Exercises speech generation through the `client` value in scope.
final class SpeechTests: XCTestCase {
    
    /// Requests speech for a fixed passage and voice, then checks that the
    /// returned audio data is non-empty.
    func testCreateSpeech() async throws {
        let audio = try await client.speech(
            for: "In a quiet, unassuming village nestled deep in a lush, verdant valley, young Elara leads a simple life, dreaming of adventure beyond the horizon. Her village is filled with ancient folklore and tales of mystical relics, but none capture her imagination like the legend of the Enchanted Amulet—a powerful artifact said to grant its bearer the ability to control time.",
            voiceID: "4v7HtLWqY9rpQ7Cg2GT4",
            voiceSettings: ElevenLabs.VoiceSettings(
                stability: 0.5,
                similarityBoost: 0.75,
                styleExaggeration: 0,
                speakerBoost: true
            ),
            model: ElevenLabs.Model.EnglishV1
        )
        
        XCTAssertFalse(audio.isEmpty, "speechData should not be empty")
    }
}

Tests/ElevenLabs/module.swift

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
//
2+
// Copyright (c) Vatsal Manot
3+
//
4+
5+
import ElevenLabs
6+
7+
/// API key handed to the test client. Currently an empty string.
public var ELEVENLABS_API_KEY: String {
    get {
        return ""
    }
}
10+
11+
/// A new `ElevenLabs.Client` built with `ELEVENLABS_API_KEY`; a fresh
/// instance is constructed on every access.
public var client: ElevenLabs.Client {
    ElevenLabs.Client(apiKey: ELEVENLABS_API_KEY)
}

0 commit comments

Comments
 (0)