Skip to content

Commit eebf2a2

Browse files
added VoyageAI for text embeddings
1 parent 7af8053 commit eebf2a2

16 files changed

+518
-14
lines changed

Package.swift

+25-1
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,18 @@ let package = Package(
180180
],
181181
path: "Sources/Jina"
182182
),
183+
.target(
184+
name: "VoyageAI",
185+
dependencies: [
186+
"CorePersistence",
187+
"CoreMI",
188+
"LargeLanguageModels",
189+
"Merge",
190+
"NetworkKit",
191+
"Swallow"
192+
],
193+
path: "Sources/VoyageAI"
194+
),
183195
.target(
184196
name: "AI",
185197
dependencies: [
@@ -194,7 +206,8 @@ let package = Package(
194206
"OpenAI",
195207
"Perplexity",
196208
"Swallow",
197-
"Jina"
209+
"Jina",
210+
"VoyageAI"
198211
],
199212
path: "Sources/AI",
200213
swiftSettings: [
@@ -288,6 +301,17 @@ let package = Package(
288301
swiftSettings: [
289302
.enableExperimentalFeature("AccessLevelOnImport")
290303
]
304+
),
305+
.testTarget(
306+
name: "VoyageAITests",
307+
dependencies: [
308+
"AI",
309+
"Swallow"
310+
],
311+
path: "Tests/VoyageAI",
312+
swiftSettings: [
313+
.enableExperimentalFeature("AccessLevelOnImport")
314+
]
291315
)
292316
]
293317
)

Sources/CoreMI/Intramodular/Model Identifier/ModelIdentifier.Provider.swift

+11
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ extension ModelIdentifier {
1919
case _Gemini
2020
case _Perplexity
2121
case _Jina
22+
case _VoyageAI
2223

2324
case unknown(String)
2425

@@ -49,6 +50,10 @@ extension ModelIdentifier {
4950
public static var jina: Self {
5051
Self._Jina
5152
}
53+
54+
public static var voyageAI: Self {
55+
Self._VoyageAI
56+
}
5257
}
5358
}
5459

@@ -75,6 +80,8 @@ extension ModelIdentifier.Provider: CustomStringConvertible {
7580
return "Perplexity"
7681
case ._Jina:
7782
return "Perplexity"
83+
case ._VoyageAI:
84+
return "VoyageAI"
7885
case .unknown(let provider):
7986
return provider
8087
}
@@ -102,6 +109,8 @@ extension ModelIdentifier.Provider: RawRepresentable {
102109
return "perplexity"
103110
case ._Jina:
104111
return "jina"
112+
case ._VoyageAI:
113+
return "voyageai"
105114
case .unknown(let provider):
106115
return provider
107116
}
@@ -125,6 +134,8 @@ extension ModelIdentifier.Provider: RawRepresentable {
125134
self = ._Perplexity
126135
case Self._Jina.rawValue:
127136
self = ._Jina
137+
case Self._VoyageAI.rawValue:
138+
self = ._VoyageAI
128139
default:
129140
self = .unknown(rawValue)
130141
}

Sources/CoreMI/Intramodular/Service/_MIServiceTypeIdentifier.swift

+1
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,5 @@ extension _MIServiceTypeIdentifier {
3434
public static let _Replicate = _MIServiceTypeIdentifier(rawValue: "dovon-vatig-posov-luvis")
3535
public static let _ElevenLabs = _MIServiceTypeIdentifier(rawValue: "jatap-jogaz-ritiz-vibok")
3636
public static let _Jina = _MIServiceTypeIdentifier(rawValue: "bozud-sipup-natin-bizif")
37+
public static let _VoyageAI = _MIServiceTypeIdentifier(rawValue: "hajat-fufoh-janaf-disam")
3738
}

Sources/Jina/Intramodular/Jina.APISpecification.swift

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ extension Jina {
2525
public typealias Error = APIError
2626

2727
public struct Configuration: Codable, Hashable {
28-
public var apiKey: String?
28+
public var apiKey: String
2929
}
3030

3131
public let configuration: Configuration
@@ -57,7 +57,7 @@ extension Jina.APISpecification {
5757
.jsonBody(input, keyEncodingStrategy: .convertToSnakeCase)
5858
.header(.contentType(.json))
5959
.header(.accept(.json))
60-
.header(.authorization(.bearer, configuration.apiKey.unwrap()))
60+
.header(.authorization(.bearer, configuration.apiKey))
6161
}
6262

6363
struct _ErrorWrapper: Codable, Hashable, Sendable {

Sources/Jina/Intramodular/Jina.Client.swift

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ extension Jina {
2323
self.session = session
2424
}
2525

26-
public convenience init(apiKey: String?) {
26+
public convenience init(apiKey: String) {
2727
self.init(
2828
interface: .init(configuration: .init(apiKey: apiKey)),
2929
session: .shared

Sources/Jina/Intramodular/Jina.Embeddings.swift

-5
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,6 @@ extension Jina.Embeddings {
2727

2828
extension Jina.Embeddings {
2929
public struct Usage: Codable, Hashable, Sendable {
30-
public enum Role: String, Codable, Hashable, Sendable {
31-
case promptTokens = "prompt_tokens"
32-
case totalTokens = "total_tokens"
33-
}
34-
3530
public let promptTokens: Int
3631
public let totalTokens: Int
3732
}

Sources/Mistral/Intramodular/Models/Mistral.Embedding.swift

-5
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,6 @@ extension Mistral.Embeddings {
2424

2525
extension Mistral.Embeddings {
2626
public struct Usage: Codable, Hashable, Sendable {
27-
public enum Role: String, Codable, Hashable, Sendable {
28-
case promptTokens = "prompt_tokens"
29-
case totalTokens = "total_tokens"
30-
}
31-
3227
public let promptTokens: Int
3328
public let totalTokens: Int
3429
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
//
2+
// Copyright (c) Vatsal Manot
3+
//
4+
5+
import NetworkKit
6+
import FoundationX
7+
import Swallow
8+
9+
extension VoyageAI {
10+
/// Errors thrown by the VoyageAI REST API layer.
public enum APIError: APIErrorProtocol {
    public typealias API = VoyageAI.APISpecification

    /// The error payload reported that no API key was provided.
    case apiKeyMissing

    /// The error payload reported that the provided API key is incorrect.
    case incorrectAPIKeyProvided

    /// The response failed validation with HTTP status code 429.
    case rateLimitExceeded

    /// Any other request-level failure; carries the underlying request error.
    case badRequest(API.Request.Error)

    /// A failure that was not a request error; carries the underlying error.
    case runtime(AnyError)

    /// Every case is classified under the networking error domain.
    public var traits: ErrorTraits {
        return [.domain(.networking)]
    }
}
23+
24+
/// REST API description for the VoyageAI service.
public struct APISpecification: RESTAPISpecification {
    public typealias Error = APIError

    /// Per-client settings for this API.
    public struct Configuration: Codable, Hashable {
        /// Key sent as the bearer token on every request.
        public var apiKey: String
    }

    public let configuration: Configuration

    /// Base URL that endpoint paths are resolved against.
    public var host: URL {
        // The literal is a constant, well-formed URL, so the force-unwrap cannot fail at runtime.
        return URL(string: "https://api.voyageai.com/v1/")!
    }

    /// The configuration doubles as the identity of this specification.
    public var id: some Hashable {
        return configuration
    }

    /// `POST embeddings` — takes a `CreateEmbedding` body and yields `VoyageAI.Embeddings`.
    @POST
    @Path("embeddings")
    public var createEmbeddings = Endpoint<RequestBodies.CreateEmbedding, VoyageAI.Embeddings, Void>()
}
45+
}
46+
47+
extension VoyageAI.APISpecification {
48+
/// Endpoint type shared by every route of `VoyageAI.APISpecification`.
///
/// It adds the JSON body and standard headers to outgoing requests and maps
/// failed responses onto `VoyageAI.APIError`.
public final class Endpoint<Input, Output, Options>: BaseHTTPEndpoint<VoyageAI.APISpecification, Input, Output, Options> {
    /// Builds the request for `input`.
    ///
    /// On top of the superclass's base request this encodes `input` as a JSON
    /// body with snake_case keys, sets the JSON content-type and accept
    /// headers, and attaches the configured API key as a bearer token.
    override public func buildRequestBase(
        from input: Input,
        context: BuildRequestContext
    ) throws -> Request {
        let apiKey = context.root.configuration.apiKey

        let request = try super
            .buildRequestBase(from: input, context: context)
            .jsonBody(input, keyEncodingStrategy: .convertToSnakeCase)
            .header(.contentType(.json))
            .header(.accept(.json))
            .header(.authorization(.bearer, apiKey))

        return request
    }

    /// Error envelope this endpoint tries to decode from a failed response:
    /// a top-level `error` object holding a `detail` string.
    ///
    /// NOTE(review): both this envelope shape and the message substrings matched in
    /// `decodeOutputBase` should be confirmed against real VoyageAI error responses.
    struct _ErrorWrapper: Codable, Hashable, Sendable {
        struct Error: Codable, Hashable, Sendable {
            let detail: String
        }

        let error: Error
    }

    /// Validates `response` and decodes it as `Output` using snake_case key conversion.
    ///
    /// When validation fails, the failure is translated as follows:
    /// - not a `Request.Error`: `.runtime`
    /// - error payload mentions a missing API key: `.apiKeyMissing`
    /// - error payload mentions an incorrect API key: `.incorrectAPIKeyProvided`
    /// - HTTP status 429: `.rateLimitExceeded`
    /// - anything else: `.badRequest`
    override public func decodeOutputBase(
        from response: Request.Response,
        context: DecodeOutputContext
    ) throws -> Output {
        do {
            try response.validate()
        } catch {
            guard let requestError = error as? Request.Error else {
                throw Error.runtime(error)
            }

            // Best effort: a body that does not match the envelope is simply ignored.
            let payload = try? response.decode(
                _ErrorWrapper.self,
                keyDecodingStrategy: .convertFromSnakeCase
            ).error

            if let detail = payload?.detail {
                if detail.contains("You didn't provide an API key") {
                    throw Error.apiKeyMissing
                }

                if detail.contains("Incorrect API key provided") {
                    throw Error.incorrectAPIKeyProvided
                }
            }

            guard response.statusCode.rawValue == 429 else {
                throw Error.badRequest(requestError)
            }

            throw Error.rateLimitExceeded
        }

        return try response.decode(
            Output.self,
            keyDecodingStrategy: .convertFromSnakeCase
        )
    }
}
111+
}
112+
113+
extension VoyageAI.APISpecification {
    /// Namespace for the request payload types of this API specification.
    public enum RequestBodies: _StaticSwift.Namespace {}

    /// Namespace for response payload types; it declares no members here.
    public enum ResponseBodies: _StaticSwift.Namespace {}
}
122+
123+
extension VoyageAI.APISpecification.RequestBodies {
124+
/// Request body for the `embeddings` endpoint.
public struct CreateEmbedding: Codable, Hashable {
    /// Wire encodings for the returned embeddings.
    public enum EncodingFormat: String, Codable, Hashable, Sendable {
        case float
        case base64
    }

    /// Name of the model. Recommended options: voyage-2, voyage-large-2, voyage-finance-2,
    /// voyage-multilingual-2, voyage-law-2, voyage-code-2.
    public let model: VoyageAI.Model

    /// The texts to embed. Two constraints currently apply to the list:
    /// - its maximum length is 128;
    /// - its total token count is at most 320K for voyage-2, and 120K for voyage-large-2,
    ///   voyage-finance-2, voyage-multilingual-2, voyage-law-2, and voyage-code-2.
    public let input: [String]

    /// Type of the input text. Defaults to nil. Other options: query, document.
    public let inputType: String?

    /// Whether over-length input texts are truncated to fit within the context length.
    /// Defaults to true. If false, an error is raised when any text exceeds the context length.
    public let truncation: Bool

    /// Format in which the embeddings are encoded:
    /// - `nil`: lists of floating-point numbers;
    /// - `.base64`: embeddings compressed to base64 encodings.
    public let encodingFormat: EncodingFormat?

    /// Creates a request body.
    ///
    /// - Parameters:
    ///   - model: The embedding model to use.
    ///   - input: The texts to embed.
    ///   - inputType: Optional input type hint; `nil` by default.
    ///   - truncation: Whether to truncate over-length texts; `true` by default.
    ///   - encodingFormat: Requested encoding. Only `.base64` is stored; `.float` and `nil`
    ///     are both stored as `nil`, the value documented above as meaning plain floats.
    init(
        model: VoyageAI.Model,
        input: [String],
        inputType: String? = nil,
        truncation: Bool = true,
        encodingFormat: EncodingFormat? = nil
    ) {
        self.model = model
        self.input = input
        self.inputType = inputType
        self.truncation = truncation

        switch encodingFormat {
        case .some(.base64):
            self.encodingFormat = .base64
        case .some(.float), .none:
            self.encodingFormat = nil
        }
    }
}
165+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
//
2+
// Copyright (c) Vatsal Manot
3+
//
4+
5+
import CoreMI
6+
import CorePersistence
7+
8+
extension VoyageAI.Client: TextEmbeddingsRequestHandling {
    /// Every `VoyageAI.Model` case, converted to a `ModelIdentifier`.
    public var _availableModels: [ModelIdentifier]? {
        VoyageAI.Model.allCases.map({ $0.__conversion() })
    }

    /// Produces embeddings for the texts in `request`.
    ///
    /// - Parameter request: The texts to embed and, optionally, the model to use.
    ///   When no model is given, `VoyageAI.Model.voyage2` is used.
    /// - Returns: One element per embedded text, each carrying the text it was
    ///   computed from, its embedding vector, and the model identifier.
    /// - Throws: When the requested model's name is not a `VoyageAI.Model` raw value,
    ///   or when the embeddings call fails.
    public func fulfill(
        _ request: TextEmbeddingsRequest
    ) async throws -> TextEmbeddings {
        let model = request.model ?? VoyageAI.Model.voyage2.__conversion()
        let embeddingModel = try VoyageAI.Model(rawValue: model.name).unwrap()

        let embeddings: VoyageAI.Embeddings = try await createEmbeddings(
            for: embeddingModel,
            input: request.input
        )

        // Pair each result with the input text it was computed from. The response
        // item's `object` field is not the input text, so it must not be used as `text`.
        // NOTE(review): assumes `embeddings.data` is returned in input order — confirm
        // against the VoyageAI API contract.
        let textEmbeddings = zip(request.input, embeddings.data).map { text, item in
            TextEmbeddings.Element(
                text: text,
                embedding: item.embedding.map { Double($0) },
                model: model
            )
        }

        return TextEmbeddings(
            model: model,
            data: textEmbeddings
        )
    }
}

0 commit comments

Comments
 (0)