diff --git a/Package.swift b/Package.swift index c31ee267..e1880e1e 100644 --- a/Package.swift +++ b/Package.swift @@ -366,7 +366,7 @@ let package = Package( .target( name: "HuggingFace", dependencies: [ - + "Swallow" ], path: "Sources/HuggingFace", swiftSettings: [ diff --git a/Sources/HuggingFace/HuggingFace.Downloader.swift b/Sources/HuggingFace/HuggingFace.Downloader.swift index c7e51938..5fff8a1e 100644 --- a/Sources/HuggingFace/HuggingFace.Downloader.swift +++ b/Sources/HuggingFace/HuggingFace.Downloader.swift @@ -1,13 +1,10 @@ // -// Downloader.swift -// -// Adapted from https://github.com/huggingface/swift-coreml-diffusers/blob/d041577b9f5e201baa3465bc60bc5d0a1cf7ed7f/Diffusion/Common/Downloader.swift -// Created by Pedro Cuenca on December 2022. -// See LICENSE at https://github.com/huggingface/swift-coreml-diffusers/LICENSE +// Copyright (c) Preternatural AI, Inc. // -import Foundation import Combine +import FoundationX +import Swallow extension HuggingFace { class Downloader: NSObject, ObservableObject { diff --git a/Sources/HuggingFace/HuggingFace.Hub.Client.swift b/Sources/HuggingFace/HuggingFace.Hub.Client.swift index 4e4b6b46..c33e0632 100644 --- a/Sources/HuggingFace/HuggingFace.Hub.Client.swift +++ b/Sources/HuggingFace/HuggingFace.Hub.Client.swift @@ -1,22 +1,22 @@ // -// HubApi.swift -// -// -// Created by Pedro Cuenca on 20231230. +// Copyright (c) Preternatural AI, Inc. // -import Foundation +import FoundationX +import Swallow extension HuggingFace.Hub { public struct Client { - var downloadBase: URL - var hfToken: String? - var endpoint: String - var useBackgroundSession: Bool - public typealias RepoType = HuggingFace.Hub.RepoType public typealias Repo = HuggingFace.Hub.Repo - + + public static let shared = Client() + + public var downloadBase: URL + public var hfToken: String? + public var endpoint: String + public var useBackgroundSession: Bool + public init( downloadBase: URL? = nil, hfToken: String? 
= nil, @@ -24,31 +24,32 @@ extension HuggingFace.Hub { useBackgroundSession: Bool = false ) { self.hfToken = hfToken + if let downloadBase { self.downloadBase = downloadBase } else { let documents = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first! + self.downloadBase = documents.appending(component: "huggingface") } + self.endpoint = endpoint self.useBackgroundSession = useBackgroundSession } - - public static let shared = Client() } } /// File retrieval -public extension HuggingFace.Hub.Client { +extension HuggingFace.Hub.Client { /// Model data for parsed filenames - struct Sibling: Codable { - let rfilename: String + public struct Sibling: Codable { + public let rfilename: String } - struct SiblingsResponse: Codable { - let siblings: [Sibling] + public struct SiblingsResponse: Codable { + public let siblings: [Sibling] } - + /// Throws error if the response code is not 20X func httpGet( for url: URL @@ -64,15 +65,15 @@ public extension HuggingFace.Hub.Client { guard let response = response as? 
HTTPURLResponse else { throw HuggingFace.Hub.HubClientError.unexpectedError } switch response.statusCode { - case 200..<300: break - case 400..<500: throw HuggingFace.Hub.HubClientError.authorizationRequired - default: throw HuggingFace.Hub.HubClientError.httpStatusCode(response.statusCode) + case 200..<300: break + case 400..<500: throw HuggingFace.Hub.HubClientError.authorizationRequired + default: throw HuggingFace.Hub.HubClientError.httpStatusCode(response.statusCode) } - + return (data, response) } - func getFilenames( + public func getFilenames( from repo: Repo, matching globs: [String] = [] ) async throws -> [String] { @@ -90,21 +91,21 @@ public extension HuggingFace.Hub.Client { return Array(selected) } - func getFilenames( + public func getFilenames( from repoId: String, matching globs: [String] = [] ) async throws -> [String] { return try await getFilenames(from: Repo(id: repoId), matching: globs) } - func getFilenames( + public func getFilenames( from repo: Repo, matching glob: String ) async throws -> [String] { return try await getFilenames(from: repo, matching: [glob]) } - func getFilenames( + public func getFilenames( from repoId: String, matching glob: String ) async throws -> [String] { @@ -113,17 +114,22 @@ public extension HuggingFace.Hub.Client { } /// Configuration loading helpers -public extension HuggingFace.Hub.Client { +extension HuggingFace.Hub.Client { /// Assumes the file has already been downloaded. /// `filename` is relative to the download base. - func configuration(from filename: String, in repo: Repo) throws -> HuggingFace.Config { + public func configuration( + from filename: String, + in repo: Repo + ) throws -> HuggingFace.Config { let fileURL = localRepoLocation(repo).appending(path: filename) return try configuration(fileURL: fileURL) } /// Assumes the file is already present at local url. 
/// `fileURL` is a complete local file path for the given model - func configuration(fileURL: URL) throws -> HuggingFace.Config { + public func configuration( + fileURL: URL + ) throws -> HuggingFace.Config { let data = try Data(contentsOf: fileURL) let parsed = try JSONSerialization.jsonObject(with: data, options: []) guard let dictionary = parsed as? [String: Any] else { throw HuggingFace.Hub.HubClientError.parse } @@ -132,13 +138,13 @@ public extension HuggingFace.Hub.Client { } /// Whoami -public extension HuggingFace.Hub.Client { - func whoami() async throws -> HuggingFace.Config { +extension HuggingFace.Hub.Client { + public func whoami() async throws -> HuggingFace.Config { guard hfToken != nil else { throw HuggingFace.Hub.HubClientError.authorizationRequired } let url = URL(string: "\(endpoint)/api/whoami-v2")! let (data, _) = try await httpGet(for: url) - + let parsed = try JSONSerialization.jsonObject(with: data, options: []) guard let dictionary = parsed as? [String: Any] else { throw HuggingFace.Hub.HubClientError.parse } return HuggingFace.Config(dictionary) @@ -146,20 +152,20 @@ public extension HuggingFace.Hub.Client { } /// Snaphsot download -public extension HuggingFace.Hub.Client { - func localRepoLocation(_ repo: Repo) -> URL { +extension HuggingFace.Hub.Client { + public func localRepoLocation(_ repo: Repo) -> URL { downloadBase.appending(component: repo.type.rawValue).appending(component: repo.id) } - struct HubFileDownloader { - let repo: Repo - let repoDestination: URL - let relativeFilename: String - let hfToken: String? - let endpoint: String? - let backgroundSession: Bool - - var source: URL { + public struct HubFileDownloader { + public let repo: Repo + public let repoDestination: URL + public let relativeFilename: String + public let hfToken: String? + public let endpoint: String? 
+ public let backgroundSession: Bool + + public var source: URL { // https://huggingface.co/coreml-projects/Llama-2-7b-chat-coreml/resolve/main/tokenizer.json?download=true var url = URL(string: endpoint ?? "https://huggingface.co")! if repo.type != .models { @@ -171,26 +177,26 @@ public extension HuggingFace.Hub.Client { return url } - var destination: URL { + public var destination: URL { repoDestination.appending(path: relativeFilename) } - var downloaded: Bool { - FileManager.default.fileExists(atPath: destination.path) + public var downloaded: Bool { + FileManager.default.fileExists(at: destination) } - func prepareDestination() throws { + public func prepareDestination() throws { let directoryURL = destination.deletingLastPathComponent() try FileManager.default.createDirectory(at: directoryURL, withIntermediateDirectories: true, attributes: nil) } - + // Note we go from Combine in Downloader to callback-based progress reporting // We'll probably need to support Combine as well to play well with Swift UI // (See for example PipelineLoader in swift-coreml-diffusers) @discardableResult func download(outputHandler: @escaping (Double) -> Void) async throws -> URL { guard !downloaded else { return destination } - + try prepareDestination() let downloader = HuggingFace.Downloader(from: source, to: destination, using: hfToken, inBackground: backgroundSession) let downloadSubscriber = downloader.downloadState.sink { state in @@ -204,9 +210,61 @@ public extension HuggingFace.Hub.Client { return destination } } - + + /* + @discardableResult + func download( + from repo: Repo, + matching globs: [String] = [], + session: URLSession, + outputHandler: @escaping (Progress) -> Void = { _ in } + ) async throws { + let filenames = try await getFilenames(from: repo, matching: globs) + let progress = Progress(totalUnitCount: Int64(filenames.count)) + let repoDestination = localRepoLocation(repo) + + for filename in filenames { + let fileProgress = Progress(totalUnitCount: 100, 
parent: progress, pendingUnitCount: 1) + let downloader = HubFileDownloader( + repo: repo, + repoDestination: repoDestination, + relativeFilename: filename, + hfToken: hfToken, + endpoint: endpoint, + backgroundSession: useBackgroundSession + ) + try await downloader.download { fractionDownloaded in + fileProgress.completedUnitCount = Int64(100 * fractionDownloaded) + outputHandler(progress) + } + fileProgress.completedUnitCount = 100 + } + } + */ + public func formRequest(repo: HuggingFace.Hub.Repo, relativeFilename: String, authToken: String?) -> URLRequest { + var url: URL { + // https://huggingface.co/coreml-projects/Llama-2-7b-chat-coreml/resolve/main/tokenizer.json?download=true + var url = URL(string: endpoint)! + if repo.type != .models { + url = url.appending(component: repo.type.rawValue) + } + url = url.appending(path: repo.id) + url = url.appending(path: "resolve/main") // TODO: revisions + url = url.appending(path: relativeFilename) + return url + } + + var request = URLRequest(url: url) + if let authToken = authToken { + request.setValue("Bearer \(authToken)", forHTTPHeaderField: "Authorization") + } + + return request + } + + // main snapshot function @discardableResult - func snapshot( + public func snapshot( from repo: Repo, matching globs: [String] = [], outputHandler: @escaping ( @@ -239,7 +297,7 @@ public extension HuggingFace.Hub.Client { } @discardableResult - func snapshot( + public func snapshot( from repoId: String, matching globs: [String] = [], outputHandler: @escaping (Progress) -> Void = { _ in } @@ -252,7 +310,7 @@ public extension HuggingFace.Hub.Client { } @discardableResult - func snapshot( + public func snapshot( from repo: Repo, matching glob: String, outputHandler: @escaping (Progress) -> Void = { _ in } @@ -261,7 +319,7 @@ public extension HuggingFace.Hub.Client { } @discardableResult - func snapshot( + public func snapshot( from repoId: String, matching glob: String, outputHandler: @escaping (Progress) -> Void = {_ in } @@ 
-271,29 +329,29 @@ public extension HuggingFace.Hub.Client { } /// Stateless wrappers that use `HubApi` instances -public extension HuggingFace.Hub { - static func getFilenames( +extension HuggingFace.Hub { + public static func getFilenames( from repo: HuggingFace.Hub.Repo, matching globs: [String] = [] ) async throws -> [String] { return try await HuggingFace.Hub.Client.shared.getFilenames(from: repo, matching: globs) } - static func getFilenames( + public static func getFilenames( from repoId: String, matching globs: [String] = [] ) async throws -> [String] { return try await HuggingFace.Hub.Client.shared.getFilenames(from: Repo(id: repoId), matching: globs) } - static func getFilenames( + public static func getFilenames( from repo: Repo, matching glob: String ) async throws -> [String] { return try await HuggingFace.Hub.Client.shared.getFilenames(from: repo, matching: glob) } - static func getFilenames( + public static func getFilenames( from repoId: String, matching glob: String ) async throws -> [String] { @@ -303,7 +361,7 @@ public extension HuggingFace.Hub { ) } - static func snapshot( + public static func snapshot( from repo: Repo, matching globs: [String] = [], outputHandler: @escaping (Progress) -> Void = { _ in } @@ -311,7 +369,7 @@ public extension HuggingFace.Hub { return try await HuggingFace.Hub.Client.shared.snapshot(from: repo, matching: globs, outputHandler: outputHandler) } - static func snapshot( + public static func snapshot( from repoId: String, matching globs: [String] = [], outputHandler: @escaping (Progress) -> Void = { _ in } @@ -319,7 +377,7 @@ public extension HuggingFace.Hub { return try await HuggingFace.Hub.Client.shared.snapshot(from: Repo(id: repoId), matching: globs, outputHandler: outputHandler) } - static func snapshot( + public static func snapshot( from repo: Repo, matching glob: String, outputHandler: @escaping (Progress) -> Void = { _ in } @@ -327,7 +385,7 @@ public extension HuggingFace.Hub { return try await 
HuggingFace.Hub.Client.shared.snapshot(from: repo, matching: glob, outputHandler: outputHandler) } - static func snapshot( + public static func snapshot( from repoId: String, matching glob: String, outputHandler: @escaping (Progress) -> Void = { _ in } @@ -335,13 +393,13 @@ public extension HuggingFace.Hub { return try await HuggingFace.Hub.Client.shared.snapshot(from: Repo(id: repoId), matching: glob, outputHandler: outputHandler) } - static func whoami(token: String) async throws -> HuggingFace.Config { + public static func whoami(token: String) async throws -> HuggingFace.Config { return try await HuggingFace.Hub.Client(hfToken: token).whoami() } } -public extension [String] { - func matching(glob: String) -> [String] { +extension [String] { + fileprivate func matching(glob: String) -> [String] { filter { fnmatch(glob, $0, 0) == 0 } } } diff --git a/Sources/HuggingFace/HuggingFace.Hub.swift b/Sources/HuggingFace/HuggingFace.Hub.swift index e443f402..6e2b95dc 100644 --- a/Sources/HuggingFace/HuggingFace.Hub.swift +++ b/Sources/HuggingFace/HuggingFace.Hub.swift @@ -1,14 +1,14 @@ // -// Hub.swift -// -// -// Created by Pedro Cuenca on 18/5/23. +// Copyright (c) Preternatural AI, Inc. 
// -import Foundation +import FoundationX +import Swallow extension HuggingFace { - public struct Hub {} + public struct Hub { + + } } public extension HuggingFace.Hub { @@ -19,15 +19,15 @@ public extension HuggingFace.Hub { case httpStatusCode(Int) } - enum RepoType: String { + enum RepoType: String, Codable { case models case datasets case spaces } - struct Repo { - let id: String - let type: RepoType + struct Repo: Codable { + public let id: String + public let type: RepoType public init(id: String, type: RepoType = .models) { self.id = id diff --git a/Sources/LargeLanguageModels/Intramodular/Prompt Literal/PromptLiteral._Degenerate.swift b/Sources/LargeLanguageModels/Intramodular/Prompt Literal/PromptLiteral._Degenerate.swift index c33f8fad..a781e681 100644 --- a/Sources/LargeLanguageModels/Intramodular/Prompt Literal/PromptLiteral._Degenerate.swift +++ b/Sources/LargeLanguageModels/Intramodular/Prompt Literal/PromptLiteral._Degenerate.swift @@ -17,7 +17,6 @@ extension PromptLiteral { } } -@_spi(Internal) extension PromptLiteral { public enum _DegenerationError: Error { case dynamicVariableUnresolved(any _opaque_DynamicPromptVariable) diff --git a/Tests/Perplexity/Intramodular/CompletionTests.swift b/Tests/Perplexity/Intramodular/CompletionTests.swift index 816231ed..2f07169b 100644 --- a/Tests/Perplexity/Intramodular/CompletionTests.swift +++ b/Tests/Perplexity/Intramodular/CompletionTests.swift @@ -11,37 +11,37 @@ final class CompletionTests: XCTestCase { let llm: any LLMRequestHandling = client func testChatCompletionsLlama3SonarSmall32kChat() async throws { - let result = try await resultForModel(Perplexity.Model.llama3SonarSmall32kChat) + let result = try await resultForModel(Perplexity.Model.llamaSonarSmall128kOnline) print(result) // "Hello! How can I assist you today?" 
} func testChatCompletionsLlama3SonarSmall32kOnline() async throws { - let result = try await resultForModel(Perplexity.Model.llama3SonarSmall32kOnline) + let result = try await resultForModel(Perplexity.Model.llamaSonarSmall128kOnline) print(result) // "Hello! How can I assist you today?" } func testChatCompletionsLlama3SonarLarge32kChat() async throws { - let result = try await resultForModel(Perplexity.Model.llama3SonarLarge32kChat) + let result = try await resultForModel(Perplexity.Model.llamaSonarSmall128kOnline) print(result) // "Hello! How can I assist you today?" } func testChatCompletionsLlama3SonarLarge32kOnline() async throws { - let result = try await resultForModel(Perplexity.Model.llama3SonarLarge32kOnline) + let result = try await resultForModel(Perplexity.Model.llamaSonarSmall128kOnline) print(result) // "Hello! How can I assist you today?" } func testChatCompletionsLlama38bInstruct() async throws { - let result = try await resultForModel(Perplexity.Model.llama38bInstruct) + let result = try await resultForModel(Perplexity.Model.llamaSonarSmall128kOnline) print(result) // "Hello! How can I assist you today?" } func testChatCompletionsLlama370bInstruct() async throws { - let result = try await resultForModel(Perplexity.Model.llama370bInstruct) + let result = try await resultForModel(Perplexity.Model.llamaSonarSmall128kOnline) print(result) // "Hello! How can I assist you today?" } func testChatCompletionsMixtral8x7bInstruct() async throws { - let result = try await resultForModel(Perplexity.Model.mixtral8x7bInstruct) + let result = try await resultForModel(Perplexity.Model.llamaSonarSmall128kOnline) print(result) // "Hello! How can I assist you today?" 
} diff --git a/Tests/Perplexity/module.swift b/Tests/Perplexity/module.swift index b1f20c26..a57038e7 100644 --- a/Tests/Perplexity/module.swift +++ b/Tests/Perplexity/module.swift @@ -5,7 +5,7 @@ import Perplexity public var PERPLEXITY_API_KEY: String { - "API_KEY " + "YOUR_PERPLEXITY_API_KEY" } public var client: Perplexity.Client {