7
7
#include " ort_genai.h"
8
8
#include " ort_genai_c.h"
9
9
10
+
11
+ const size_t kMaxTokens = 200 ;
12
+
10
13
@interface GenAIGenerator () {
11
14
std::unique_ptr<OgaModel> model;
12
15
std::unique_ptr<OgaTokenizer> tokenizer;
@@ -29,6 +32,8 @@ - (instancetype)init {
29
32
30
33
- (void )generate : (nonnull NSString *)input_user_question {
31
34
std::vector<long long > tokenTimes; // per-token generation times
35
+ tokenTimes.reserve (kMaxTokens );
36
+
32
37
TimePoint startTime, firstTokenTime, tokenStartTime;
33
38
34
39
try {
@@ -60,7 +65,7 @@ - (void)generate:(nonnull NSString*)input_user_question {
60
65
61
66
NSLog (@" Setting generator parameters..." );
62
67
auto params = OgaGeneratorParams::Create (*self->model );
63
- params->SetSearchOption (" max_length" , 200 );
68
+ params->SetSearchOption (" max_length" , kMaxTokens );
64
69
params->SetInputSequences (*sequences);
65
70
66
71
auto generator = OgaGenerator::Create (*self->model , *params);
@@ -86,7 +91,7 @@ - (void)generate:(nonnull NSString*)input_user_question {
86
91
const char * decode_tokens = tokenizer_stream->Decode (seq[seq_len - 1 ]);
87
92
88
93
if (!decode_tokens) {
89
- @ throw [ NSException exceptionWithName: @" TokenDecodeError " reason: @ " Token decoding failed." userInfo: nil ] ;
94
+ throw std::runtime_error ( " Token decoding failed." ) ;
90
95
}
91
96
92
97
// Measure token generation time excluding logging
0 commit comments