@@ -348,31 +348,41 @@ def generate_tokenizer_tests():
348
348
for tokenizer_id in TOKENIZERS_WITH_CHAT_TEMPLATES :
349
349
print (f'Generating chat templates for { tokenizer_id } ' )
350
350
351
- # In local mode, use safer settings
352
- use_fast = not LOCAL_MODE or 'llama' not in tokenizer_id
353
-
354
- tokenizer = AutoTokenizer .from_pretrained (
355
- tokenizer_id ,
356
- use_fast = use_fast ,
357
- trust_remote_code = True ,
358
- )
359
- tokenizer_results = []
360
- for key in TOKENIZERS_WITH_CHAT_TEMPLATES [tokenizer_id ]:
361
- messages = CHAT_MESSAGES_EXAMPLES [key ]
362
-
363
- for add_generation_prompt , tokenize in product ([True , False ], [True , False ]):
364
- tokenizer_results .append (dict (
365
- messages = messages ,
366
- add_generation_prompt = add_generation_prompt ,
367
- tokenize = tokenize ,
368
- target = tokenizer .apply_chat_template (
369
- messages ,
370
- add_generation_prompt = add_generation_prompt ,
371
- tokenize = tokenize ,
372
- ),
373
- ))
351
+ try :
352
+ # In local mode, use safer settings
353
+ use_fast = not LOCAL_MODE or 'llama' not in tokenizer_id
354
+
355
+ tokenizer = AutoTokenizer .from_pretrained (
356
+ tokenizer_id ,
357
+ use_fast = use_fast ,
358
+ trust_remote_code = True ,
359
+ )
360
+ tokenizer_results = []
361
+ for key in TOKENIZERS_WITH_CHAT_TEMPLATES [tokenizer_id ]:
362
+ messages = CHAT_MESSAGES_EXAMPLES [key ]
363
+
364
+ for add_generation_prompt , tokenize in product ([True , False ], [True , False ]):
365
+ try :
366
+ result = tokenizer .apply_chat_template (
367
+ messages ,
368
+ add_generation_prompt = add_generation_prompt ,
369
+ tokenize = tokenize ,
370
+ )
371
+ tokenizer_results .append (dict (
372
+ messages = messages ,
373
+ add_generation_prompt = add_generation_prompt ,
374
+ tokenize = tokenize ,
375
+ target = result ,
376
+ ))
377
+ except ValueError as e :
378
+ print (f" - Skipping template for { tokenizer_id } with { key } : { str (e )} " )
379
+ continue
374
380
375
- template_results [tokenizer_id ] = tokenizer_results
381
+ if tokenizer_results :
382
+ template_results [tokenizer_id ] = tokenizer_results
383
+ except Exception as e :
384
+ print (f" - Error processing tokenizer { tokenizer_id } : { str (e )} " )
385
+ continue
376
386
377
387
return dict (
378
388
tokenization = tokenization_results ,
@@ -455,3 +465,4 @@ def main():
455
465
456
466
# Script entry point: generate the tokenizer/chat-template test fixtures
# only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()
468
+
0 commit comments