Skip to content

Commit 190bbac

Browse files
committed
Fix: Add error handling for tokenizers without chat templates
1 parent 720c0a9 commit 190bbac

File tree

1 file changed

+35
-24
lines changed

1 file changed

+35
-24
lines changed

tests/generate_tests.py

+35 -24
Original file line number | Diff line number | Diff line change
@@ -348,31 +348,41 @@ def generate_tokenizer_tests():
348348
for tokenizer_id in TOKENIZERS_WITH_CHAT_TEMPLATES:
349349
print(f'Generating chat templates for {tokenizer_id}')
350350

351-
# In local mode, use safer settings
352-
use_fast = not LOCAL_MODE or 'llama' not in tokenizer_id
353-
354-
tokenizer = AutoTokenizer.from_pretrained(
355-
tokenizer_id,
356-
use_fast=use_fast,
357-
trust_remote_code=True,
358-
)
359-
tokenizer_results = []
360-
for key in TOKENIZERS_WITH_CHAT_TEMPLATES[tokenizer_id]:
361-
messages = CHAT_MESSAGES_EXAMPLES[key]
362-
363-
for add_generation_prompt, tokenize in product([True, False], [True, False]):
364-
tokenizer_results.append(dict(
365-
messages=messages,
366-
add_generation_prompt=add_generation_prompt,
367-
tokenize=tokenize,
368-
target=tokenizer.apply_chat_template(
369-
messages,
370-
add_generation_prompt=add_generation_prompt,
371-
tokenize=tokenize,
372-
),
373-
))
351+
try:
352+
# In local mode, use safer settings
353+
use_fast = not LOCAL_MODE or 'llama' not in tokenizer_id
354+
355+
tokenizer = AutoTokenizer.from_pretrained(
356+
tokenizer_id,
357+
use_fast=use_fast,
358+
trust_remote_code=True,
359+
)
360+
tokenizer_results = []
361+
for key in TOKENIZERS_WITH_CHAT_TEMPLATES[tokenizer_id]:
362+
messages = CHAT_MESSAGES_EXAMPLES[key]
363+
364+
for add_generation_prompt, tokenize in product([True, False], [True, False]):
365+
try:
366+
result = tokenizer.apply_chat_template(
367+
messages,
368+
add_generation_prompt=add_generation_prompt,
369+
tokenize=tokenize,
370+
)
371+
tokenizer_results.append(dict(
372+
messages=messages,
373+
add_generation_prompt=add_generation_prompt,
374+
tokenize=tokenize,
375+
target=result,
376+
))
377+
except ValueError as e:
378+
print(f" - Skipping template for {tokenizer_id} with {key}: {str(e)}")
379+
continue
374380

375-
template_results[tokenizer_id] = tokenizer_results
381+
if tokenizer_results:
382+
template_results[tokenizer_id] = tokenizer_results
383+
except Exception as e:
384+
print(f" - Error processing tokenizer {tokenizer_id}: {str(e)}")
385+
continue
376386

377387
return dict(
378388
tokenization=tokenization_results,
@@ -455,3 +465,4 @@ def main():
455465

456466
if __name__ == "__main__":
457467
main()
468+

0 commit comments

Comments
 (0)