Skip to content

Commit 2e71604

Browse files
Update lengths.ipynb
1 parent f3fe137 commit 2e71604

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

lengths.ipynb

+4 -4
Original file line number | Diff line number | Diff line change
@@ -19,16 +19,16 @@
19 19
],
20 20
"source": [
21 21
"from datasets import load_dataset\n",
22-
"from transformers import LlamaTokenizer\n",
22+
"from transformers import BloomTokenizerFast\n",
23 23
"\n",
24 24
"\n",
25-
"tokenizer = LlamaTokenizer.from_pretrained(\n",
26-
" \"decapoda-research/llama-7b-hf\", add_eos_token=True\n",
25+
"tokenizer = BloomTokenizerFast.from_pretrained(\n",
26+
" \"bigscience/bloom\", add_eos_token=True\n",
27 27
")\n",
28 28
"tokenizer.pad_token = tokenizer.eos_token\n",
29 29
"tokenizer.pad_token_id = tokenizer.eos_token_id\n",
30 30
"\n",
31-
"data = load_dataset(\"json\", data_files=\"alpaca_data.json\")\n",
31+
"data = load_dataset(\"json\", data_files=\"chatdoctor-200k.json\")\n",
32 32
"\n",
33 33
"\n",
34 34
"def generate_prompt(data_point):\n",

0 commit comments

Comments
 (0)