We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent f3fe137 commit 2e71604Copy full SHA for 2e71604
lengths.ipynb
@@ -19,16 +19,16 @@
19
],
20
"source": [
21
"from datasets import load_dataset\n",
22
- "from transformers import LlamaTokenizer\n",
+ "from transformers import BloomTokenizerFast\n",
23
"\n",
24
25
- "tokenizer = LlamaTokenizer.from_pretrained(\n",
26
- " \"decapoda-research/llama-7b-hf\", add_eos_token=True\n",
+ "tokenizer = BloomTokenizerFast.from_pretrained(\n",
+ " \"bigscience/bloom\", add_eos_token=True\n",
27
")\n",
28
"tokenizer.pad_token = tokenizer.eos_token\n",
29
"tokenizer.pad_token_id = tokenizer.eos_token_id\n",
30
31
- "data = load_dataset(\"json\", data_files=\"alpaca_data.json\")\n",
+ "data = load_dataset(\"json\", data_files=\"chatdoctor-200k.json\")\n",
32
33
34
"def generate_prompt(data_point):\n",
0 commit comments