From 84ab99114a6f4a00cf362c50ae4d1350ee62d304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyifeng=2Ewang=E2=80=9D?= <β€œ3038880699@qq.com;q:wqqgit config --global user.name β€œyifeng.wang”git config --global user.email β€œ3038880699@qq.com> Date: Wed, 15 Jan 2025 21:44:50 +0800 Subject: [PATCH] draft --- ..._ORPO_traning_in_Unsloth_Ollama_chat.ipynb | 8129 +++++++++++++++++ 1 file changed, 8129 insertions(+) create mode 100644 docs/cookbooks/model_training/preference_data_gen_+Qwen_ORPO_traning_in_Unsloth_Ollama_chat.ipynb diff --git a/docs/cookbooks/model_training/preference_data_gen_+Qwen_ORPO_traning_in_Unsloth_Ollama_chat.ipynb b/docs/cookbooks/model_training/preference_data_gen_+Qwen_ORPO_traning_in_Unsloth_Ollama_chat.ipynb new file mode 100644 index 0000000000..0cd6397b92 --- /dev/null +++ b/docs/cookbooks/model_training/preference_data_gen_+Qwen_ORPO_traning_in_Unsloth_Ollama_chat.ipynb @@ -0,0 +1,8129 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "8lSNa9FYvmA7" + }, + "outputs": [], + "source": [ + "%%capture\n", + "!pip install unsloth\n", + "# Also get the latest nightly Unsloth!\n", + "!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4dfwD72Ct1_T" + }, + "source": [ + "* We support Llama, Mistral, CodeLlama, TinyLlama, Vicuna, Open Hermes etc\n", + "* And Yi, Qwen ([llamafied](https://huggingface.co/models?sort=trending&search=qwen+llama)), Deepseek, all Llama, Mistral derived archs.\n", + "* We support 16bit LoRA or 4bit QLoRA. Both 2x faster.\n", + "* `max_seq_length` can be set to anything, since we do automatic RoPE Scaling via [kaiokendev's](https://kaiokendev.github.io/til) method.\n", + "* [**NEW**] With [PR 26037](https://github.com/huggingface/transformers/pull/26037), we support downloading 4bit models **4x faster**! 
[Our repo](https://huggingface.co/unsloth) has Llama, Mistral 4bit models." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 412, + "referenced_widgets": [ + "5e7b3dcce78a463eb14e42cd3d2a0b0b", + "22b564917f5f4c8e81a8347f90a3d557", + "6b993dd64aa7489c8ca8c2f5d728178e", + "512462794d674769b6629ad8c8c254b7", + "d8c30b721eae4c33a8fa2746c91f6ebe", + "7927c572e90248c7a97ea2303fb9547e", + "738db5c70e8644d8825396a0c9f8309f", + "3ccf6630013f4c4d950d30eca6d8e2be", + "b956ad4e4ffb47908402a0d9fce147fb", + "16088f5268744b63a2fe9f2d22a0dcbb", + "b95d504adfd742c89a3fb277233fac5b", + "2d2844e25ace497aba29b9fb68173b0f", + "35d8b168ec234ca7a045d47c348ca9a6", + "c2fcb84d8f834b70880e7bfbe1e691c7", + "fc4242c15c81477899b8446c6ad6b1a3", + "dd3f008a31b24d86af3ae7e9760cae8d", + "c4560decbda04d628fd815fa8f3953a0", + "af65219a68e644698d5a687095007316", + "41ed1216684a40cba7c0cf3e3aaf18c1", + "365fac5b8cb14b3c9c85c21529551c7d", + "73b62457b51e413dadc7bfca227abe39", + "5b214ea52c60495bb35f4b89439c70b6", + "76ea4577ebc34368a1f220104a0778af", + "16b2957328bf4093a00cb3bf2e655c24", + "0aa69e1780cf4537ada828298340f744", + "13339cb1901644b9be473299832f6ced", + "ae2e31a363ae4051ac40375bfec4dd42", + "50c5f5d69a164f8fa51721e1c44a7783", + "00936bda447f46e5b1439913aeb730de", + "ce3e19e662e94dec89ee565ed892d571", + "3fa8e45950cd4a28b7314b6545f9bbc8", + "1596274edd804f1e858d2b5fb307300c", + "81654beebf7e425aa4f23504c1b71112", + "8088d38d4d374574b9e52fd7356e2ea6", + "326908aed9c64fc6abb1f767bbc0211a", + "bc86d9704e2f460bb7cfd04b664eed12", + "9e0ea9e88df94b4d9967ff1a7ed1bdd6", + "b47cd2b42af746d4ab10c7d0c640ba7a", + "689e62ad5ee249bfa66e675e778b96f3", + "ff3791fe18f141e59d63056c5816b0f5", + "8fc45c66242648cc94f51cf08ff18f25", + "7bf13dd9534d4e9197bd1874687862ec", + "445862f7c16947d198aae928b2f3f6bb", + "c2ca493b2d8a487c8db0defbeea297cc", + "f602392931b1423e867b9e37dad27300", + "d2f03b20726d489287e5ebce211303c4", + 
"e6683a89074d456e949a5072a55b1602", + "c46dbb7e089a46c5be6f9947c79cf23f", + "d25dfa432ab84d04b346b8cd58c89720", + "aba1e58a13744fbea62c85ced2969652", + "ad42cefa580c4e84ba5201b704218c34", + "bf846b25033d48bd818ae306e671b61b", + "595074903827402b8a1246c101340eff", + "85dba50a8bc04ccebec42f560bbc1b79", + "a8e9032fb3e34ca08f0a742c20c15848", + "7b1d17800e4540ffb099bdef9eb8ae6c", + "74069380475f434bb34afec56314fa4e", + "075bc926c06742d399070029d705173f", + "81300c1080894e34a3a5d88eeac33801", + "28bf71c117fe456ba48b1107a22c1d16", + "f3d89e8a8407433db10d775dec82099f", + "c57b3e2abc4d4147a7410fdeb16f318e", + "4543871a84b646f286bd601896d5d8cc", + "07bac781635f4da69b591e7579359c64", + "894dcbc8129c4f979581aa8c05efb747", + "0b5a520ef7ef4df7ac67d2fce9036a92", + "fe4e7cd444b4449292b704253c574c2d", + "49a0e5d1681046c88e9bc1159d47d875", + "54faaf670f5e43aa9f0cada481be066c", + "e7f156806cf84f80bc6562cc7fe24e08", + "ba57df115cbd4571913b8399d3bdf2e0", + "fb7d899a343846f789adc4d9a3f54659", + "87d15d738ba4471cb86fa002620042a8", + "d7535f7fad1d416a8936735cd0f23099", + "061df7aa5cd44f8481ec3af743ffc442", + "e7dc3433d02642728f805d18ab987102", + "5d53d4ee8b5747f4a29d3efcabc2a324", + "811c37a344f84daabaa1e3e7eb985769", + "1a1777e3d39a4d98aa9bd0685925c52d", + "216e1bd82a024ad895c7abd89025b771", + "3f85a717e1d242eaa21deda5944c658f", + "ad45988fb01248ac96ff1b127149fafb", + "83efa62da7c04b79829e69f81f229c56", + "7843d80c596749abae891f9855830943", + "61129d3bdae2454e997bf3263aa616b9", + "b0732ed61af849678e127cdaaa67ce7d", + "b5f3983270e54dd3911a35fe3128d9b7", + "9d7d05e6711149aa8adf3d03f3468e2f" + ] + }, + "id": "2eSvM9zX_2d3", + "outputId": "296225a1-1e10-43f2-c40d-a82d417c39ff" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", + "πŸ¦₯ Unsloth Zoo will now patch everything to make training faster!\n", + "==((====))== Unsloth 2025.1.5: Fast Qwen2 patching. 
Transformers: 4.47.1.\n", + " \\\\ /| GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.\n", + "O^O/ \\_/ \\ Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0\n", + "\\ / Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]\n", + " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n", + "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5e7b3dcce78a463eb14e42cd3d2a0b0b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "model.safetensors: 0%| | 0.00/1.14G [00:00 0 ! Suggested 8, 16, 32, 64, 128\n", + " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", + " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", + " lora_alpha = 16,\n", + " lora_dropout = 0, # Supports any, but = 0 is optimized\n", + " bias = \"none\", # Supports any, but = \"none\" is optimized\n", + " # [NEW] \"unsloth\" uses 30% less VRAM, fits 2x larger batch sizes!\n", + " use_gradient_checkpointing = \"unsloth\", # True or \"unsloth\" for very long context\n", + " random_state = 3407,\n", + " use_rslora = False, # We support rank stabilized LoRA\n", + " loftq_config = None, # And LoftQ\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FNfc6OLOuuyZ" + }, + "source": [ + "\n", + "### Data Prep\n", + "We now use a special ORPO style dataset from [recipe-research](https://huggingface.co/datasets/reciperesearch/dolphin-sft-v0.1-preference).\n", + "\n", + "You need at least 3 columns:\n", + "* Instruction\n", + "* Accepted\n", + "* Rejected\n", + "\n", + "For example:\n", + "* Instruction: \"What is 2+2?\"\n", + "* Accepted: \"The answer is 4\"\n", + "* Rejected: \"The answer is 5\"\n", + "\n", + "The goal of ORPO is to penalize the \"rejected\" samples, and increase the likelihood of \"accepted\" samples. 
[recipe-research](https://huggingface.co/datasets/reciperesearch/dolphin-sft-v0.1-preference) essentially used Mistral to generate the \"rejected\" responses, and used GPT-4 to generate the \"accepted\" responses." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 145, + "referenced_widgets": [ + "ec2052c4d20f4273b2fc9d20b9f6cc04", + "d47f521581c8423682ed590854f55702", + "2cf07655e93b49d9823d460b68bfa553", + "a47c4b141e244e44b068a2d79f9ac3f8", + "078aba59954f45d5bae22aacaa9a5a8b", + "d196c1273a72403e926266ebde2ff551", + "cb7eb0355b084370a2ba96125d6aa939", + "4e7b87ce3eec4994996c1d467bf6b2fb", + "1215442ba51b430d8de2358f44f7a9fa", + "d98b7e8cc525466598fe34609e41fa54", + "463bddb985dc4b62a9ee0be8919cc5cf", + "140f686ee529400b81876ccd4a37558c", + "aafb1c4d69944584aaf0bc30e47df550", + "e420266f780541e481cfbe4a80b2675c", + "0c506dbd691d428082ed8151cfebd5e6", + "522c004154f0468e9057fdc2609a11d7", + "3e308101e62f43c799ca27b6eaa744d4", + "0576ab6d4cce4cb09f0755a230d50980", + "40a09d1d63ad49eca2a955e3e7d0cc72", + "25fe66aa53bc49d281a17ead8041ff1d", + "4698d859bc2648f8bd41de734d3e6b6f", + "30ed19bc86744c8b92b685ef97eff53d", + "05096318093d4ded8f6aacf0d8baa5a1", + "8e10bfda957d412592a2b5a7a33c81aa", + "2157c5a847d24a9c88fe0a657fa4a7d5", + "df90e41a4fc241c1ad61506089b59e83", + "7ec180f6335341d594e0a726c48777a3", + "bfbfd237e36d46f9be546d2416b7bb36", + "73a0c53397be44c7974c4c9b1ad0423c", + "e5dfd1e6fcf44da6bdb2957cf603710f", + "6666615c6adc49bead154aff367c30e2", + "69eb1c785b6f46cfab022b650e0ccc7b", + "91d3a3d009f84b05b62f54353dc955eb", + "675bd5d813834814b6f9c8273a026e0c", + "ac97da47a07441f8829640460203fa34", + "7b532513494f4b35b74573e89140dda6", + "c8e9096bd7764591aa44e8a2e1852c74", + "d394bbf04b904c0eb87ce8bd19bc4788", + "7f565d6c60064b1f86a60e9037651f14", + "f67d2789738a4b3dac5dfdd91c588c5d", + "f308b42d29b74c6fa49b9558dfe63409", + "d3b092ed193c49e0ba26aeae08111b4f", +
"56a1c1654c304b05876f0e3664a091ed", + "0a9c24d18391432f84fe2f89f69193b7" + ] + }, + "id": "GetrTh37qgDp", + "outputId": "c8348041-364f-4670-d9d8-ad5043e2949d" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ec2052c4d20f4273b2fc9d20b9f6cc04", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "README.md: 0%| | 0.00/490 [00:00')\n", + "REJECTED: ==================================================\n", + "('Question: What is the reason behind the popularity of Xkcd comics among '\n", + " 'internet users?\\n'\n", + " '\\n'\n", + " 'Answer: Xkcd comics are popular among internet users because they offer a '\n", + " 'unique blend of humor, relatable content, and thought-provoking topics that '\n", + " 'resonate with a wide range of people. The comics often address everyday '\n", + " 'experiences, technology, and social issues, making them accessible and '\n", + " 'enjoyable for many individuals. Additionally, the simple and minimalistic '\n", + " 'art style of Xkcd comics allows for easy comprehension and sharing, '\n", + " 'contributing to their widespread appeal.<|im_end|>')\n" + ] + } + ], + "source": [ + "import pprint\n", + "row = dataset[1]\n", + "print('INSTRUCTION: ' + '=' * 50)\n", + "pprint.pprint(row[\"prompt\"])\n", + "print('ACCEPTED: ' + '=' * 50)\n", + "pprint.pprint(row[\"chosen\"])\n", + "print('REJECTED: ' + '=' * 50)\n", + "pprint.pprint(row[\"rejected\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "oqyleKojqgDq" + }, + "outputs": [], + "source": [ + "# Enable reward modelling stats\n", + "from unsloth import PatchDPOTrainer\n", + "PatchDPOTrainer()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J1ZlnJpkxIuV" + }, + "source": [ + "\n", + "### Train the model\n", + "Now let's use Huggingface TRL's `ORPOTrainer`! More docs here: [TRL ORPO docs](https://huggingface.co/docs/trl/main/en/orpo_trainer). 
We do 30 steps to speed things up, but for a full run you can set `num_train_epochs=1` and turn off the step limit with `max_steps=None`. We also support TRL's `DPOTrainer`!" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 113, + "referenced_widgets": [ + "8f46cd3d936343aca34b328df6b3dea0", + "5cd4555da19a47c8beb1c0c7d782aa10", + "850136c0706a448eb98a32181f4f192d", + "d4ba51ee7f864afb822a7e4076496ab4", + "2dfbe287b29e4f6b9c84f95d751daffa", + "e5e50e29698645a1b0bc68d7821d27a7", + "3d360b6d73704cf9956bd7279a0af53a", + "b88a783124404b9b807b83d6f7023285", + "f596811361d34a75b178553520aa3400", + "06a7440478ee4c35a58bba8a434d9e69", + "81c0046c66224f7582d72c1dbf919d33", + "93b2a2bd27dc49d28acc3b6643cfc022", + "29bdeef39f4f46ae910080a2b4547686", + "98ba361adff74799a8aeecfd313e3beb", + "336d3e3f8280423ab2a62e0cdf7631d8", + "4a82e48b80de44f29cef8b1724ef85e1", + "74363b66c6064709b98b95c4cd69662a", + "22dc4471a9b54eefafd7021177608984", + "f4f28c070f7241539ec87dd82a701a7e", + "ab5f2943fb674f03bb003d11304b3188", + "87cc4745ead94aaa8e41f4eed3cb1e59", + "6211db271a134ea9b2bd08089faf7d80", + "7b9032352b0a4ae4b7bacfd217e29451", + "b272574e4eab4313a3ea5f97c1d5c359", + "102875abfc5b4e6cab735f75bf200d50", + "b1011b504fc145af88d96667cafef7d4", + "d9d77e5d7b014d6883b8dafa3a685dc2", + "2bdf072fd2b44e0abd3e2d5802a2054b", + "f8973d77047c4106987ae37a4c6a97ec", + "ad90ff35111d48d78a4a8595947ab8d3", + "a2d515ef793b4435b1fe6035de08585b", + "9c676e3f4e204349ae41edc8ae770ce3", + "ad2b896b04fb42c18839380b8a40d19b" + ] + }, + "id": "QtoqUw80QDV0", + "outputId": "c175c6df-669b-4429-8f01-82ac1134250c" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8f46cd3d936343aca34b328df6b3dea0", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Map: 0%| | 0/16000 [00:00\n", + " \n", + " \n", + " [30/30 02:36, Epoch 0/1]\n", + " \n", + " \n", + " \n", + " \n", + " \n", +
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Step | Training Loss | rewards / chosen | rewards / rejected | rewards / accuracies | rewards / margins | logps / rejected | logps / chosen | logits / rejected | logits / chosen
1 | 4.112800 | -0.426227 | -0.264567 | 0.125000 | -0.161660 | -2.645666 | -4.262269 | -1.351560 | -1.609066
2 | 3.619100 | -0.389984 | -0.230814 | 0.125000 | -0.159171 | -2.308137 | -3.899843 | -0.607541 | -0.993858
3 | 4.056700 | -0.245542 | -0.217518 | 0.125000 | -0.028024 | -2.175181 | -2.455418 | -1.361474 | -1.343608
4 | 4.118300 | -0.372123 | -0.351179 | 0.375000 | -0.020944 | -3.511789 | -3.721230 | -0.954310 | -1.037349
5 | 3.344800 | -0.219567 | -0.185779 | 0.375000 | -0.033788 | -1.857790 | -2.195669 | -1.216430 | -1.192454
6 | 3.565200 | -0.346085 | -0.408853 | 0.250000 | 0.062767 | -4.088528 | -3.460854 | -1.281686 | -1.154220
7 | 4.106100 | -0.223531 | -0.192299 | 0.375000 | -0.031232 | -1.922991 | -2.235310 | -1.130482 | -1.015447
8 | 3.744200 | -0.222023 | -0.158168 | 0.000000 | -0.063855 | -1.581680 | -2.220229 | -0.981553 | -1.063048
9 | 3.841800 | -0.294004 | -0.222455 | 0.000000 | -0.071549 | -2.224555 | -2.940043 | -0.610051 | -0.692850
10 | 3.574100 | -0.323491 | -0.286941 | 0.000000 | -0.036549 | -2.869414 | -3.234910 | -1.168018 | -1.013650
11 | 3.842800 | -0.256523 | -0.172404 | 0.125000 | -0.084120 | -1.724037 | -2.565235 | -1.059811 | -1.065985
12 | 3.698000 | -0.347930 | -0.301730 | 0.250000 | -0.046199 | -3.017303 | -3.479295 | -1.261715 | -0.990827
13 | 4.154500 | -0.408001 | -0.269475 | 0.000000 | -0.138527 | -2.694748 | -4.080014 | -1.196285 | -1.425266
14 | 3.846700 | -0.285937 | -0.185377 | 0.000000 | -0.100560 | -1.853770 | -2.859370 | -0.976951 | -0.978538
15 | 4.014600 | -0.405519 | -0.351493 | 0.000000 | -0.054027 | -3.514926 | -4.055194 | -1.438474 | -1.343131
16 | 3.691200 | -0.337413 | -0.287705 | 0.250000 | -0.049708 | -2.877048 | -3.374131 | -1.140527 | -1.140243
17 | 3.849800 | -0.317650 | -0.249021 | 0.125000 | -0.068629 | -2.490213 | -3.176501 | -1.095078 | -1.301038
18 | 3.892200 | -0.322818 | -0.291411 | 0.125000 | -0.031407 | -2.914110 | -3.228178 | -1.081944 | -0.880424
19 | 3.942300 | -0.236923 | -0.198437 | 0.375000 | -0.038486 | -1.984369 | -2.369226 | -1.024021 | -0.998742
20 | 3.660500 | -0.318861 | -0.304294 | 0.375000 | -0.014567 | -3.042940 | -3.188613 | -1.159517 | -0.961397
21 | 3.374000 | -0.183591 | -0.123356 | 0.000000 | -0.060235 | -1.233564 | -1.835910 | -0.954624 | -0.998104
22 | 3.527600 | -0.371039 | -0.377409 | 0.250000 | 0.006370 | -3.774089 | -3.710387 | -1.262056 | -0.990792
23 | 3.822700 | -0.349856 | -0.288118 | 0.125000 | -0.061738 | -2.881181 | -3.498561 | -1.308489 | -1.409815
24 | 3.224100 | -0.239801 | -0.208166 | 0.125000 | -0.031635 | -2.081656 | -2.398007 | -1.129003 | -1.081420
25 | 3.929600 | -0.345307 | -0.185935 | 0.000000 | -0.159372 | -1.859352 | -3.453071 | -1.213040 | -1.342487
26 | 3.918800 | -0.327853 | -0.260678 | 0.125000 | -0.067175 | -2.606782 | -3.278529 | -1.286484 | -1.394064
27 | 3.845700 | -0.343079 | -0.226485 | 0.250000 | -0.116594 | -2.264853 | -3.430789 | -1.334893 | -1.199522
28 | 3.372900 | -0.287159 | -0.235770 | 0.000000 | -0.051389 | -2.357700 | -2.871589 | -1.577852 | -1.151554
29 | 3.541100 | -0.244107 | -0.199115 | 0.250000 | -0.044992 | -1.991150 | -2.441066 | -1.001042 | -0.955630
30 | 3.574000 | -0.267658 | -0.195255 | 0.250000 | -0.072403 | -1.952554 | -2.676580 | -1.218759 | -1.284432

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "TrainOutput(global_step=30, training_loss=3.7602075894673663, metrics={'train_runtime': 167.4826, 'train_samples_per_second': 1.433, 'train_steps_per_second': 0.179, 'total_flos': 0.0, 'train_loss': 3.7602075894673663, 'epoch': 0.015})" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "orpo_trainer.train()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FgEvCW76xblp" + }, + "source": [ + "\n", + "### Inference\n", + "Let's run the model! You can change the instruction and input - leave the output blank!" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0DJPbbtGxcFJ", + "outputId": "69026769-2fd3-4267-d1fe-d05003561d73" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\\n\\n### Instruction:\\nContinue the fibonnaci sequence.\\n\\n### Input:\\n1, 1, 2, 3, 5, 8\\n\\n### Response:\\nThe next number in the sequence is 13. The sequence is the Fibonacci sequence, where each number is the sum of the two preceding ones. 
The next number is 5 + 8 = 13.\\n\\nTo determine the next number in the Fibonacci sequence, we need to follow the rule that each number is']" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# alpaca_prompt = Copied from above\n", + "FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n", + "inputs = tokenizer(\n", + "[\n", + " alpaca_prompt.format(\n", + " \"Continue the fibonnaci sequence.\", # instruction\n", + " \"1, 1, 2, 3, 5, 8\", # input\n", + " \"\", # output - leave this blank for generation!\n", + " )\n", + "], return_tensors = \"pt\").to(\"cuda\")\n", + "\n", + "outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)\n", + "tokenizer.batch_decode(outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "absTV8M1xzwz" + }, + "source": [ + " You can also use a `TextStreamer` for continuous inference - so you can see the generation token by token, instead of waiting the whole time!" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xKzWRN1Px0Bg", + "outputId": "fcdfb08e-5219-48bf-c7e3-cf41ceeead55" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n", + "\n", + "### Instruction:\n", + "Continue the fibonnaci sequence.\n", + "\n", + "### Input:\n", + "1, 1, 2, 3, 5, 8\n", + "\n", + "### Response:\n", + "The next number in the sequence is 13. The sequence is the Fibonacci sequence, where each number is the sum of the two preceding ones. The next number is 5 + 8 = 13.\n", + "\n", + "To determine the next number in the Fibonacci sequence, we need to follow the rule that each number is the sum of the two preceding numbers. 
The given sequence is:\n", + "\n", + "1, 1, 2, 3, 5, 8\n", + "\n", + "Let's identify the pattern:\n", + "\n", + "- The first number is 1.\n", + "- The second number is 1.\n", + "- The third number is 1 + 1 = \n" + ] + } + ], + "source": [ + "# alpaca_prompt = Copied from above\n", + "FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n", + "inputs = tokenizer(\n", + "[\n", + " alpaca_prompt.format(\n", + " \"Continue the fibonnaci sequence.\", # instruction\n", + " \"1, 1, 2, 3, 5, 8\", # input\n", + " \"\", # output - leave this blank for generation!\n", + " )\n", + "], return_tensors = \"pt\").to(\"cuda\")\n", + "\n", + "from transformers import TextStreamer\n", + "text_streamer = TextStreamer(tokenizer)\n", + "_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y_3rdZXmx3Hh" + }, + "source": [ + "\n", + "### Saving, loading finetuned models\n", + "To save the final model as LoRA adapters, either use Huggingface's `push_to_hub` for an online save or `save_pretrained` for a local save.\n", + "\n", + "**[NOTE]** This ONLY saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HJbRqLynx3a8", + "outputId": "05228b0d-4490-49f1-cd71-5cfe0f9417c9" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "('lora_model/tokenizer_config.json',\n", + " 'lora_model/special_tokens_map.json',\n", + " 'lora_model/tokenizer.json')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.save_pretrained(\"lora_model\") # Local saving\n", + "tokenizer.save_pretrained(\"lora_model\")\n", + "# model.push_to_hub(\"your_name/lora_model\", token = \"...\") # Online saving\n", + "# tokenizer.push_to_hub(\"your_name/lora_model\", token = \"...\") # Online saving" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mwIRb8DByBGg" + }, + "source": [ + "Now if you want to load the LoRA adapters we just saved for inference, set `False` to `True`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UPHJs9wDyBbN", + "outputId": "77165645-7402-4347-a613-f0ad90b572d9" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n" + ] + }, + { + "data": { + "text/plain": [ + "[\"<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\\n\\n### Instruction:\\nWhat is a famous tall tower in Paris?\\n\\n### Input:\\n\\n\\n### Response:\\nThe Eiffel Tower is a famous tall tower in Paris. It is a wrought iron tower located on the Champ de Mars in Paris, France. The tower is named after the engineer Gustave Eiffel, the main designer, and was built as the entrance to the 1889 World's Fair. 
The tower\"]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "if False:\n", + " from unsloth import FastLanguageModel\n", + " model, tokenizer = FastLanguageModel.from_pretrained(\n", + " model_name = \"lora_model\", # YOUR MODEL YOU USED FOR TRAINING\n", + " max_seq_length = max_seq_length,\n", + " dtype = dtype,\n", + " load_in_4bit = load_in_4bit,\n", + " )\n", + " FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n", + "\n", + "# alpaca_prompt = You MUST copy from above!\n", + "\n", + "inputs = tokenizer(\n", + "[\n", + " alpaca_prompt.format(\n", + " \"What is a famous tall tower in Paris?\", # instruction\n", + " \"\", # input\n", + " \"\", # output - leave this blank for generation!\n", + " )\n", + "], return_tensors = \"pt\").to(\"cuda\")\n", + "\n", + "outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)\n", + "tokenizer.batch_decode(outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l10uNsFYyGav" + }, + "source": [ + "### Saving to float16 for VLLM\n", + "\n", + "We also support saving to `float16` directly. Select `merged_16bit` for float16 or `merged_4bit` for int4. We also allow `lora` adapters as a fallback. Use `push_to_hub_merged` to upload to your Hugging Face account! You can go to https://huggingface.co/settings/tokens for your personal tokens." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fZR6n7DsyHu6" + }, + "outputs": [], + "source": [ + "# Merge to 16bit\n", + "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "\n", + "# Merge to 4bit\n", + "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "\n", + "# Just LoRA adapters\n", + "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"lora\",)\n", + "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"lora\", token = \"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ssjW3ST2yI1L" + }, + "source": [ + "### GGUF / llama.cpp Conversion\n", + "To save to `GGUF` / `llama.cpp`, we support it natively now! We clone `llama.cpp` and we default save it to `q8_0`. We allow all methods like `q4_k_m`. Use `save_pretrained_gguf` for local saving and `push_to_hub_gguf` for uploading to HF.\n", + "\n", + "Some supported quant methods (full list on our [Wiki page](https://github.com/unslothai/unsloth/wiki#gguf-quantization-options)):\n", + "* `q8_0` - Fast conversion. High resource use, but generally acceptable.\n", + "* `q4_k_m` - Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K.\n", + "* `q5_k_m` - Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "FKVTNAwyyKFR", + "outputId": "6561be80-b25d-4aa9-8014-2db0ac38b650" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unsloth: Merging 4bit and LoRA weights to 16bit...\n", + "Unsloth: Will use up to 6.5 out of 12.67 RAM for saving.\n", + "Unsloth: Saving model... This might take 5 minutes ...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 28/28 [00:00<00:00, 54.17it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unsloth: Saving tokenizer... Done.\n", + "Unsloth: Saving model/pytorch_model.bin...\n", + "Done.\n", + "==((====))== Unsloth: Conversion from QLoRA to GGUF information\n", + " \\\\ /| [0] Installing llama.cpp might take 3 minutes.\n", + "O^O/ \\_/ \\ [1] Converting HF to GGUF 16bits might take 3 minutes.\n", + "\\ / [2] Converting GGUF 16bits to ['q8_0'] might take 10 minutes each.\n", + " \"-____-\" In total, you will have to wait at least 16 minutes.\n", + "\n", + "Unsloth: Installing llama.cpp. 
This might take 3 minutes...\n", + "Unsloth: [1] Converting model at model into q8_0 GGUF format.\n", + "The output location will be /content/model/unsloth.Q8_0.gguf\n", + "This might take 3 minutes...\n", + "INFO:hf-to-gguf:Loading model: model\n", + "INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n", + "INFO:hf-to-gguf:Exporting model...\n", + "INFO:hf-to-gguf:gguf: loading model part 'pytorch_model.bin'\n", + "INFO:hf-to-gguf:token_embd.weight, torch.float16 --> Q8_0, shape = {1536, 151936}\n", + "INFO:hf-to-gguf:blk.0.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.0.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.0.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.0.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.0.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.0.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.0.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.1.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.1.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.1.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.1.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.1.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + 
"INFO:hf-to-gguf:blk.1.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.1.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.2.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.2.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.2.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.2.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.2.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.2.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.2.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.3.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.3.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.3.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.3.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + 
"INFO:hf-to-gguf:blk.3.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.3.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.3.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.4.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.4.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.4.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.4.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.4.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.4.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.4.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.5.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.5.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.5.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + 
"INFO:hf-to-gguf:blk.5.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.5.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.5.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.5.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.6.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.6.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.6.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.6.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.6.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.6.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.6.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.7.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.7.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + 
"INFO:hf-to-gguf:blk.7.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.7.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.7.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.7.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.7.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.8.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.8.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.8.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.8.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.8.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.8.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.8.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.9.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + 
"INFO:hf-to-gguf:blk.9.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.9.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.9.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.9.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.9.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.9.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.10.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.10.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.10.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.10.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.10.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.10.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.10.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.float16 --> F32, shape = 
{1536}\n", + "INFO:hf-to-gguf:blk.11.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.11.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.11.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.11.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.11.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.11.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.11.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.12.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.12.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.12.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.12.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.12.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.12.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.12.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.12.attn_norm.weight, torch.float16 --> 
F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.13.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.13.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.13.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.13.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.13.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.13.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.13.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.14.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.14.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.14.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.14.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.14.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.14.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.14.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.14.ffn_down.weight, 
torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.15.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.15.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.15.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.15.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.15.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.15.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.15.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.16.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.16.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.16.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.16.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.16.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.16.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.16.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + 
"INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.17.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.17.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.17.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.17.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.17.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.17.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.17.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.18.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.18.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.18.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.18.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.18.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.18.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.18.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 
1536}\n", + "INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.19.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.19.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.19.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.19.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.19.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.19.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.19.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.19.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.20.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.20.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.20.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.20.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.20.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.20.attn_v.weight, torch.float16 --> Q8_0, shape = 
{1536, 256}\n", + "INFO:hf-to-gguf:blk.20.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.21.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.21.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.21.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.21.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.21.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.21.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.21.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.22.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.22.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.22.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.22.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.22.attn_v.bias, torch.float16 
--> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.22.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.22.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.23.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.23.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.23.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.23.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.23.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.23.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.23.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.23.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.24.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.24.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.24.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.24.attn_k.weight, 
torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.24.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.24.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.24.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.24.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.24.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.24.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.24.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.24.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.25.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.25.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.25.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.25.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.25.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.25.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.25.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.25.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.25.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.25.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.25.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.25.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.26.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.26.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + 
"INFO:hf-to-gguf:blk.26.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.26.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.26.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.26.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.26.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.26.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.26.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.26.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.26.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.26.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.27.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.27.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.27.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.27.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.27.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.27.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.27.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.27.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.27.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.27.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.27.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.27.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:output_norm.weight, torch.float16 --> F32, shape = 
{1536}\n", + "INFO:hf-to-gguf:Set meta model\n", + "INFO:hf-to-gguf:Set model parameters\n", + "INFO:hf-to-gguf:gguf: context length = 4096\n", + "INFO:hf-to-gguf:gguf: embedding length = 1536\n", + "INFO:hf-to-gguf:gguf: feed forward length = 8960\n", + "INFO:hf-to-gguf:gguf: head count = 12\n", + "INFO:hf-to-gguf:gguf: key-value head count = 2\n", + "INFO:hf-to-gguf:gguf: rope theta = 10000.0\n", + "INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-06\n", + "INFO:hf-to-gguf:gguf: file type = 7\n", + "INFO:hf-to-gguf:Set model tokenizer\n", + "INFO:numexpr.utils:NumExpr defaulting to 2 threads.\n", + "2025-01-15 03:52:04.014137: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2025-01-15 03:52:04.040222: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2025-01-15 03:52:04.048249: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2025-01-15 03:52:05.924447: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "INFO:gguf.vocab:Adding 151387 merge(s).\n", + "INFO:gguf.vocab:Setting special token type eos to 151645\n", + "INFO:gguf.vocab:Setting special token type pad to 151665\n", + "INFO:gguf.vocab:Setting special token type bos to 151643\n", + "INFO:gguf.vocab:Setting add_bos_token to False\n", + "INFO:gguf.vocab:Setting chat_template to {%- if tools %}\n", + " {{- '<|im_start|>system\\n' }}\n", + " {%- if messages[0]['role'] == 'system' %}\n", + " {{- messages[0]['content'] }}\n", + " {%- else %}\n", + " {{- 'Please reason step by step, and put your final answer within \\\\boxed{}.' 
}}\n", + " {%- endif %}\n", + " {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n", + " {%- for tool in tools %}\n", + " {{- \"\\n\" }}\n", + " {{- tool | tojson }}\n", + " {%- endfor %}\n", + " {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n", + "{%- else %}\n", + " {%- if messages[0]['role'] == 'system' %}\n", + " {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n", + " {%- else %}\n", + " {{- '<|im_start|>system\\nPlease reason step by step, and put your final answer within \\\\boxed{}.<|im_end|>\\n' }}\n", + " {%- endif %}\n", + "{%- endif %}\n", + "{%- for message in messages %}\n", + " {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n", + " {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n", + " {%- elif message.role == \"assistant\" %}\n", + " {{- '<|im_start|>' + message.role }}\n", + " {%- if message.content %}\n", + " {{- '\\n' + message.content }}\n", + " {%- endif %}\n", + " {%- for tool_call in message.tool_calls %}\n", + " {%- if tool_call.function is defined %}\n", + " {%- set tool_call = tool_call.function %}\n", + " {%- endif %}\n", + " {{- '\\n\\n{\"name\": \"' }}\n", + " {{- tool_call.name }}\n", + " {{- '\", \"arguments\": ' }}\n", + " {{- tool_call.arguments | tojson }}\n", + " {{- '}\\n' }}\n", + " {%- endfor %}\n", + " {{- '<|im_end|>\\n' }}\n", + " {%- elif message.role == \"tool\" %}\n", + " {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n", + " {{- '<|im_start|>user' }}\n", + " {%- endif %}\n", + " {{- '\\n\\n' }}\n", + " {{- message.content }}\n", + " {{- '\\n' }}\n", + " {%- if loop.last or 
(messages[loop.index0 + 1].role != \"tool\") %}\n", + " {{- '<|im_end|>\\n' }}\n", + " {%- endif %}\n", + " {%- endif %}\n", + "{%- endfor %}\n", + "{%- if add_generation_prompt %}\n", + " {{- '<|im_start|>assistant\\n' }}\n", + "{%- endif %}\n", + "\n", + "INFO:hf-to-gguf:Set model quantization version\n", + "INFO:gguf.gguf_writer:Writing the following files:\n", + "INFO:gguf.gguf_writer:/content/model/unsloth.Q8_0.gguf: n_tensors = 338, total_size = 1.6G\n", + "Writing: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.64G/1.64G [00:36<00:00, 44.6Mbyte/s]\n", + "INFO:hf-to-gguf:Model successfully exported to /content/model/unsloth.Q8_0.gguf\n", + "Unsloth: Conversion completed! Output location: /content/model/unsloth.Q8_0.gguf\n", + "Unsloth: Merging 4bit and LoRA weights to 16bit...\n", + "Unsloth: Will use up to 6.47 out of 12.67 RAM for saving.\n", + "Unsloth: Saving model... This might take 5 minutes ...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 28/28 [00:00<00:00, 55.92it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unsloth: Saving tokenizer... Done.\n", + "Unsloth: Saving Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/pytorch_model.bin...\n", + "Done.\n", + "==((====))== Unsloth: Conversion from QLoRA to GGUF information\n", + " \\\\ /| [0] Installing llama.cpp might take 3 minutes.\n", + "O^O/ \\_/ \\ [1] Converting HF to GGUF 16bits might take 3 minutes.\n", + "\\ / [2] Converting GGUF 16bits to ['q4_k_m'] might take 10 minutes each.\n", + " \"-____-\" In total, you will have to wait at least 16 minutes.\n", + "\n", + "Unsloth: Installing llama.cpp. 
This might take 3 minutes...\n", + "Unsloth: [1] Converting model at Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m into f16 GGUF format.\n", + "The output location will be /content/Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/unsloth.F16.gguf\n", + "This might take 3 minutes...\n", + "INFO:hf-to-gguf:Loading model: Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m\n", + "INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n", + "INFO:hf-to-gguf:Exporting model...\n", + "INFO:hf-to-gguf:gguf: loading model part 'pytorch_model.bin'\n", + "INFO:hf-to-gguf:token_embd.weight, torch.float16 --> F16, shape = {1536, 151936}\n", + "INFO:hf-to-gguf:blk.0.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.0.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.0.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.0.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.0.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.0.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.0.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.1.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.1.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.1.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.1.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + 
"INFO:hf-to-gguf:blk.1.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.1.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.1.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.2.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.2.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.2.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.2.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.2.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.2.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.2.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.3.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.3.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.3.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + 
"INFO:hf-to-gguf:blk.3.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.3.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.3.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.3.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.4.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.4.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.4.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.4.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.4.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.4.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.4.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.5.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.5.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + 
"INFO:hf-to-gguf:blk.5.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.5.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.5.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.5.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.5.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.6.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.6.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.6.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.6.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.6.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.6.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.6.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.7.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + 
"INFO:hf-to-gguf:blk.7.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.7.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.7.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.7.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.7.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.7.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.8.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.8.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.8.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.8.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.8.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.8.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.8.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + 
"INFO:hf-to-gguf:blk.9.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.9.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.9.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.9.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.9.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.9.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.9.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.10.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.10.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.10.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.10.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.10.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.10.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.10.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + 
"INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.11.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.11.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.11.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.11.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.11.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.11.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.11.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.12.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.12.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.12.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.12.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.12.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.12.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.12.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + 
"INFO:hf-to-gguf:blk.12.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.13.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.13.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.13.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.13.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.13.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.13.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.13.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.14.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.14.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.14.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.14.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.14.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.14.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.14.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + 
"INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.15.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.15.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.15.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.15.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.15.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.15.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.15.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.16.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.16.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.16.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.16.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.16.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.16.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.16.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + 
"INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.17.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.17.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.17.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.17.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.17.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.17.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.17.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.18.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.18.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.18.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.18.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.18.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.18.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.18.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + 
"INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.19.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.19.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.19.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.19.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.19.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.19.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.19.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.19.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.20.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.20.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.20.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.20.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.20.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.20.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + 
"INFO:hf-to-gguf:blk.20.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.21.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.21.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.21.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.21.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.21.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.21.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.21.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.22.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.22.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.22.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.22.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.22.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + 
"INFO:hf-to-gguf:blk.22.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.22.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.23.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.23.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.23.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.23.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.23.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.23.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.23.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.23.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.24.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.24.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.24.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.24.attn_k.weight, torch.float16 --> F16, shape = {1536, 
256}\n", + "INFO:hf-to-gguf:blk.24.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.24.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.24.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.24.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.24.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.24.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.24.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.24.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.25.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.25.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.25.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.25.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.25.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.25.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.25.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.25.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.25.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.25.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.25.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.25.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.26.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.26.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.26.attn_k.bias, torch.float16 --> F32, shape = 
{256}\n", + "INFO:hf-to-gguf:blk.26.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.26.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.26.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.26.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.26.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.26.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.26.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.26.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.26.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.27.attn_q.bias, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.27.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.27.attn_k.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.27.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.27.attn_v.bias, torch.float16 --> F32, shape = {256}\n", + "INFO:hf-to-gguf:blk.27.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n", + "INFO:hf-to-gguf:blk.27.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n", + "INFO:hf-to-gguf:blk.27.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.27.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n", + "INFO:hf-to-gguf:blk.27.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n", + "INFO:hf-to-gguf:blk.27.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:blk.27.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:output_norm.weight, torch.float16 --> F32, shape = {1536}\n", + "INFO:hf-to-gguf:Set meta model\n", + "INFO:hf-to-gguf:Set model 
parameters\n", + "INFO:hf-to-gguf:gguf: context length = 4096\n", + "INFO:hf-to-gguf:gguf: embedding length = 1536\n", + "INFO:hf-to-gguf:gguf: feed forward length = 8960\n", + "INFO:hf-to-gguf:gguf: head count = 12\n", + "INFO:hf-to-gguf:gguf: key-value head count = 2\n", + "INFO:hf-to-gguf:gguf: rope theta = 10000.0\n", + "INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-06\n", + "INFO:hf-to-gguf:gguf: file type = 1\n", + "INFO:hf-to-gguf:Set model tokenizer\n", + "INFO:numexpr.utils:NumExpr defaulting to 2 threads.\n", + "2025-01-15 03:53:24.715092: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2025-01-15 03:53:24.741806: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2025-01-15 03:53:24.752070: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2025-01-15 03:53:26.568906: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "INFO:gguf.vocab:Adding 151387 merge(s).\n", + "INFO:gguf.vocab:Setting special token type eos to 151645\n", + "INFO:gguf.vocab:Setting special token type pad to 151665\n", + "INFO:gguf.vocab:Setting special token type bos to 151643\n", + "INFO:gguf.vocab:Setting add_bos_token to False\n", + "INFO:gguf.vocab:Setting chat_template to {%- if tools %}\n", + " {{- '<|im_start|>system\\n' }}\n", + " {%- if messages[0]['role'] == 'system' %}\n", + " {{- messages[0]['content'] }}\n", + " {%- else %}\n", + " {{- 'Please reason step by step, and put your final answer within \\\\boxed{}.' 
}}\n", + " {%- endif %}\n", + " {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n", + " {%- for tool in tools %}\n", + " {{- \"\\n\" }}\n", + " {{- tool | tojson }}\n", + " {%- endfor %}\n", + " {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n", + "{%- else %}\n", + " {%- if messages[0]['role'] == 'system' %}\n", + " {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n", + " {%- else %}\n", + " {{- '<|im_start|>system\\nPlease reason step by step, and put your final answer within \\\\boxed{}.<|im_end|>\\n' }}\n", + " {%- endif %}\n", + "{%- endif %}\n", + "{%- for message in messages %}\n", + " {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n", + " {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n", + " {%- elif message.role == \"assistant\" %}\n", + " {{- '<|im_start|>' + message.role }}\n", + " {%- if message.content %}\n", + " {{- '\\n' + message.content }}\n", + " {%- endif %}\n", + " {%- for tool_call in message.tool_calls %}\n", + " {%- if tool_call.function is defined %}\n", + " {%- set tool_call = tool_call.function %}\n", + " {%- endif %}\n", + " {{- '\\n\\n{\"name\": \"' }}\n", + " {{- tool_call.name }}\n", + " {{- '\", \"arguments\": ' }}\n", + " {{- tool_call.arguments | tojson }}\n", + " {{- '}\\n' }}\n", + " {%- endfor %}\n", + " {{- '<|im_end|>\\n' }}\n", + " {%- elif message.role == \"tool\" %}\n", + " {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n", + " {{- '<|im_start|>user' }}\n", + " {%- endif %}\n", + " {{- '\\n\\n' }}\n", + " {{- message.content }}\n", + " {{- '\\n' }}\n", + " {%- if loop.last or 
(messages[loop.index0 + 1].role != \"tool\") %}\n", + " {{- '<|im_end|>\\n' }}\n", + " {%- endif %}\n", + " {%- endif %}\n", + "{%- endfor %}\n", + "{%- if add_generation_prompt %}\n", + " {{- '<|im_start|>assistant\\n' }}\n", + "{%- endif %}\n", + "\n", + "INFO:hf-to-gguf:Set model quantization version\n", + "INFO:gguf.gguf_writer:Writing the following files:\n", + "INFO:gguf.gguf_writer:/content/Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/unsloth.F16.gguf: n_tensors = 338, total_size = 3.1G\n", + "Writing: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3.09G/3.09G [00:34<00:00, 89.6Mbyte/s]\n", + "INFO:hf-to-gguf:Model successfully exported to /content/Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/unsloth.F16.gguf\n", + "Unsloth: Conversion completed! Output location: /content/Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/unsloth.F16.gguf\n", + "Unsloth: [2] Converting GGUF 16bit into q4_k_m. This might take 20 minutes...\n", + "main: build = 4485 (f446c2cf)\n", + "main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n", + "main: quantizing '/content/Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/unsloth.F16.gguf' to '/content/Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/unsloth.Q4_K_M.gguf' as Q4_K_M using 4 threads\n", + "llama_model_loader: loaded meta data with 27 key-value pairs and 338 tensors from /content/Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/unsloth.F16.gguf (version GGUF V3 (latest))\n", + "llama_model_loader: Dumping metadata keys/values. 
Note: KV overrides do not apply in this output.\n", + "llama_model_loader: - kv 0: general.architecture str = qwen2\n", + "llama_model_loader: - kv 1: general.type str = model\n", + "llama_model_loader: - kv 2: general.name str = Qwen2.5 Math 1.5b Instruct Bnb 4bit\n", + "llama_model_loader: - kv 3: general.organization str = Unsloth\n", + "llama_model_loader: - kv 4: general.finetune str = instruct-bnb-4bit\n", + "llama_model_loader: - kv 5: general.basename str = qwen2.5-math\n", + "llama_model_loader: - kv 6: general.size_label str = 1.5B\n", + "llama_model_loader: - kv 7: qwen2.block_count u32 = 28\n", + "llama_model_loader: - kv 8: qwen2.context_length u32 = 4096\n", + "llama_model_loader: - kv 9: qwen2.embedding_length u32 = 1536\n", + "llama_model_loader: - kv 10: qwen2.feed_forward_length u32 = 8960\n", + "llama_model_loader: - kv 11: qwen2.attention.head_count u32 = 12\n", + "llama_model_loader: - kv 12: qwen2.attention.head_count_kv u32 = 2\n", + "llama_model_loader: - kv 13: qwen2.rope.freq_base f32 = 10000.000000\n", + "llama_model_loader: - kv 14: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001\n", + "llama_model_loader: - kv 15: general.file_type u32 = 1\n", + "llama_model_loader: - kv 16: tokenizer.ggml.model str = gpt2\n", + "llama_model_loader: - kv 17: tokenizer.ggml.pre str = qwen2\n", + "llama_model_loader: - kv 18: tokenizer.ggml.tokens arr[str,151936] = [\"!\", \"\\\"\", \"#\", \"$\", \"%\", \"&\", \"'\", ...\n", + "llama_model_loader: - kv 19: tokenizer.ggml.token_type arr[i32,151936] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...\n", + "llama_model_loader: - kv 20: tokenizer.ggml.merges arr[str,151387] = [\"Δ  Δ \", \"Δ Δ  Δ Δ \", \"i n\", \"Δ  t\",...\n", + "llama_model_loader: - kv 21: tokenizer.ggml.eos_token_id u32 = 151645\n", + "llama_model_loader: - kv 22: tokenizer.ggml.padding_token_id u32 = 151665\n", + "llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 151643\n", + "llama_model_loader: - kv 24: 
tokenizer.ggml.add_bos_token bool = false\n", + "llama_model_loader: - kv 25: tokenizer.chat_template str = {%- if tools %}\\n {{- '<|im_start|>...\n", + "llama_model_loader: - kv 26: general.quantization_version u32 = 2\n", + "llama_model_loader: - type f32: 141 tensors\n", + "llama_model_loader: - type f16: 197 tensors\n", + "[ 1/ 338] output_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 2/ 338] token_embd.weight - [ 1536, 151936, 1, 1], type = f16, converting to q6_K .. size = 445.12 MiB -> 182.57 MiB\n", + "[ 3/ 338] blk.0.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 4/ 338] blk.0.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 5/ 338] blk.0.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 6/ 338] blk.0.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 7/ 338] blk.0.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 8/ 338] blk.0.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 9/ 338] blk.0.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 10/ 338] blk.0.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n", + "[ 11/ 338] blk.0.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n", + "[ 12/ 338] blk.0.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 13/ 338] blk.0.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 14/ 338] blk.0.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. 
size = 26.25 MiB -> 7.38 MiB\n", + "[ 15/ 338] blk.1.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 16/ 338] blk.1.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 17/ 338] blk.1.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 18/ 338] blk.1.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 19/ 338] blk.1.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 20/ 338] blk.1.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 21/ 338] blk.1.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 22/ 338] blk.1.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n", + "[ 23/ 338] blk.1.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n", + "[ 24/ 338] blk.1.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 25/ 338] blk.1.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 26/ 338] blk.1.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 27/ 338] blk.2.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 28/ 338] blk.2.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 29/ 338] blk.2.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 30/ 338] blk.2.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 31/ 338] blk.2.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 32/ 338] blk.2.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. 
size = 4.50 MiB -> 1.27 MiB\n", + "[ 33/ 338] blk.2.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 34/ 338] blk.2.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n", + "[ 35/ 338] blk.2.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n", + "[ 36/ 338] blk.2.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 37/ 338] blk.2.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 38/ 338] blk.2.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 39/ 338] blk.3.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 40/ 338] blk.3.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 41/ 338] blk.3.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 42/ 338] blk.3.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 43/ 338] blk.3.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 44/ 338] blk.3.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 45/ 338] blk.3.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 46/ 338] blk.3.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 47/ 338] blk.3.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 48/ 338] blk.3.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 49/ 338] blk.3.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 50/ 338] blk.3.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. 
size = 26.25 MiB -> 7.38 MiB\n", + "[ 51/ 338] blk.4.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 52/ 338] blk.4.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 53/ 338] blk.4.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 54/ 338] blk.4.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 55/ 338] blk.4.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 56/ 338] blk.4.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 57/ 338] blk.4.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 58/ 338] blk.4.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 59/ 338] blk.4.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 60/ 338] blk.4.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 61/ 338] blk.4.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 62/ 338] blk.4.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 63/ 338] blk.5.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 64/ 338] blk.5.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 65/ 338] blk.5.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 66/ 338] blk.5.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 67/ 338] blk.5.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 68/ 338] blk.5.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. 
size = 4.50 MiB -> 1.27 MiB\n", + "[ 69/ 338] blk.5.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 70/ 338] blk.5.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n", + "[ 71/ 338] blk.5.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n", + "[ 72/ 338] blk.5.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 73/ 338] blk.5.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 74/ 338] blk.5.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 75/ 338] blk.6.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 76/ 338] blk.6.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 77/ 338] blk.6.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 78/ 338] blk.6.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 79/ 338] blk.6.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 80/ 338] blk.6.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 81/ 338] blk.6.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 82/ 338] blk.6.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 83/ 338] blk.6.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 84/ 338] blk.6.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 85/ 338] blk.6.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 86/ 338] blk.6.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. 
size = 26.25 MiB -> 7.38 MiB\n", + "[ 87/ 338] blk.7.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 88/ 338] blk.7.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 89/ 338] blk.7.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 90/ 338] blk.7.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 91/ 338] blk.7.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 92/ 338] blk.7.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 93/ 338] blk.7.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 94/ 338] blk.7.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 95/ 338] blk.7.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 96/ 338] blk.7.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 97/ 338] blk.7.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 98/ 338] blk.7.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 99/ 338] blk.8.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 100/ 338] blk.8.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 101/ 338] blk.8.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 102/ 338] blk.8.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 103/ 338] blk.8.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 104/ 338] blk.8.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. 
size = 4.50 MiB -> 1.27 MiB\n", + "[ 105/ 338] blk.8.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 106/ 338] blk.8.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n", + "[ 107/ 338] blk.8.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n", + "[ 108/ 338] blk.8.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 109/ 338] blk.8.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 110/ 338] blk.8.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 111/ 338] blk.9.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 112/ 338] blk.9.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 113/ 338] blk.9.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 114/ 338] blk.9.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 115/ 338] blk.9.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 116/ 338] blk.9.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 117/ 338] blk.9.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 118/ 338] blk.9.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 119/ 338] blk.9.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 120/ 338] blk.9.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 121/ 338] blk.9.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 122/ 338] blk.9.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. 
size = 26.25 MiB -> 7.38 MiB\n", + "[ 123/ 338] blk.10.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 124/ 338] blk.10.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 125/ 338] blk.10.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 126/ 338] blk.10.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 127/ 338] blk.10.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 128/ 338] blk.10.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 129/ 338] blk.10.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 130/ 338] blk.10.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 131/ 338] blk.10.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 132/ 338] blk.10.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 133/ 338] blk.10.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 134/ 338] blk.10.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 135/ 338] blk.11.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 136/ 338] blk.11.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 137/ 338] blk.11.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 138/ 338] blk.11.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 139/ 338] blk.11.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 140/ 338] blk.11.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. 
size = 4.50 MiB -> 1.27 MiB\n", + "[ 141/ 338] blk.11.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 142/ 338] blk.11.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n", + "[ 143/ 338] blk.11.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n", + "[ 144/ 338] blk.11.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 145/ 338] blk.11.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 146/ 338] blk.11.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 147/ 338] blk.12.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 148/ 338] blk.12.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 149/ 338] blk.12.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 150/ 338] blk.12.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 151/ 338] blk.12.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 152/ 338] blk.12.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 153/ 338] blk.12.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 154/ 338] blk.12.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 155/ 338] blk.12.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 156/ 338] blk.12.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. 
size = 26.25 MiB -> 7.38 MiB\n", + "[ 157/ 338] blk.12.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 158/ 338] blk.12.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 159/ 338] blk.13.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 160/ 338] blk.13.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 161/ 338] blk.13.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 162/ 338] blk.13.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 163/ 338] blk.13.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 164/ 338] blk.13.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 165/ 338] blk.13.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 166/ 338] blk.13.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 167/ 338] blk.13.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 168/ 338] blk.13.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 169/ 338] blk.13.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 170/ 338] blk.13.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 171/ 338] blk.14.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 172/ 338] blk.14.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 173/ 338] blk.14.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 174/ 338] blk.14.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. 
size = 4.50 MiB -> 1.27 MiB\n", + "[ 175/ 338] blk.14.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 176/ 338] blk.14.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 177/ 338] blk.14.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 178/ 338] blk.14.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n", + "[ 179/ 338] blk.14.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n", + "[ 180/ 338] blk.14.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 181/ 338] blk.14.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 182/ 338] blk.14.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 183/ 338] blk.15.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 184/ 338] blk.15.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 185/ 338] blk.15.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 186/ 338] blk.15.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 187/ 338] blk.15.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 188/ 338] blk.15.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 189/ 338] blk.15.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 190/ 338] blk.15.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 191/ 338] blk.15.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 192/ 338] blk.15.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. 
size = 26.25 MiB -> 7.38 MiB\n", + "[ 193/ 338] blk.15.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 194/ 338] blk.15.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 195/ 338] blk.16.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 196/ 338] blk.16.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 197/ 338] blk.16.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 198/ 338] blk.16.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 199/ 338] blk.16.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 200/ 338] blk.16.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 201/ 338] blk.16.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 202/ 338] blk.16.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 203/ 338] blk.16.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 204/ 338] blk.16.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 205/ 338] blk.16.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 206/ 338] blk.16.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 207/ 338] blk.17.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 208/ 338] blk.17.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 209/ 338] blk.17.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 210/ 338] blk.17.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. 
size = 4.50 MiB -> 1.27 MiB\n", + "[ 211/ 338] blk.17.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 212/ 338] blk.17.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 213/ 338] blk.17.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 214/ 338] blk.17.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n", + "[ 215/ 338] blk.17.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n", + "[ 216/ 338] blk.17.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 217/ 338] blk.17.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 218/ 338] blk.17.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 219/ 338] blk.18.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 220/ 338] blk.18.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 221/ 338] blk.18.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 222/ 338] blk.18.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 223/ 338] blk.18.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 224/ 338] blk.18.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 225/ 338] blk.18.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 226/ 338] blk.18.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 227/ 338] blk.18.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 228/ 338] blk.18.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. 
size = 26.25 MiB -> 7.38 MiB\n", + "[ 229/ 338] blk.18.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 230/ 338] blk.18.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 231/ 338] blk.19.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 232/ 338] blk.19.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 233/ 338] blk.19.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 234/ 338] blk.19.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 235/ 338] blk.19.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 236/ 338] blk.19.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 237/ 338] blk.19.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 238/ 338] blk.19.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 239/ 338] blk.19.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 240/ 338] blk.19.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 241/ 338] blk.19.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 242/ 338] blk.19.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 243/ 338] blk.20.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 244/ 338] blk.20.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 245/ 338] blk.20.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 246/ 338] blk.20.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. 
size = 4.50 MiB -> 1.27 MiB\n", + "[ 247/ 338] blk.20.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 248/ 338] blk.20.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 249/ 338] blk.20.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 250/ 338] blk.20.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n", + "[ 251/ 338] blk.20.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n", + "[ 252/ 338] blk.20.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 253/ 338] blk.20.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 254/ 338] blk.20.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 255/ 338] blk.21.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 256/ 338] blk.21.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 257/ 338] blk.21.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 258/ 338] blk.21.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 259/ 338] blk.21.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 260/ 338] blk.21.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 261/ 338] blk.21.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 262/ 338] blk.21.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 263/ 338] blk.21.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 264/ 338] blk.21.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. 
size = 26.25 MiB -> 7.38 MiB\n", + "[ 265/ 338] blk.21.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 266/ 338] blk.21.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 267/ 338] blk.22.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 268/ 338] blk.22.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 269/ 338] blk.22.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 270/ 338] blk.22.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 271/ 338] blk.22.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 272/ 338] blk.22.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 273/ 338] blk.22.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 274/ 338] blk.22.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 275/ 338] blk.22.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 276/ 338] blk.22.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 277/ 338] blk.22.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 278/ 338] blk.22.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 279/ 338] blk.23.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 280/ 338] blk.23.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 281/ 338] blk.23.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 282/ 338] blk.23.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. 
size = 4.50 MiB -> 1.27 MiB\n", + "[ 283/ 338] blk.23.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 284/ 338] blk.23.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 285/ 338] blk.23.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 286/ 338] blk.23.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n", + "[ 287/ 338] blk.23.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n", + "[ 288/ 338] blk.23.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 289/ 338] blk.23.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 290/ 338] blk.23.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 291/ 338] blk.24.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 292/ 338] blk.24.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 293/ 338] blk.24.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 294/ 338] blk.24.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 295/ 338] blk.24.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 296/ 338] blk.24.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 297/ 338] blk.24.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 298/ 338] blk.24.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n", + "[ 299/ 338] blk.24.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n", + "[ 300/ 338] blk.24.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. 
size = 26.25 MiB -> 7.38 MiB\n", + "[ 301/ 338] blk.24.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 302/ 338] blk.24.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 303/ 338] blk.25.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 304/ 338] blk.25.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 305/ 338] blk.25.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 306/ 338] blk.25.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 307/ 338] blk.25.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 308/ 338] blk.25.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 309/ 338] blk.25.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 310/ 338] blk.25.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n", + "[ 311/ 338] blk.25.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n", + "[ 312/ 338] blk.25.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 313/ 338] blk.25.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 314/ 338] blk.25.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 315/ 338] blk.26.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 316/ 338] blk.26.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 317/ 338] blk.26.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 318/ 338] blk.26.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. 
size = 4.50 MiB -> 1.27 MiB\n", + "[ 319/ 338] blk.26.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 320/ 338] blk.26.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 321/ 338] blk.26.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 322/ 338] blk.26.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n", + "[ 323/ 338] blk.26.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n", + "[ 324/ 338] blk.26.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 325/ 338] blk.26.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 326/ 338] blk.26.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "[ 327/ 338] blk.27.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 328/ 338] blk.27.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n", + "[ 329/ 338] blk.27.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 330/ 338] blk.27.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 331/ 338] blk.27.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 332/ 338] blk.27.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n", + "[ 333/ 338] blk.27.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n", + "[ 334/ 338] blk.27.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n", + "[ 335/ 338] blk.27.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n", + "[ 336/ 338] blk.27.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. 
size = 26.25 MiB -> 7.38 MiB\n", + "[ 337/ 338] blk.27.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n", + "[ 338/ 338] blk.27.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n", + "llama_model_quantize_impl: model size = 2944.68 MB\n", + "llama_model_quantize_impl: quant size = 934.69 MB\n", + "\n", + "main: quantize time = 160895.28 ms\n", + "main: total time = 160895.28 ms\n", + "Unsloth: Conversion completed! Output location: /content/Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/unsloth.Q4_K_M.gguf\n" + ] + } + ], + "source": [ + "# Save to 8bit Q8_0\n", + "if True: model.save_pretrained_gguf(\"model\", tokenizer,)\n", + "if False: model.push_to_hub_gguf(\"hf/model\", tokenizer, token = \"\")\n", + "\n", + "# Save to 16bit GGUF\n", + "if False: model.save_pretrained_gguf(\"model\", tokenizer, quantization_method = \"f16\")\n", + "if False: model.push_to_hub_gguf(\"hf/model\", tokenizer, quantization_method = \"f16\", token = \"\")\n", + "\n", + "# Save to q4_k_m GGUF\n", + "if True: model.save_pretrained_gguf(\"Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m\", tokenizer, quantization_method = \"q4_k_m\")\n", + "if False: model.push_to_hub_gguf(\"hf/model\", tokenizer, quantization_method = \"q4_k_m\", token = \"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TL-PCYb1yMzx" + }, + "source": [ + "Now, use the `model-unsloth.gguf` file or `model-unsloth-Q4_K_M.gguf` file in `llama.cpp` or a UI based system like `GPT4All`. You can install GPT4All by going [here](https://gpt4all.io/index.html)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XOFzC441vCtq" + }, + "source": [ + "\n", + "### Ollama Support\n", + "\n", + "[Unsloth](https://github.com/unslothai/unsloth) now allows you to automatically finetune and create a [Modelfile](https://github.com/ollama/ollama/blob/main/docs/modelfile.md), and export to [Ollama](https://ollama.com/)! 
This makes finetuning much easier and provides a seamless workflow from `Unsloth` to `Ollama`!\n", + "\n", + "Let's first install `Ollama`!" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NUxcyP_UfeLl", + "outputId": "52262c8e-341b-4dce-c967-cddc46aa659d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">>> Installing ollama to /usr/local\n", + ">>> Downloading Linux amd64 bundle\n", + "############################################################################################# 100.0%\n", + ">>> Creating ollama user...\n", + ">>> Adding ollama user to video group...\n", + ">>> Adding current user to ollama group...\n", + ">>> Creating ollama systemd service...\n", + "\u001b[1m\u001b[31mWARNING:\u001b[m systemd is not running\n", + "\u001b[1m\u001b[31mWARNING:\u001b[m Unable to detect NVIDIA/AMD GPU. Install lspci or lshw to automatically detect and install GPU dependencies.\n", + ">>> The Ollama API is now available at 127.0.0.1:11434.\n", + ">>> Install complete. Run \"ollama\" from the command line.\n" + ] + } + ], + "source": [ + "!curl -fsSL https://ollama.com/install.sh | sh" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "mcP9omF_tN7Q" + }, + "outputs": [], + "source": [ + "import subprocess\n", + "\n", + "subprocess.Popen([\"ollama\", \"serve\"])\n", + "import time\n", + "\n", + "time.sleep(3) # Wait for a few seconds for Ollama to load!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "md3PExRLRhOc" + }, + "source": [ + "`Ollama` needs a `Modelfile`, which specifies the model's prompt format. 
Let's print Unsloth's auto generated one:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 263 + }, + "id": "h82vfNigRhiz", + "outputId": "569d817b-a804-4ee8-fbf3-511a94afbdb6" + }, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "Qwen2TokenizerFast has no attribute _ollama_modelfile", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtokenizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ollama_modelfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1102\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1103\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__dict__\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1104\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"{self.__class__.__name__} has no attribute {key}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1105\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1106\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: Qwen2TokenizerFast has no attribute _ollama_modelfile" + ] + } + ], + "source": [ + "print(tokenizer._ollama_modelfile)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j6cipBJBudxv" + }, + "source": [ + "We will now create an `Ollama` model called `unsloth_model` using the auto-generated `Modelfile`!" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "SDTUJv_QiaVh", + "outputId": "ab427885-22c0-4786-eaf5-d76e97d765a4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Error: specified Modelfile wasn't found\n" + ] + } + ], + "source": [ + "!ollama create unsloth_model -f ./model/Modelfile" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-KSoKTKQukba" + }, + "source": [ + "And now we can do inference on it via `Ollama`!\n", + "\n", + "You can also upload to `Ollama` and try the `Ollama` Desktop app by heading to https://www.ollama.com/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "rkp0uMrNpYaW", + "outputId": "38bb3bd7-4a29-4c81-e319-388dcd96a449" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:04.241326628Z\",\"message\":{\"role\":\"assistant\",\"content\":\"The\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:04.465575479Z\",\"message\":{\"role\":\"assistant\",\"content\":\" next\"},\"done\":false}\n", + 
"{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:04.760101468Z\",\"message\":{\"role\":\"assistant\",\"content\":\" number\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:05.051240606Z\",\"message\":{\"role\":\"assistant\",\"content\":\" in\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:05.376545126Z\",\"message\":{\"role\":\"assistant\",\"content\":\" the\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:05.515751946Z\",\"message\":{\"role\":\"assistant\",\"content\":\" Fibonacci\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:05.658721744Z\",\"message\":{\"role\":\"assistant\",\"content\":\" sequence\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:05.795226527Z\",\"message\":{\"role\":\"assistant\",\"content\":\" after\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:05.923676364Z\",\"message\":{\"role\":\"assistant\",\"content\":\" \"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:06.053599585Z\",\"message\":{\"role\":\"assistant\",\"content\":\"8\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:06.187220374Z\",\"message\":{\"role\":\"assistant\",\"content\":\" is\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:06.316237671Z\",\"message\":{\"role\":\"assistant\",\"content\":\" \"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:06.448901764Z\",\"message\":{\"role\":\"assistant\",\"content\":\"13\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:06.585864644Z\",\"message\":{\"role\":\"assistant\",\"content\":\" (\"},\"done\":false}\n", + 
"{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:06.712030586Z\",\"message\":{\"role\":\"assistant\",\"content\":\"the\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:06.835728964Z\",\"message\":{\"role\":\"assistant\",\"content\":\" sum\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:06.962898827Z\",\"message\":{\"role\":\"assistant\",\"content\":\" of\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:07.088064406Z\",\"message\":{\"role\":\"assistant\",\"content\":\" the\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:07.212942126Z\",\"message\":{\"role\":\"assistant\",\"content\":\" previous\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:07.336569966Z\",\"message\":{\"role\":\"assistant\",\"content\":\" two\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:07.46094096Z\",\"message\":{\"role\":\"assistant\",\"content\":\" numbers\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:07.593857726Z\",\"message\":{\"role\":\"assistant\",\"content\":\").\"},\"done\":false}\n", + "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:07.741203726Z\",\"message\":{\"role\":\"assistant\",\"content\":\"\"},\"done_reason\":\"stop\",\"done\":true,\"total_duration\":3741960321,\"load_duration\":48967410,\"prompt_eval_count\":47,\"prompt_eval_duration\":150430000,\"eval_count\":23,\"eval_duration\":3499634000}\n" + ] + } + ], + "source": [ + "!curl http://localhost:11434/api/chat -d '{ \\\n", + " \"model\": \"unsloth_model\", \\\n", + " \"messages\": [ \\\n", + " { \"role\": \"user\", \"content\": \"Continue the Fibonacci sequence: 1, 1, 2, 3, 5, 8,\" } \\\n", + " ] \\\n", + " }'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XnMbhp7KsKhr" + }, + "source": [ + 
"# ChatGPT interactive mode\n", + "\n", + "### ⭐ To run the finetuned model like in a ChatGPT style interface, first click the **| >_ |** button.\n", + "![](https://raw.githubusercontent.com/unslothai/unsloth/nightly/images/Where_Terminal.png)\n", + "\n", + "---\n", + "---\n", + "---\n", + "\n", + "### ⭐ Then, type `ollama run unsloth_model`\n", + "\n", + "![](https://raw.githubusercontent.com/unslothai/unsloth/nightly/images/Terminal_Type.png)\n", + "\n", + "---\n", + "---\n", + "---\n", + "### ⭐ And you have a ChatGPT style assistant!\n", + "\n", + "### Type any question you like and press `ENTER`. If you want to exit, hit `CTRL + D`\n", + "![](https://raw.githubusercontent.com/unslothai/unsloth/nightly/images/Assistant.png)\n", + "\n", + "You can also use the `model-unsloth.gguf` file or `model-unsloth-Q4_K_M.gguf` file in llama.cpp or a UI based system like Jan or Open WebUI. You can install Jan [here](https://github.com/janhq/jan) and Open WebUI [here](https://github.com/open-webui/open-webui).\n", + "\n", + "\n", + "\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "00936bda447f46e5b1439913aeb730de": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + 
"05096318093d4ded8f6aacf0d8baa5a1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_8e10bfda957d412592a2b5a7a33c81aa", + "IPY_MODEL_2157c5a847d24a9c88fe0a657fa4a7d5", + "IPY_MODEL_df90e41a4fc241c1ad61506089b59e83" + ], + "layout": "IPY_MODEL_7ec180f6335341d594e0a726c48777a3" + } + }, + "0576ab6d4cce4cb09f0755a230d50980": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "061df7aa5cd44f8481ec3af743ffc442": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "06a7440478ee4c35a58bba8a434d9e69": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "075bc926c06742d399070029d705173f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4543871a84b646f286bd601896d5d8cc", + "max": 632, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_07bac781635f4da69b591e7579359c64", + "value": 632 + } + }, + "078aba59954f45d5bae22aacaa9a5a8b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "07bac781635f4da69b591e7579359c64": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0a9c24d18391432f84fe2f89f69193b7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0aa69e1780cf4537ada828298340f744": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ce3e19e662e94dec89ee565ed892d571", + "max": 7522, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3fa8e45950cd4a28b7314b6545f9bbc8", + "value": 7522 + } + }, + "0b5a520ef7ef4df7ac67d2fce9036a92": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0c506dbd691d428082ed8151cfebd5e6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4698d859bc2648f8bd41de734d3e6b6f", + "placeholder": "​", + "style": "IPY_MODEL_30ed19bc86744c8b92b685ef97eff53d", + "value": " 34.1M/34.1M [00:00<00:00, 39.3MB/s]" + } + }, + "102875abfc5b4e6cab735f75bf200d50": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", 
+ "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ad90ff35111d48d78a4a8595947ab8d3", + "max": 16000, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a2d515ef793b4435b1fe6035de08585b", + "value": 16000 + } + }, + "1215442ba51b430d8de2358f44f7a9fa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "13339cb1901644b9be473299832f6ced": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1596274edd804f1e858d2b5fb307300c", + "placeholder": "​", + "style": "IPY_MODEL_81654beebf7e425aa4f23504c1b71112", + "value": " 7.52k/7.52k [00:00<00:00, 379kB/s]" + } + }, + "140f686ee529400b81876ccd4a37558c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": 
"HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_aafb1c4d69944584aaf0bc30e47df550", + "IPY_MODEL_e420266f780541e481cfbe4a80b2675c", + "IPY_MODEL_0c506dbd691d428082ed8151cfebd5e6" + ], + "layout": "IPY_MODEL_522c004154f0468e9057fdc2609a11d7" + } + }, + "1596274edd804f1e858d2b5fb307300c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "16088f5268744b63a2fe9f2d22a0dcbb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + 
"bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "16b2957328bf4093a00cb3bf2e655c24": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_50c5f5d69a164f8fa51721e1c44a7783", + "placeholder": "​", + "style": "IPY_MODEL_00936bda447f46e5b1439913aeb730de", + "value": "tokenizer_config.json: 100%" + } + }, + "1a1777e3d39a4d98aa9bd0685925c52d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_83efa62da7c04b79829e69f81f229c56", + "placeholder": "​", + "style": 
"IPY_MODEL_7843d80c596749abae891f9855830943", + "value": "tokenizer.json: 100%" + } + }, + "2157c5a847d24a9c88fe0a657fa4a7d5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e5dfd1e6fcf44da6bdb2957cf603710f", + "max": 16000, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_6666615c6adc49bead154aff367c30e2", + "value": 16000 + } + }, + "216e1bd82a024ad895c7abd89025b771": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_61129d3bdae2454e997bf3263aa616b9", + "max": 7031863, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b0732ed61af849678e127cdaaa67ce7d", + "value": 7031863 + } + }, + "22b564917f5f4c8e81a8347f90a3d557": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + 
"description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7927c572e90248c7a97ea2303fb9547e", + "placeholder": "​", + "style": "IPY_MODEL_738db5c70e8644d8825396a0c9f8309f", + "value": "model.safetensors: 100%" + } + }, + "22dc4471a9b54eefafd7021177608984": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "25fe66aa53bc49d281a17ead8041ff1d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "28bf71c117fe456ba48b1107a22c1d16": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "29bdeef39f4f46ae910080a2b4547686": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_74363b66c6064709b98b95c4cd69662a", + "placeholder": "​", + "style": "IPY_MODEL_22dc4471a9b54eefafd7021177608984", + "value": "Map: 100%" + } + }, + "2bdf072fd2b44e0abd3e2d5802a2054b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + 
"left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2cf07655e93b49d9823d460b68bfa553": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4e7b87ce3eec4994996c1d467bf6b2fb", + "max": 490, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1215442ba51b430d8de2358f44f7a9fa", + "value": 490 + } + }, + "2d2844e25ace497aba29b9fb68173b0f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_35d8b168ec234ca7a045d47c348ca9a6", + "IPY_MODEL_c2fcb84d8f834b70880e7bfbe1e691c7", + "IPY_MODEL_fc4242c15c81477899b8446c6ad6b1a3" + ], + "layout": "IPY_MODEL_dd3f008a31b24d86af3ae7e9760cae8d" + } + }, + "2dfbe287b29e4f6b9c84f95d751daffa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", 
+ "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "30ed19bc86744c8b92b685ef97eff53d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "326908aed9c64fc6abb1f767bbc0211a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_689e62ad5ee249bfa66e675e778b96f3", + "placeholder": "​", + "style": "IPY_MODEL_ff3791fe18f141e59d63056c5816b0f5", + "value": "vocab.json: 100%" + } + }, + "336d3e3f8280423ab2a62e0cdf7631d8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_87cc4745ead94aaa8e41f4eed3cb1e59", + "placeholder": "​", + "style": "IPY_MODEL_6211db271a134ea9b2bd08089faf7d80", + "value": " 16000/16000 [00:01<00:00, 13001.59 examples/s]" + } + }, + "35d8b168ec234ca7a045d47c348ca9a6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c4560decbda04d628fd815fa8f3953a0", + "placeholder": "​", + "style": "IPY_MODEL_af65219a68e644698d5a687095007316", + "value": "generation_config.json: 100%" + } + }, + "365fac5b8cb14b3c9c85c21529551c7d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": 
"" + } + }, + "3ccf6630013f4c4d950d30eca6d8e2be": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3d360b6d73704cf9956bd7279a0af53a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3e308101e62f43c799ca27b6eaa744d4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3f85a717e1d242eaa21deda5944c658f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b5f3983270e54dd3911a35fe3128d9b7", + "placeholder": "​", + "style": "IPY_MODEL_9d7d05e6711149aa8adf3d03f3468e2f", + "value": " 7.03M/7.03M [00:00<00:00, 21.5MB/s]" + } + }, + "3fa8e45950cd4a28b7314b6545f9bbc8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "40a09d1d63ad49eca2a955e3e7d0cc72": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "41ed1216684a40cba7c0cf3e3aaf18c1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": 
null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "445862f7c16947d198aae928b2f3f6bb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4543871a84b646f286bd601896d5d8cc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + 
"model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "463bddb985dc4b62a9ee0be8919cc5cf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4698d859bc2648f8bd41de734d3e6b6f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "49a0e5d1681046c88e9bc1159d47d875": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fb7d899a343846f789adc4d9a3f54659", + "placeholder": "​", + "style": "IPY_MODEL_87d15d738ba4471cb86fa002620042a8", + "value": "special_tokens_map.json: 100%" + } + }, + "4a82e48b80de44f29cef8b1724ef85e1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": 
null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4e7b87ce3eec4994996c1d467bf6b2fb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + 
}, + "50c5f5d69a164f8fa51721e1c44a7783": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "512462794d674769b6629ad8c8c254b7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_16088f5268744b63a2fe9f2d22a0dcbb", + "placeholder": "​", + "style": "IPY_MODEL_b95d504adfd742c89a3fb277233fac5b", + "value": " 1.14G/1.14G [00:08<00:00, 422MB/s]" + } + }, + "522c004154f0468e9057fdc2609a11d7": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "54faaf670f5e43aa9f0cada481be066c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d7535f7fad1d416a8936735cd0f23099", + "max": 613, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_061df7aa5cd44f8481ec3af743ffc442", + "value": 613 + } + }, + "56a1c1654c304b05876f0e3664a091ed": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "595074903827402b8a1246c101340eff": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5b214ea52c60495bb35f4b89439c70b6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5cd4555da19a47c8beb1c0c7d782aa10": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e5e50e29698645a1b0bc68d7821d27a7", + "placeholder": "​", + "style": "IPY_MODEL_3d360b6d73704cf9956bd7279a0af53a", + "value": "Map: 100%" + } + }, + "5d53d4ee8b5747f4a29d3efcabc2a324": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5e7b3dcce78a463eb14e42cd3d2a0b0b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_22b564917f5f4c8e81a8347f90a3d557", + "IPY_MODEL_6b993dd64aa7489c8ca8c2f5d728178e", + "IPY_MODEL_512462794d674769b6629ad8c8c254b7" + ], + "layout": "IPY_MODEL_d8c30b721eae4c33a8fa2746c91f6ebe" + } + }, + "61129d3bdae2454e997bf3263aa616b9": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6211db271a134ea9b2bd08089faf7d80": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6666615c6adc49bead154aff367c30e2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "675bd5d813834814b6f9c8273a026e0c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ac97da47a07441f8829640460203fa34", + "IPY_MODEL_7b532513494f4b35b74573e89140dda6", + "IPY_MODEL_c8e9096bd7764591aa44e8a2e1852c74" + ], + "layout": "IPY_MODEL_d394bbf04b904c0eb87ce8bd19bc4788" + } + }, + "689e62ad5ee249bfa66e675e778b96f3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + 
"right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "69eb1c785b6f46cfab022b650e0ccc7b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6b993dd64aa7489c8ca8c2f5d728178e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "danger", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3ccf6630013f4c4d950d30eca6d8e2be", + "max": 1143327678, + "min": 0, + "orientation": "horizontal", + "style": 
"IPY_MODEL_b956ad4e4ffb47908402a0d9fce147fb", + "value": 1143327569 + } + }, + "738db5c70e8644d8825396a0c9f8309f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "73a0c53397be44c7974c4c9b1ad0423c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "73b62457b51e413dadc7bfca227abe39": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": 
null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "74069380475f434bb34afec56314fa4e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f3d89e8a8407433db10d775dec82099f", + "placeholder": "​", + "style": "IPY_MODEL_c57b3e2abc4d4147a7410fdeb16f318e", + "value": "added_tokens.json: 100%" + } + }, + "74363b66c6064709b98b95c4cd69662a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": 
null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "76ea4577ebc34368a1f220104a0778af": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_16b2957328bf4093a00cb3bf2e655c24", + "IPY_MODEL_0aa69e1780cf4537ada828298340f744", + "IPY_MODEL_13339cb1901644b9be473299832f6ced" + ], + "layout": "IPY_MODEL_ae2e31a363ae4051ac40375bfec4dd42" + } + }, + "7843d80c596749abae891f9855830943": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7927c572e90248c7a97ea2303fb9547e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + 
"grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7b1d17800e4540ffb099bdef9eb8ae6c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_74069380475f434bb34afec56314fa4e", + "IPY_MODEL_075bc926c06742d399070029d705173f", + "IPY_MODEL_81300c1080894e34a3a5d88eeac33801" + ], + "layout": "IPY_MODEL_28bf71c117fe456ba48b1107a22c1d16" + } + }, + "7b532513494f4b35b74573e89140dda6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f308b42d29b74c6fa49b9558dfe63409", + "max": 16000, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d3b092ed193c49e0ba26aeae08111b4f", + "value": 16000 + } + }, + 
"7b9032352b0a4ae4b7bacfd217e29451": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b272574e4eab4313a3ea5f97c1d5c359", + "IPY_MODEL_102875abfc5b4e6cab735f75bf200d50", + "IPY_MODEL_b1011b504fc145af88d96667cafef7d4" + ], + "layout": "IPY_MODEL_d9d77e5d7b014d6883b8dafa3a685dc2" + } + }, + "7bf13dd9534d4e9197bd1874687862ec": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "7ec180f6335341d594e0a726c48777a3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": 
null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7f565d6c60064b1f86a60e9037651f14": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8088d38d4d374574b9e52fd7356e2ea6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_326908aed9c64fc6abb1f767bbc0211a", + "IPY_MODEL_bc86d9704e2f460bb7cfd04b664eed12", + "IPY_MODEL_9e0ea9e88df94b4d9967ff1a7ed1bdd6" + ], + "layout": "IPY_MODEL_b47cd2b42af746d4ab10c7d0c640ba7a" + } + }, + "811c37a344f84daabaa1e3e7eb985769": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1a1777e3d39a4d98aa9bd0685925c52d", + "IPY_MODEL_216e1bd82a024ad895c7abd89025b771", + "IPY_MODEL_3f85a717e1d242eaa21deda5944c658f" + ], + "layout": "IPY_MODEL_ad45988fb01248ac96ff1b127149fafb" + } + }, + "81300c1080894e34a3a5d88eeac33801": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_894dcbc8129c4f979581aa8c05efb747", + "placeholder": "​", + "style": "IPY_MODEL_0b5a520ef7ef4df7ac67d2fce9036a92", + "value": " 632/632 [00:00<00:00, 54.1kB/s]" + } + }, + "81654beebf7e425aa4f23504c1b71112": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "81c0046c66224f7582d72c1dbf919d33": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "83efa62da7c04b79829e69f81f229c56": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "850136c0706a448eb98a32181f4f192d": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b88a783124404b9b807b83d6f7023285", + "max": 16000, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_f596811361d34a75b178553520aa3400", + "value": 16000 + } + }, + "85dba50a8bc04ccebec42f560bbc1b79": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "87cc4745ead94aaa8e41f4eed3cb1e59": { + "model_module": "@jupyter-widgets/base", + "model_module_version": 
"1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "87d15d738ba4471cb86fa002620042a8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "894dcbc8129c4f979581aa8c05efb747": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8e10bfda957d412592a2b5a7a33c81aa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bfbfd237e36d46f9be546d2416b7bb36", + "placeholder": "​", + "style": "IPY_MODEL_73a0c53397be44c7974c4c9b1ad0423c", + "value": "Generating train split: 100%" + } + }, + "8f46cd3d936343aca34b328df6b3dea0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + 
"IPY_MODEL_5cd4555da19a47c8beb1c0c7d782aa10", + "IPY_MODEL_850136c0706a448eb98a32181f4f192d", + "IPY_MODEL_d4ba51ee7f864afb822a7e4076496ab4" + ], + "layout": "IPY_MODEL_2dfbe287b29e4f6b9c84f95d751daffa" + } + }, + "8fc45c66242648cc94f51cf08ff18f25": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "91d3a3d009f84b05b62f54353dc955eb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "93b2a2bd27dc49d28acc3b6643cfc022": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_29bdeef39f4f46ae910080a2b4547686", + "IPY_MODEL_98ba361adff74799a8aeecfd313e3beb", + "IPY_MODEL_336d3e3f8280423ab2a62e0cdf7631d8" + ], + "layout": "IPY_MODEL_4a82e48b80de44f29cef8b1724ef85e1" + } + }, + "98ba361adff74799a8aeecfd313e3beb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f4f28c070f7241539ec87dd82a701a7e", + "max": 16000, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ab5f2943fb674f03bb003d11304b3188", + "value": 16000 + } + }, + "9c676e3f4e204349ae41edc8ae770ce3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, 
+ "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9d7d05e6711149aa8adf3d03f3468e2f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9e0ea9e88df94b4d9967ff1a7ed1bdd6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_445862f7c16947d198aae928b2f3f6bb", + "placeholder": "​", + "style": "IPY_MODEL_c2ca493b2d8a487c8db0defbeea297cc", + "value": " 2.78M/2.78M [00:00<00:00, 13.7MB/s]" + } + }, + "a2d515ef793b4435b1fe6035de08585b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": 
"1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a47c4b141e244e44b068a2d79f9ac3f8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d98b7e8cc525466598fe34609e41fa54", + "placeholder": "​", + "style": "IPY_MODEL_463bddb985dc4b62a9ee0be8919cc5cf", + "value": " 490/490 [00:00<00:00, 33.1kB/s]" + } + }, + "a8e9032fb3e34ca08f0a742c20c15848": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "aafb1c4d69944584aaf0bc30e47df550": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3e308101e62f43c799ca27b6eaa744d4", + "placeholder": "​", + "style": "IPY_MODEL_0576ab6d4cce4cb09f0755a230d50980", 
+ "value": "dpo_fixed.jsonl: 100%" + } + }, + "ab5f2943fb674f03bb003d11304b3188": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "aba1e58a13744fbea62c85ced2969652": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ac97da47a07441f8829640460203fa34": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7f565d6c60064b1f86a60e9037651f14", + "placeholder": "​", + "style": "IPY_MODEL_f67d2789738a4b3dac5dfdd91c588c5d", + "value": "Map: 100%" + } + }, + "ad2b896b04fb42c18839380b8a40d19b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ad42cefa580c4e84ba5201b704218c34": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ad45988fb01248ac96ff1b127149fafb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, 
+ "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ad90ff35111d48d78a4a8595947ab8d3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ae2e31a363ae4051ac40375bfec4dd42": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + 
"model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "af65219a68e644698d5a687095007316": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b0732ed61af849678e127cdaaa67ce7d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "b1011b504fc145af88d96667cafef7d4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9c676e3f4e204349ae41edc8ae770ce3", + "placeholder": "​", + "style": "IPY_MODEL_ad2b896b04fb42c18839380b8a40d19b", + "value": " 16000/16000 [02:10<00:00, 100.14 examples/s]" + } + }, + "b272574e4eab4313a3ea5f97c1d5c359": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2bdf072fd2b44e0abd3e2d5802a2054b", + "placeholder": "​", + "style": "IPY_MODEL_f8973d77047c4106987ae37a4c6a97ec", + "value": "Map: 100%" + } + }, + "b47cd2b42af746d4ab10c7d0c640ba7a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + 
"grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b5f3983270e54dd3911a35fe3128d9b7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b88a783124404b9b807b83d6f7023285": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b956ad4e4ffb47908402a0d9fce147fb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "b95d504adfd742c89a3fb277233fac5b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ba57df115cbd4571913b8399d3bdf2e0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bc86d9704e2f460bb7cfd04b664eed12": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8fc45c66242648cc94f51cf08ff18f25", + "max": 2776833, + 
"min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_7bf13dd9534d4e9197bd1874687862ec", + "value": 2776833 + } + }, + "bf846b25033d48bd818ae306e671b61b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bfbfd237e36d46f9be546d2416b7bb36": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c2ca493b2d8a487c8db0defbeea297cc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c2fcb84d8f834b70880e7bfbe1e691c7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_41ed1216684a40cba7c0cf3e3aaf18c1", + "max": 161, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_365fac5b8cb14b3c9c85c21529551c7d", + "value": 161 + } + }, + "c4560decbda04d628fd815fa8f3953a0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c46dbb7e089a46c5be6f9947c79cf23f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_85dba50a8bc04ccebec42f560bbc1b79", + "placeholder": "​", + "style": "IPY_MODEL_a8e9032fb3e34ca08f0a742c20c15848", + "value": " 1.67M/1.67M [00:00<00:00, 6.40MB/s]" + } + }, + "c57b3e2abc4d4147a7410fdeb16f318e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c8e9096bd7764591aa44e8a2e1852c74": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_56a1c1654c304b05876f0e3664a091ed", + "placeholder": "​", + "style": "IPY_MODEL_0a9c24d18391432f84fe2f89f69193b7", + "value": " 16000/16000 [00:01<00:00, 10752.11 examples/s]" + } + }, + "cb7eb0355b084370a2ba96125d6aa939": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ce3e19e662e94dec89ee565ed892d571": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + 
"grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d196c1273a72403e926266ebde2ff551": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d25dfa432ab84d04b346b8cd58c89720": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d2f03b20726d489287e5ebce211303c4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_aba1e58a13744fbea62c85ced2969652", + "placeholder": "​", + "style": "IPY_MODEL_ad42cefa580c4e84ba5201b704218c34", + "value": "merges.txt: 100%" + } + }, + "d394bbf04b904c0eb87ce8bd19bc4788": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d3b092ed193c49e0ba26aeae08111b4f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d47f521581c8423682ed590854f55702": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + 
"description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d196c1273a72403e926266ebde2ff551", + "placeholder": "​", + "style": "IPY_MODEL_cb7eb0355b084370a2ba96125d6aa939", + "value": "README.md: 100%" + } + }, + "d4ba51ee7f864afb822a7e4076496ab4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_06a7440478ee4c35a58bba8a434d9e69", + "placeholder": "​", + "style": "IPY_MODEL_81c0046c66224f7582d72c1dbf919d33", + "value": " 16000/16000 [00:01<00:00, 10769.01 examples/s]" + } + }, + "d7535f7fad1d416a8936735cd0f23099": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, 
+ "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d8c30b721eae4c33a8fa2746c91f6ebe": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d98b7e8cc525466598fe34609e41fa54": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + 
"grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d9d77e5d7b014d6883b8dafa3a685dc2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dd3f008a31b24d86af3ae7e9760cae8d": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "df90e41a4fc241c1ad61506089b59e83": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_69eb1c785b6f46cfab022b650e0ccc7b", + "placeholder": "​", + "style": "IPY_MODEL_91d3a3d009f84b05b62f54353dc955eb", + "value": " 16000/16000 [00:00<00:00, 72576.29 examples/s]" + } + }, + "e420266f780541e481cfbe4a80b2675c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_40a09d1d63ad49eca2a955e3e7d0cc72", + "max": 34098509, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_25fe66aa53bc49d281a17ead8041ff1d", + "value": 34098509 + } + }, + "e5dfd1e6fcf44da6bdb2957cf603710f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e5e50e29698645a1b0bc68d7821d27a7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + 
"state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e6683a89074d456e949a5072a55b1602": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bf846b25033d48bd818ae306e671b61b", + "max": 1671853, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_595074903827402b8a1246c101340eff", + "value": 1671853 + } + }, + "e7dc3433d02642728f805d18ab987102": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e7f156806cf84f80bc6562cc7fe24e08": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e7dc3433d02642728f805d18ab987102", + "placeholder": "​", + "style": "IPY_MODEL_5d53d4ee8b5747f4a29d3efcabc2a324", + "value": " 613/613 [00:00<00:00, 42.9kB/s]" + } + }, + "ec2052c4d20f4273b2fc9d20b9f6cc04": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d47f521581c8423682ed590854f55702", + "IPY_MODEL_2cf07655e93b49d9823d460b68bfa553", + "IPY_MODEL_a47c4b141e244e44b068a2d79f9ac3f8" + ], + "layout": "IPY_MODEL_078aba59954f45d5bae22aacaa9a5a8b" + } + }, + "f308b42d29b74c6fa49b9558dfe63409": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f3d89e8a8407433db10d775dec82099f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f4f28c070f7241539ec87dd82a701a7e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f596811361d34a75b178553520aa3400": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f602392931b1423e867b9e37dad27300": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d2f03b20726d489287e5ebce211303c4", + "IPY_MODEL_e6683a89074d456e949a5072a55b1602", + "IPY_MODEL_c46dbb7e089a46c5be6f9947c79cf23f" + ], + "layout": "IPY_MODEL_d25dfa432ab84d04b346b8cd58c89720" + } + }, + "f67d2789738a4b3dac5dfdd91c588c5d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f8973d77047c4106987ae37a4c6a97ec": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + 
"state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fb7d899a343846f789adc4d9a3f54659": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fc4242c15c81477899b8446c6ad6b1a3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", 
+ "description_tooltip": null, + "layout": "IPY_MODEL_73b62457b51e413dadc7bfca227abe39", + "placeholder": "​", + "style": "IPY_MODEL_5b214ea52c60495bb35f4b89439c70b6", + "value": " 161/161 [00:00<00:00, 11.9kB/s]" + } + }, + "fe4e7cd444b4449292b704253c574c2d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_49a0e5d1681046c88e9bc1159d47d875", + "IPY_MODEL_54faaf670f5e43aa9f0cada481be066c", + "IPY_MODEL_e7f156806cf84f80bc6562cc7fe24e08" + ], + "layout": "IPY_MODEL_ba57df115cbd4571913b8399d3bdf2e0" + } + }, + "ff3791fe18f141e59d63056c5816b0f5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}