From 84ab99114a6f4a00cf362c50ae4d1350ee62d304 Mon Sep 17 00:00:00 2001
From: yifeng.wang <3038880699@qq.com>
Date: Wed, 15 Jan 2025 21:44:50 +0800
Subject: [PATCH] draft
---
 ..._ORPO_training_in_Unsloth_Ollama_chat.ipynb | 8129 +++++++++++++++++
 1 file changed, 8129 insertions(+)
 create mode 100644 docs/cookbooks/model_training/preference_data_gen_+Qwen_ORPO_training_in_Unsloth_Ollama_chat.ipynb
diff --git a/docs/cookbooks/model_training/preference_data_gen_+Qwen_ORPO_training_in_Unsloth_Ollama_chat.ipynb b/docs/cookbooks/model_training/preference_data_gen_+Qwen_ORPO_training_in_Unsloth_Ollama_chat.ipynb
new file mode 100644
index 0000000000..0cd6397b92
--- /dev/null
+++ b/docs/cookbooks/model_training/preference_data_gen_+Qwen_ORPO_training_in_Unsloth_Ollama_chat.ipynb
@@ -0,0 +1,8129 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "id": "8lSNa9FYvmA7"
+ },
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "!pip install unsloth\n",
+ "# Also get the latest nightly Unsloth!\n",
+ "!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "4dfwD72Ct1_T"
+ },
+ "source": [
+ "* We support Llama, Mistral, CodeLlama, TinyLlama, Vicuna, Open Hermes etc\n",
+ "* And Yi, Qwen ([llamafied](https://huggingface.co/models?sort=trending&search=qwen+llama)), Deepseek, all Llama, Mistral derived archs.\n",
+ "* We support 16bit LoRA or 4bit QLoRA. Both 2x faster.\n",
+ "* `max_seq_length` can be set to anything, since we do automatic RoPE Scaling via [kaiokendev's](https://kaiokendev.github.io/til) method.\n",
+ "* [**NEW**] With [PR 26037](https://github.com/huggingface/transformers/pull/26037), we support downloading 4bit models **4x faster**! [Our repo](https://huggingface.co/unsloth) has Llama, Mistral 4bit models."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 412,
+ "referenced_widgets": [
+ "5e7b3dcce78a463eb14e42cd3d2a0b0b",
+ "22b564917f5f4c8e81a8347f90a3d557",
+ "6b993dd64aa7489c8ca8c2f5d728178e",
+ "512462794d674769b6629ad8c8c254b7",
+ "d8c30b721eae4c33a8fa2746c91f6ebe",
+ "7927c572e90248c7a97ea2303fb9547e",
+ "738db5c70e8644d8825396a0c9f8309f",
+ "3ccf6630013f4c4d950d30eca6d8e2be",
+ "b956ad4e4ffb47908402a0d9fce147fb",
+ "16088f5268744b63a2fe9f2d22a0dcbb",
+ "b95d504adfd742c89a3fb277233fac5b",
+ "2d2844e25ace497aba29b9fb68173b0f",
+ "35d8b168ec234ca7a045d47c348ca9a6",
+ "c2fcb84d8f834b70880e7bfbe1e691c7",
+ "fc4242c15c81477899b8446c6ad6b1a3",
+ "dd3f008a31b24d86af3ae7e9760cae8d",
+ "c4560decbda04d628fd815fa8f3953a0",
+ "af65219a68e644698d5a687095007316",
+ "41ed1216684a40cba7c0cf3e3aaf18c1",
+ "365fac5b8cb14b3c9c85c21529551c7d",
+ "73b62457b51e413dadc7bfca227abe39",
+ "5b214ea52c60495bb35f4b89439c70b6",
+ "76ea4577ebc34368a1f220104a0778af",
+ "16b2957328bf4093a00cb3bf2e655c24",
+ "0aa69e1780cf4537ada828298340f744",
+ "13339cb1901644b9be473299832f6ced",
+ "ae2e31a363ae4051ac40375bfec4dd42",
+ "50c5f5d69a164f8fa51721e1c44a7783",
+ "00936bda447f46e5b1439913aeb730de",
+ "ce3e19e662e94dec89ee565ed892d571",
+ "3fa8e45950cd4a28b7314b6545f9bbc8",
+ "1596274edd804f1e858d2b5fb307300c",
+ "81654beebf7e425aa4f23504c1b71112",
+ "8088d38d4d374574b9e52fd7356e2ea6",
+ "326908aed9c64fc6abb1f767bbc0211a",
+ "bc86d9704e2f460bb7cfd04b664eed12",
+ "9e0ea9e88df94b4d9967ff1a7ed1bdd6",
+ "b47cd2b42af746d4ab10c7d0c640ba7a",
+ "689e62ad5ee249bfa66e675e778b96f3",
+ "ff3791fe18f141e59d63056c5816b0f5",
+ "8fc45c66242648cc94f51cf08ff18f25",
+ "7bf13dd9534d4e9197bd1874687862ec",
+ "445862f7c16947d198aae928b2f3f6bb",
+ "c2ca493b2d8a487c8db0defbeea297cc",
+ "f602392931b1423e867b9e37dad27300",
+ "d2f03b20726d489287e5ebce211303c4",
+ "e6683a89074d456e949a5072a55b1602",
+ "c46dbb7e089a46c5be6f9947c79cf23f",
+ "d25dfa432ab84d04b346b8cd58c89720",
+ "aba1e58a13744fbea62c85ced2969652",
+ "ad42cefa580c4e84ba5201b704218c34",
+ "bf846b25033d48bd818ae306e671b61b",
+ "595074903827402b8a1246c101340eff",
+ "85dba50a8bc04ccebec42f560bbc1b79",
+ "a8e9032fb3e34ca08f0a742c20c15848",
+ "7b1d17800e4540ffb099bdef9eb8ae6c",
+ "74069380475f434bb34afec56314fa4e",
+ "075bc926c06742d399070029d705173f",
+ "81300c1080894e34a3a5d88eeac33801",
+ "28bf71c117fe456ba48b1107a22c1d16",
+ "f3d89e8a8407433db10d775dec82099f",
+ "c57b3e2abc4d4147a7410fdeb16f318e",
+ "4543871a84b646f286bd601896d5d8cc",
+ "07bac781635f4da69b591e7579359c64",
+ "894dcbc8129c4f979581aa8c05efb747",
+ "0b5a520ef7ef4df7ac67d2fce9036a92",
+ "fe4e7cd444b4449292b704253c574c2d",
+ "49a0e5d1681046c88e9bc1159d47d875",
+ "54faaf670f5e43aa9f0cada481be066c",
+ "e7f156806cf84f80bc6562cc7fe24e08",
+ "ba57df115cbd4571913b8399d3bdf2e0",
+ "fb7d899a343846f789adc4d9a3f54659",
+ "87d15d738ba4471cb86fa002620042a8",
+ "d7535f7fad1d416a8936735cd0f23099",
+ "061df7aa5cd44f8481ec3af743ffc442",
+ "e7dc3433d02642728f805d18ab987102",
+ "5d53d4ee8b5747f4a29d3efcabc2a324",
+ "811c37a344f84daabaa1e3e7eb985769",
+ "1a1777e3d39a4d98aa9bd0685925c52d",
+ "216e1bd82a024ad895c7abd89025b771",
+ "3f85a717e1d242eaa21deda5944c658f",
+ "ad45988fb01248ac96ff1b127149fafb",
+ "83efa62da7c04b79829e69f81f229c56",
+ "7843d80c596749abae891f9855830943",
+ "61129d3bdae2454e997bf3263aa616b9",
+ "b0732ed61af849678e127cdaaa67ce7d",
+ "b5f3983270e54dd3911a35fe3128d9b7",
+ "9d7d05e6711149aa8adf3d03f3468e2f"
+ ]
+ },
+ "id": "2eSvM9zX_2d3",
+ "outputId": "296225a1-1e10-43f2-c40d-a82d417c39ff"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+      "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+      "🦥 Unsloth Zoo will now patch everything to make training faster!\n",
+ "==((====))== Unsloth 2025.1.5: Fast Qwen2 patching. Transformers: 4.47.1.\n",
+ " \\\\ /| GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.\n",
+ "O^O/ \\_/ \\ Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0\n",
+ "\\ / Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "5e7b3dcce78a463eb14e42cd3d2a0b0b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "model.safetensors: 0%| | 0.00/1.14G [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "2d2844e25ace497aba29b9fb68173b0f",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "generation_config.json: 0%| | 0.00/161 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "76ea4577ebc34368a1f220104a0778af",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/7.52k [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "8088d38d4d374574b9e52fd7356e2ea6",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "vocab.json: 0%| | 0.00/2.78M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "f602392931b1423e867b9e37dad27300",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "merges.txt: 0%| | 0.00/1.67M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "7b1d17800e4540ffb099bdef9eb8ae6c",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "added_tokens.json: 0%| | 0.00/632 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "fe4e7cd444b4449292b704253c574c2d",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "special_tokens_map.json: 0%| | 0.00/613 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "811c37a344f84daabaa1e3e7eb985769",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer.json: 0%| | 0.00/7.03M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from unsloth import FastLanguageModel\n",
+ "import torch\n",
+ "max_seq_length = 4096 # Choose any! We auto support RoPE Scaling internally!\n",
+ "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+ "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n",
+ "\n",
+ "# 4bit pre quantized models we support for 4x faster downloading + no OOMs.\n",
+ "fourbit_models = [\n",
+ " \"unsloth/mistral-7b-bnb-4bit\",\n",
+ " \"unsloth/mistral-7b-instruct-v0.2-bnb-4bit\",\n",
+ " \"unsloth/llama-2-7b-bnb-4bit\",\n",
+ " \"unsloth/gemma-7b-bnb-4bit\",\n",
+ " \"unsloth/gemma-7b-it-bnb-4bit\", # Instruct version of Gemma 7b\n",
+ " \"unsloth/gemma-2b-bnb-4bit\",\n",
+ " \"unsloth/gemma-2b-it-bnb-4bit\", # Instruct version of Gemma 2b\n",
+ " \"unsloth/llama-3-8b-bnb-4bit\", # [NEW] 15 Trillion token Llama-3\n",
+ "] # More models at https://huggingface.co/unsloth\n",
+ "\n",
+ "model, tokenizer = FastLanguageModel.from_pretrained(\n",
+ " model_name = \"unsloth/Qwen2.5-Math-1.5B-Instruct\",\n",
+ " max_seq_length = max_seq_length,\n",
+ " dtype = dtype,\n",
+ " load_in_4bit = load_in_4bit,\n",
+ " # token = \"hf_...\", # use one if using gated models like meta-llama/Llama-2-7b-hf\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "_H6EWbe-t_bk"
+ },
+ "source": [
+ "We now add LoRA adapters so we only need to update 1 to 10% of all parameters!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "6bZsfBuZDeCL",
+ "outputId": "1b4401a5-28d0-4971-d302-f05aaf8dee5a"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Unsloth 2025.1.5 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.\n"
+ ]
+ }
+ ],
+ "source": [
+ "model = FastLanguageModel.get_peft_model(\n",
+ " model,\n",
+ " r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
+ " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
+ " \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
+ " lora_alpha = 16,\n",
+ " lora_dropout = 0, # Supports any, but = 0 is optimized\n",
+ " bias = \"none\", # Supports any, but = \"none\" is optimized\n",
+ " # [NEW] \"unsloth\" uses 30% less VRAM, fits 2x larger batch sizes!\n",
+ " use_gradient_checkpointing = \"unsloth\", # True or \"unsloth\" for very long context\n",
+ " random_state = 3407,\n",
+ " use_rslora = False, # We support rank stabilized LoRA\n",
+ " loftq_config = None, # And LoftQ\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "FNfc6OLOuuyZ"
+ },
+ "source": [
+ "\n",
+ "### Data Prep\n",
+ "We now use a special ORPO style dataset from [recipe-research](https://huggingface.co/datasets/reciperesearch/dolphin-sft-v0.1-preference).\n",
+ "\n",
+ "You need at least 3 columns:\n",
+ "* Instruction\n",
+ "* Accepted\n",
+ "* Rejected\n",
+ "\n",
+ "For example:\n",
+ "* Instruction: \"What is 2+2?\"\n",
+ "* Accepted: \"The answer is 4\"\n",
+ "* Rejected: \"The answer is 5\"\n",
+ "\n",
+    "The goal of ORPO is to penalize the \"rejected\" samples, and increase the likelihood of \"accepted\" samples. [recipe-research](https://huggingface.co/datasets/reciperesearch/dolphin-sft-v0.1-preference) essentially used Mistral to generate the \"rejected\" responses, and used GPT-4 to generate the \"accepted\" responses."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 145,
+ "referenced_widgets": [
+ "ec2052c4d20f4273b2fc9d20b9f6cc04",
+ "d47f521581c8423682ed590854f55702",
+ "2cf07655e93b49d9823d460b68bfa553",
+ "a47c4b141e244e44b068a2d79f9ac3f8",
+ "078aba59954f45d5bae22aacaa9a5a8b",
+ "d196c1273a72403e926266ebde2ff551",
+ "cb7eb0355b084370a2ba96125d6aa939",
+ "4e7b87ce3eec4994996c1d467bf6b2fb",
+ "1215442ba51b430d8de2358f44f7a9fa",
+ "d98b7e8cc525466598fe34609e41fa54",
+ "463bddb985dc4b62a9ee0be8919cc5cf",
+ "140f686ee529400b81876ccd4a37558c",
+ "aafb1c4d69944584aaf0bc30e47df550",
+ "e420266f780541e481cfbe4a80b2675c",
+ "0c506dbd691d428082ed8151cfebd5e6",
+ "522c004154f0468e9057fdc2609a11d7",
+ "3e308101e62f43c799ca27b6eaa744d4",
+ "0576ab6d4cce4cb09f0755a230d50980",
+ "40a09d1d63ad49eca2a955e3e7d0cc72",
+ "25fe66aa53bc49d281a17ead8041ff1d",
+ "4698d859bc2648f8bd41de734d3e6b6f",
+ "30ed19bc86744c8b92b685ef97eff53d",
+ "05096318093d4ded8f6aacf0d8baa5a1",
+ "8e10bfda957d412592a2b5a7a33c81aa",
+ "2157c5a847d24a9c88fe0a657fa4a7d5",
+ "df90e41a4fc241c1ad61506089b59e83",
+ "7ec180f6335341d594e0a726c48777a3",
+ "bfbfd237e36d46f9be546d2416b7bb36",
+ "73a0c53397be44c7974c4c9b1ad0423c",
+ "e5dfd1e6fcf44da6bdb2957cf603710f",
+ "6666615c6adc49bead154aff367c30e2",
+ "69eb1c785b6f46cfab022b650e0ccc7b",
+ "91d3a3d009f84b05b62f54353dc955eb",
+ "675bd5d813834814b6f9c8273a026e0c",
+ "ac97da47a07441f8829640460203fa34",
+ "7b532513494f4b35b74573e89140dda6",
+ "c8e9096bd7764591aa44e8a2e1852c74",
+ "d394bbf04b904c0eb87ce8bd19bc4788",
+ "7f565d6c60064b1f86a60e9037651f14",
+ "f67d2789738a4b3dac5dfdd91c588c5d",
+ "f308b42d29b74c6fa49b9558dfe63409",
+ "d3b092ed193c49e0ba26aeae08111b4f",
+ "56a1c1654c304b05876f0e3664a091ed",
+ "0a9c24d18391432f84fe2f89f69193b7"
+ ]
+ },
+ "id": "GetrTh37qgDp",
+ "outputId": "c8348041-364f-4670-d9d8-ad5043e2949d"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ec2052c4d20f4273b2fc9d20b9f6cc04",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "README.md: 0%| | 0.00/490 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "140f686ee529400b81876ccd4a37558c",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "dpo_fixed.jsonl: 0%| | 0.00/34.1M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "05096318093d4ded8f6aacf0d8baa5a1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Generating train split: 0%| | 0/16000 [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "675bd5d813834814b6f9c8273a026e0c",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Map: 0%| | 0/16000 [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# The data must be formatted with appropriate prompt template first.\n",
+ "# See details here: https://github.com/huggingface/trl/blob/main/examples/scripts/orpo.py\n",
+ "\n",
+ "alpaca_prompt = \"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
+ "\n",
+ "### Instruction:\n",
+ "{}\n",
+ "\n",
+ "### Input:\n",
+ "{}\n",
+ "\n",
+ "### Response:\n",
+ "{}\"\"\"\n",
+ "\n",
+ "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n",
+ "\n",
+ "def format_prompt(sample):\n",
+ " instruction = sample[\"instruction\"]\n",
+ " input = sample[\"input\"]\n",
+ " accepted = sample[\"accepted\"]\n",
+ " rejected = sample[\"rejected\"]\n",
+ "\n",
+ " # ORPOTrainer expects prompt/chosen/rejected keys\n",
+ " # See: https://huggingface.co/docs/trl/main/en/orpo_trainer\n",
+ " sample[\"prompt\"] = alpaca_prompt.format(instruction, input, \"\")\n",
+ " sample[\"chosen\"] = accepted + EOS_TOKEN\n",
+ " sample[\"rejected\"] = rejected + EOS_TOKEN\n",
+ " return sample\n",
+ "pass\n",
+ "\n",
+ "from datasets import load_dataset\n",
+ "dataset = load_dataset(\"reciperesearch/dolphin-sft-v0.1-preference\")[\"train\"]\n",
+ "dataset = dataset.map(format_prompt,)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7PH-7Cudu8Ja"
+ },
+ "source": [
+ "Let's print out some examples to see how the dataset should look like"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "oF63zQqNlNJC",
+ "outputId": "32e1edc3-27e5-4028-c231-664f89d86960"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "INSTRUCTION: ==================================================\n",
+ "('Below is an instruction that describes a task, paired with an input that '\n",
+ " 'provides further context. Write a response that appropriately completes the '\n",
+ " 'request.\\n'\n",
+ " '\\n'\n",
+ " '### Instruction:\\n'\n",
+ " 'You are an AI assistant that helps people find information.\\n'\n",
+ " '\\n'\n",
+ " '### Input:\\n'\n",
+ " 'Given the rationale, provide a reasonable question and answer. Step-by-step '\n",
+ " 'reasoning process: Xkcd comics are very popular amongst internet users.\\n'\n",
+ " ' The question and answer:\\n'\n",
+ " '\\n'\n",
+ " '### Response:\\n')\n",
+ "ACCEPTED: ==================================================\n",
+ "('Question: What makes Xkcd comics popular among internet users?\\n'\n",
+ " '\\n'\n",
+ " 'Answer: Xkcd comics are popular among internet users because of their clever '\n",
+ " 'humor, relatable themes, and minimalist art style. They often cover topics '\n",
+ " 'like science, technology, and life experiences, making them appealing to a '\n",
+ " 'broad audience.<|im_end|>')\n",
+ "REJECTED: ==================================================\n",
+ "('Question: What is the reason behind the popularity of Xkcd comics among '\n",
+ " 'internet users?\\n'\n",
+ " '\\n'\n",
+ " 'Answer: Xkcd comics are popular among internet users because they offer a '\n",
+ " 'unique blend of humor, relatable content, and thought-provoking topics that '\n",
+ " 'resonate with a wide range of people. The comics often address everyday '\n",
+ " 'experiences, technology, and social issues, making them accessible and '\n",
+ " 'enjoyable for many individuals. Additionally, the simple and minimalistic '\n",
+ " 'art style of Xkcd comics allows for easy comprehension and sharing, '\n",
+ " 'contributing to their widespread appeal.<|im_end|>')\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pprint\n",
+ "row = dataset[1]\n",
+ "print('INSTRUCTION: ' + '=' * 50)\n",
+ "pprint.pprint(row[\"prompt\"])\n",
+ "print('ACCEPTED: ' + '=' * 50)\n",
+ "pprint.pprint(row[\"chosen\"])\n",
+ "print('REJECTED: ' + '=' * 50)\n",
+ "pprint.pprint(row[\"rejected\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "id": "oqyleKojqgDq"
+ },
+ "outputs": [],
+ "source": [
+ "# Enable reward modelling stats\n",
+ "from unsloth import PatchDPOTrainer\n",
+ "PatchDPOTrainer()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "J1ZlnJpkxIuV"
+ },
+ "source": [
+ "\n",
+ "### Train the model\n",
+    "Now let's use Huggingface TRL's `ORPOTrainer`! More docs here: [TRL ORPO docs](https://huggingface.co/docs/trl/main/en/orpo_trainer). We do 30 steps to speed things up, but for a full run you can set `num_train_epochs=1` and remove the `max_steps` limit. We also support TRL's `DPOTrainer`!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 113,
+ "referenced_widgets": [
+ "8f46cd3d936343aca34b328df6b3dea0",
+ "5cd4555da19a47c8beb1c0c7d782aa10",
+ "850136c0706a448eb98a32181f4f192d",
+ "d4ba51ee7f864afb822a7e4076496ab4",
+ "2dfbe287b29e4f6b9c84f95d751daffa",
+ "e5e50e29698645a1b0bc68d7821d27a7",
+ "3d360b6d73704cf9956bd7279a0af53a",
+ "b88a783124404b9b807b83d6f7023285",
+ "f596811361d34a75b178553520aa3400",
+ "06a7440478ee4c35a58bba8a434d9e69",
+ "81c0046c66224f7582d72c1dbf919d33",
+ "93b2a2bd27dc49d28acc3b6643cfc022",
+ "29bdeef39f4f46ae910080a2b4547686",
+ "98ba361adff74799a8aeecfd313e3beb",
+ "336d3e3f8280423ab2a62e0cdf7631d8",
+ "4a82e48b80de44f29cef8b1724ef85e1",
+ "74363b66c6064709b98b95c4cd69662a",
+ "22dc4471a9b54eefafd7021177608984",
+ "f4f28c070f7241539ec87dd82a701a7e",
+ "ab5f2943fb674f03bb003d11304b3188",
+ "87cc4745ead94aaa8e41f4eed3cb1e59",
+ "6211db271a134ea9b2bd08089faf7d80",
+ "7b9032352b0a4ae4b7bacfd217e29451",
+ "b272574e4eab4313a3ea5f97c1d5c359",
+ "102875abfc5b4e6cab735f75bf200d50",
+ "b1011b504fc145af88d96667cafef7d4",
+ "d9d77e5d7b014d6883b8dafa3a685dc2",
+ "2bdf072fd2b44e0abd3e2d5802a2054b",
+ "f8973d77047c4106987ae37a4c6a97ec",
+ "ad90ff35111d48d78a4a8595947ab8d3",
+ "a2d515ef793b4435b1fe6035de08585b",
+ "9c676e3f4e204349ae41edc8ae770ce3",
+ "ad2b896b04fb42c18839380b8a40d19b"
+ ]
+ },
+ "id": "QtoqUw80QDV0",
+ "outputId": "c175c6df-669b-4429-8f01-82ac1134250c"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "8f46cd3d936343aca34b328df6b3dea0",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Map: 0%| | 0/16000 [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "93b2a2bd27dc49d28acc3b6643cfc022",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Map: 0%| | 0/16000 [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "7b9032352b0a4ae4b7bacfd217e29451",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Map: 0%| | 0/16000 [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from trl import ORPOConfig, ORPOTrainer\n",
+ "from unsloth import is_bfloat16_supported\n",
+ "\n",
+ "orpo_trainer = ORPOTrainer(\n",
+ " model = model,\n",
+ " train_dataset = dataset,\n",
+ " tokenizer = tokenizer,\n",
+ " args = ORPOConfig(\n",
+ " max_length = max_seq_length,\n",
+ " max_prompt_length = max_seq_length//2,\n",
+ " max_completion_length = max_seq_length//2,\n",
+ " per_device_train_batch_size = 2,\n",
+ " gradient_accumulation_steps = 4,\n",
+ " beta = 0.1,\n",
+ " logging_steps = 1,\n",
+ " optim = \"adamw_8bit\",\n",
+ " lr_scheduler_type = \"linear\",\n",
+ " max_steps = 30, # Change to num_train_epochs = 1 for full training runs\n",
+ " fp16 = not is_bfloat16_supported(),\n",
+ " bf16 = is_bfloat16_supported(),\n",
+ " output_dir = \"outputs\",\n",
+ " report_to = \"none\", # Use this for WandB etc\n",
+ " ),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "EWGFqAo5Q2me",
+ "outputId": "81dbb67c-9556-4711-f1d2-0d35d56edb58"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 16,000 | Num Epochs = 1\n",
+ "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 8 | Total steps = 30\n",
+ " \"-____-\" Number of trainable parameters = 18,464,768\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ " [30/30 02:36, Epoch 0/1]\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Step | \n",
+ " Training Loss | \n",
+ " rewards / chosen | \n",
+ " rewards / rejected | \n",
+ " rewards / accuracies | \n",
+ " rewards / margins | \n",
+ " logps / rejected | \n",
+ " logps / chosen | \n",
+ " logits / rejected | \n",
+ " logits / chosen | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 4.112800 | \n",
+ " -0.426227 | \n",
+ " -0.264567 | \n",
+ " 0.125000 | \n",
+ " -0.161660 | \n",
+ " -2.645666 | \n",
+ " -4.262269 | \n",
+ " -1.351560 | \n",
+ " -1.609066 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3.619100 | \n",
+ " -0.389984 | \n",
+ " -0.230814 | \n",
+ " 0.125000 | \n",
+ " -0.159171 | \n",
+ " -2.308137 | \n",
+ " -3.899843 | \n",
+ " -0.607541 | \n",
+ " -0.993858 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4.056700 | \n",
+ " -0.245542 | \n",
+ " -0.217518 | \n",
+ " 0.125000 | \n",
+ " -0.028024 | \n",
+ " -2.175181 | \n",
+ " -2.455418 | \n",
+ " -1.361474 | \n",
+ " -1.343608 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4.118300 | \n",
+ " -0.372123 | \n",
+ " -0.351179 | \n",
+ " 0.375000 | \n",
+ " -0.020944 | \n",
+ " -3.511789 | \n",
+ " -3.721230 | \n",
+ " -0.954310 | \n",
+ " -1.037349 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 3.344800 | \n",
+ " -0.219567 | \n",
+ " -0.185779 | \n",
+ " 0.375000 | \n",
+ " -0.033788 | \n",
+ " -1.857790 | \n",
+ " -2.195669 | \n",
+ " -1.216430 | \n",
+ " -1.192454 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 3.565200 | \n",
+ " -0.346085 | \n",
+ " -0.408853 | \n",
+ " 0.250000 | \n",
+ " 0.062767 | \n",
+ " -4.088528 | \n",
+ " -3.460854 | \n",
+ " -1.281686 | \n",
+ " -1.154220 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 4.106100 | \n",
+ " -0.223531 | \n",
+ " -0.192299 | \n",
+ " 0.375000 | \n",
+ " -0.031232 | \n",
+ " -1.922991 | \n",
+ " -2.235310 | \n",
+ " -1.130482 | \n",
+ " -1.015447 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 3.744200 | \n",
+ " -0.222023 | \n",
+ " -0.158168 | \n",
+ " 0.000000 | \n",
+ " -0.063855 | \n",
+ " -1.581680 | \n",
+ " -2.220229 | \n",
+ " -0.981553 | \n",
+ " -1.063048 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 3.841800 | \n",
+ " -0.294004 | \n",
+ " -0.222455 | \n",
+ " 0.000000 | \n",
+ " -0.071549 | \n",
+ " -2.224555 | \n",
+ " -2.940043 | \n",
+ " -0.610051 | \n",
+ " -0.692850 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 3.574100 | \n",
+ " -0.323491 | \n",
+ " -0.286941 | \n",
+ " 0.000000 | \n",
+ " -0.036549 | \n",
+ " -2.869414 | \n",
+ " -3.234910 | \n",
+ " -1.168018 | \n",
+ " -1.013650 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 3.842800 | \n",
+ " -0.256523 | \n",
+ " -0.172404 | \n",
+ " 0.125000 | \n",
+ " -0.084120 | \n",
+ " -1.724037 | \n",
+ " -2.565235 | \n",
+ " -1.059811 | \n",
+ " -1.065985 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 3.698000 | \n",
+ " -0.347930 | \n",
+ " -0.301730 | \n",
+ " 0.250000 | \n",
+ " -0.046199 | \n",
+ " -3.017303 | \n",
+ " -3.479295 | \n",
+ " -1.261715 | \n",
+ " -0.990827 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 4.154500 | \n",
+ " -0.408001 | \n",
+ " -0.269475 | \n",
+ " 0.000000 | \n",
+ " -0.138527 | \n",
+ " -2.694748 | \n",
+ " -4.080014 | \n",
+ " -1.196285 | \n",
+ " -1.425266 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 3.846700 | \n",
+ " -0.285937 | \n",
+ " -0.185377 | \n",
+ " 0.000000 | \n",
+ " -0.100560 | \n",
+ " -1.853770 | \n",
+ " -2.859370 | \n",
+ " -0.976951 | \n",
+ " -0.978538 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 4.014600 | \n",
+ " -0.405519 | \n",
+ " -0.351493 | \n",
+ " 0.000000 | \n",
+ " -0.054027 | \n",
+ " -3.514926 | \n",
+ " -4.055194 | \n",
+ " -1.438474 | \n",
+ " -1.343131 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 3.691200 | \n",
+ " -0.337413 | \n",
+ " -0.287705 | \n",
+ " 0.250000 | \n",
+ " -0.049708 | \n",
+ " -2.877048 | \n",
+ " -3.374131 | \n",
+ " -1.140527 | \n",
+ " -1.140243 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 3.849800 | \n",
+ " -0.317650 | \n",
+ " -0.249021 | \n",
+ " 0.125000 | \n",
+ " -0.068629 | \n",
+ " -2.490213 | \n",
+ " -3.176501 | \n",
+ " -1.095078 | \n",
+ " -1.301038 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 3.892200 | \n",
+ " -0.322818 | \n",
+ " -0.291411 | \n",
+ " 0.125000 | \n",
+ " -0.031407 | \n",
+ " -2.914110 | \n",
+ " -3.228178 | \n",
+ " -1.081944 | \n",
+ " -0.880424 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 3.942300 | \n",
+ " -0.236923 | \n",
+ " -0.198437 | \n",
+ " 0.375000 | \n",
+ " -0.038486 | \n",
+ " -1.984369 | \n",
+ " -2.369226 | \n",
+ " -1.024021 | \n",
+ " -0.998742 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 3.660500 | \n",
+ " -0.318861 | \n",
+ " -0.304294 | \n",
+ " 0.375000 | \n",
+ " -0.014567 | \n",
+ " -3.042940 | \n",
+ " -3.188613 | \n",
+ " -1.159517 | \n",
+ " -0.961397 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " 3.374000 | \n",
+ " -0.183591 | \n",
+ " -0.123356 | \n",
+ " 0.000000 | \n",
+ " -0.060235 | \n",
+ " -1.233564 | \n",
+ " -1.835910 | \n",
+ " -0.954624 | \n",
+ " -0.998104 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 3.527600 | \n",
+ " -0.371039 | \n",
+ " -0.377409 | \n",
+ " 0.250000 | \n",
+ " 0.006370 | \n",
+ " -3.774089 | \n",
+ " -3.710387 | \n",
+ " -1.262056 | \n",
+ " -0.990792 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 3.822700 | \n",
+ " -0.349856 | \n",
+ " -0.288118 | \n",
+ " 0.125000 | \n",
+ " -0.061738 | \n",
+ " -2.881181 | \n",
+ " -3.498561 | \n",
+ " -1.308489 | \n",
+ " -1.409815 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " 3.224100 | \n",
+ " -0.239801 | \n",
+ " -0.208166 | \n",
+ " 0.125000 | \n",
+ " -0.031635 | \n",
+ " -2.081656 | \n",
+ " -2.398007 | \n",
+ " -1.129003 | \n",
+ " -1.081420 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " 3.929600 | \n",
+ " -0.345307 | \n",
+ " -0.185935 | \n",
+ " 0.000000 | \n",
+ " -0.159372 | \n",
+ " -1.859352 | \n",
+ " -3.453071 | \n",
+ " -1.213040 | \n",
+ " -1.342487 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " 3.918800 | \n",
+ " -0.327853 | \n",
+ " -0.260678 | \n",
+ " 0.125000 | \n",
+ " -0.067175 | \n",
+ " -2.606782 | \n",
+ " -3.278529 | \n",
+ " -1.286484 | \n",
+ " -1.394064 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " 3.845700 | \n",
+ " -0.343079 | \n",
+ " -0.226485 | \n",
+ " 0.250000 | \n",
+ " -0.116594 | \n",
+ " -2.264853 | \n",
+ " -3.430789 | \n",
+ " -1.334893 | \n",
+ " -1.199522 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " 3.372900 | \n",
+ " -0.287159 | \n",
+ " -0.235770 | \n",
+ " 0.000000 | \n",
+ " -0.051389 | \n",
+ " -2.357700 | \n",
+ " -2.871589 | \n",
+ " -1.577852 | \n",
+ " -1.151554 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " 3.541100 | \n",
+ " -0.244107 | \n",
+ " -0.199115 | \n",
+ " 0.250000 | \n",
+ " -0.044992 | \n",
+ " -1.991150 | \n",
+ " -2.441066 | \n",
+ " -1.001042 | \n",
+ " -0.955630 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " 3.574000 | \n",
+ " -0.267658 | \n",
+ " -0.195255 | \n",
+ " 0.250000 | \n",
+ " -0.072403 | \n",
+ " -1.952554 | \n",
+ " -2.676580 | \n",
+ " -1.218759 | \n",
+ " -1.284432 | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "TrainOutput(global_step=30, training_loss=3.7602075894673663, metrics={'train_runtime': 167.4826, 'train_samples_per_second': 1.433, 'train_steps_per_second': 0.179, 'total_flos': 0.0, 'train_loss': 3.7602075894673663, 'epoch': 0.015})"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "orpo_trainer.train()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "FgEvCW76xblp"
+ },
+ "source": [
+ "\n",
+ "### Inference\n",
+ "Let's run the model! You can change the instruction and input - leave the output blank!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "0DJPbbtGxcFJ",
+ "outputId": "69026769-2fd3-4267-d1fe-d05003561d73"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\\n\\n### Instruction:\\nContinue the fibonnaci sequence.\\n\\n### Input:\\n1, 1, 2, 3, 5, 8\\n\\n### Response:\\nThe next number in the sequence is 13. The sequence is the Fibonacci sequence, where each number is the sum of the two preceding ones. The next number is 5 + 8 = 13.\\n\\nTo determine the next number in the Fibonacci sequence, we need to follow the rule that each number is']"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# alpaca_prompt = Copied from above\n",
+ "FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
+ "inputs = tokenizer(\n",
+ "[\n",
+ " alpaca_prompt.format(\n",
+ "    \"Continue the Fibonacci sequence.\", # instruction\n",
+ " \"1, 1, 2, 3, 5, 8\", # input\n",
+ " \"\", # output - leave this blank for generation!\n",
+ " )\n",
+ "], return_tensors = \"pt\").to(\"cuda\")\n",
+ "\n",
+ "outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)\n",
+ "tokenizer.batch_decode(outputs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "absTV8M1xzwz"
+ },
+ "source": [
+ " You can also use a `TextStreamer` for continuous inference - so you can see the generation token by token, instead of waiting the whole time!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "xKzWRN1Px0Bg",
+ "outputId": "fcdfb08e-5219-48bf-c7e3-cf41ceeead55"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
+ "\n",
+ "### Instruction:\n",
+ "Continue the Fibonacci sequence.\n",
+ "\n",
+ "### Input:\n",
+ "1, 1, 2, 3, 5, 8\n",
+ "\n",
+ "### Response:\n",
+ "The next number in the sequence is 13. The sequence is the Fibonacci sequence, where each number is the sum of the two preceding ones. The next number is 5 + 8 = 13.\n",
+ "\n",
+ "To determine the next number in the Fibonacci sequence, we need to follow the rule that each number is the sum of the two preceding numbers. The given sequence is:\n",
+ "\n",
+ "1, 1, 2, 3, 5, 8\n",
+ "\n",
+ "Let's identify the pattern:\n",
+ "\n",
+ "- The first number is 1.\n",
+ "- The second number is 1.\n",
+ "- The third number is 1 + 1 = \n"
+ ]
+ }
+ ],
+ "source": [
+ "# alpaca_prompt = Copied from above\n",
+ "FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
+ "inputs = tokenizer(\n",
+ "[\n",
+ " alpaca_prompt.format(\n",
+ "    \"Continue the Fibonacci sequence.\", # instruction\n",
+ " \"1, 1, 2, 3, 5, 8\", # input\n",
+ " \"\", # output - leave this blank for generation!\n",
+ " )\n",
+ "], return_tensors = \"pt\").to(\"cuda\")\n",
+ "\n",
+ "from transformers import TextStreamer\n",
+ "text_streamer = TextStreamer(tokenizer)\n",
+ "_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Y_3rdZXmx3Hh"
+ },
+ "source": [
+ "\n",
+ "### Saving, loading finetuned models\n",
+ "To save the final model as LoRA adapters, either use Huggingface's `push_to_hub` for an online save or `save_pretrained` for a local save.\n",
+ "\n",
+ "**[NOTE]** This ONLY saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "HJbRqLynx3a8",
+ "outputId": "05228b0d-4490-49f1-cd71-5cfe0f9417c9"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('lora_model/tokenizer_config.json',\n",
+ " 'lora_model/special_tokens_map.json',\n",
+ " 'lora_model/tokenizer.json')"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.save_pretrained(\"lora_model\") # Local saving\n",
+ "tokenizer.save_pretrained(\"lora_model\")\n",
+ "# model.push_to_hub(\"your_name/lora_model\", token = \"...\") # Online saving\n",
+ "# tokenizer.push_to_hub(\"your_name/lora_model\", token = \"...\") # Online saving"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "mwIRb8DByBGg"
+ },
+ "source": [
+ "Now, if you want to load the LoRA adapters we just saved for inference, change `False` to `True` below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "UPHJs9wDyBbN",
+ "outputId": "77165645-7402-4347-a613-f0ad90b572d9"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[\"<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\\n\\n### Instruction:\\nWhat is a famous tall tower in Paris?\\n\\n### Input:\\n\\n\\n### Response:\\nThe Eiffel Tower is a famous tall tower in Paris. It is a wrought iron tower located on the Champ de Mars in Paris, France. The tower is named after the engineer Gustave Eiffel, the main designer, and was built as the entrance to the 1889 World's Fair. The tower\"]"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "if False:\n",
+ " from unsloth import FastLanguageModel\n",
+ " model, tokenizer = FastLanguageModel.from_pretrained(\n",
+ " model_name = \"lora_model\", # YOUR MODEL YOU USED FOR TRAINING\n",
+ " max_seq_length = max_seq_length,\n",
+ " dtype = dtype,\n",
+ " load_in_4bit = load_in_4bit,\n",
+ " )\n",
+ " FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
+ "\n",
+ "# alpaca_prompt = You MUST copy from above!\n",
+ "\n",
+ "inputs = tokenizer(\n",
+ "[\n",
+ " alpaca_prompt.format(\n",
+ " \"What is a famous tall tower in Paris?\", # instruction\n",
+ " \"\", # input\n",
+ " \"\", # output - leave this blank for generation!\n",
+ " )\n",
+ "], return_tensors = \"pt\").to(\"cuda\")\n",
+ "\n",
+ "outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)\n",
+ "tokenizer.batch_decode(outputs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "l10uNsFYyGav"
+ },
+ "source": [
+ "### Saving to float16 for VLLM\n",
+ "\n",
+ "We also support saving to `float16` directly. Select `merged_16bit` for float16 or `merged_4bit` for int4. We also allow `lora` adapters as a fallback. Use `push_to_hub_merged` to upload to your Hugging Face account! You can go to https://huggingface.co/settings/tokens for your personal tokens."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "fZR6n7DsyHu6"
+ },
+ "outputs": [],
+ "source": [
+ "# Merge to 16bit\n",
+ "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n",
+ "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n",
+ "\n",
+ "# Merge to 4bit\n",
+ "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n",
+ "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n",
+ "\n",
+ "# Just LoRA adapters\n",
+ "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"lora\",)\n",
+ "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"lora\", token = \"\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ssjW3ST2yI1L"
+ },
+ "source": [
+ "### GGUF / llama.cpp Conversion\n",
+ "To save to `GGUF` / `llama.cpp`, we support it natively now! We clone `llama.cpp` and we default save it to `q8_0`. We allow all methods like `q4_k_m`. Use `save_pretrained_gguf` for local saving and `push_to_hub_gguf` for uploading to HF.\n",
+ "\n",
+ "Some supported quant methods (full list on our [Wiki page](https://github.com/unslothai/unsloth/wiki#gguf-quantization-options)):\n",
+ "* `q8_0` - Fast conversion. High resource use, but generally acceptable.\n",
+ "* `q4_k_m` - Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K.\n",
+ "* `q5_k_m` - Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "FKVTNAwyyKFR",
+ "outputId": "6561be80-b25d-4aa9-8014-2db0ac38b650"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Unsloth: Merging 4bit and LoRA weights to 16bit...\n",
+ "Unsloth: Will use up to 6.5 out of 12.67 RAM for saving.\n",
+ "Unsloth: Saving model... This might take 5 minutes ...\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 28/28 [00:00<00:00, 54.17it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model/pytorch_model.bin...\n",
+ "Done.\n",
+ "==((====))== Unsloth: Conversion from QLoRA to GGUF information\n",
+ " \\\\ /| [0] Installing llama.cpp might take 3 minutes.\n",
+ "O^O/ \\_/ \\ [1] Converting HF to GGUF 16bits might take 3 minutes.\n",
+ "\\ / [2] Converting GGUF 16bits to ['q8_0'] might take 10 minutes each.\n",
+ " \"-____-\" In total, you will have to wait at least 16 minutes.\n",
+ "\n",
+ "Unsloth: Installing llama.cpp. This might take 3 minutes...\n",
+ "Unsloth: [1] Converting model at model into q8_0 GGUF format.\n",
+ "The output location will be /content/model/unsloth.Q8_0.gguf\n",
+ "This might take 3 minutes...\n",
+ "INFO:hf-to-gguf:Loading model: model\n",
+ "INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n",
+ "INFO:hf-to-gguf:Exporting model...\n",
+ "INFO:hf-to-gguf:gguf: loading model part 'pytorch_model.bin'\n",
+ "INFO:hf-to-gguf:token_embd.weight, torch.float16 --> Q8_0, shape = {1536, 151936}\n",
+ "INFO:hf-to-gguf:blk.0.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.0.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.0.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.0.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.0.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.0.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.0.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.1.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.1.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.1.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.1.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.1.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.1.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.1.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.2.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.2.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.2.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.2.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.2.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.2.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.2.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.3.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.3.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.3.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.3.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.3.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.3.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.3.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.4.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.4.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.4.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.4.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.4.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.4.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.4.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.5.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.5.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.5.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.5.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.5.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.5.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.5.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.6.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.6.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.6.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.6.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.6.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.6.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.6.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.7.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.7.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.7.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.7.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.7.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.7.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.7.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.8.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.8.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.8.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.8.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.8.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.8.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.8.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.9.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.9.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.9.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.9.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.9.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.9.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.9.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.10.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.10.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.10.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.10.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.10.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.10.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.10.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.11.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.11.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.11.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.11.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.11.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.11.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.11.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.12.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.12.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.12.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.12.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.12.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.12.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.12.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.12.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.13.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.13.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.13.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.13.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.13.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.13.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.13.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.14.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.14.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.14.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.14.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.14.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.14.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.14.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.15.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.15.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.15.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.15.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.15.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.15.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.15.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.16.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.16.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.16.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.16.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.16.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.16.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.16.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.17.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.17.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.17.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.17.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.17.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.17.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.17.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.18.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.18.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.18.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.18.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.18.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.18.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.18.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.19.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.19.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.19.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.19.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.19.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.19.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.19.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.20.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.20.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.20.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.20.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.20.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.20.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.20.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.21.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.21.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.21.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.21.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.21.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.21.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.21.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.22.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.22.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.22.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.22.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.22.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.22.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.22.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.23.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.23.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.23.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.23.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.23.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.23.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.23.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.24.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.24.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.24.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.24.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.24.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.24.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.24.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.24.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.25.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.25.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.25.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.25.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.25.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.25.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.25.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.25.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.26.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.26.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.26.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.26.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.26.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.26.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.26.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.26.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.27.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.27.attn_q.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.27.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.27.attn_k.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.27.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.27.attn_v.weight, torch.float16 --> Q8_0, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.27.attn_output.weight, torch.float16 --> Q8_0, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_gate.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_up.weight, torch.float16 --> Q8_0, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_down.weight, torch.float16 --> Q8_0, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.27.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:output_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:Set meta model\n",
+ "INFO:hf-to-gguf:Set model parameters\n",
+ "INFO:hf-to-gguf:gguf: context length = 4096\n",
+ "INFO:hf-to-gguf:gguf: embedding length = 1536\n",
+ "INFO:hf-to-gguf:gguf: feed forward length = 8960\n",
+ "INFO:hf-to-gguf:gguf: head count = 12\n",
+ "INFO:hf-to-gguf:gguf: key-value head count = 2\n",
+ "INFO:hf-to-gguf:gguf: rope theta = 10000.0\n",
+ "INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-06\n",
+ "INFO:hf-to-gguf:gguf: file type = 7\n",
+ "INFO:hf-to-gguf:Set model tokenizer\n",
+ "INFO:numexpr.utils:NumExpr defaulting to 2 threads.\n",
+ "2025-01-15 03:52:04.014137: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
+ "2025-01-15 03:52:04.040222: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
+ "2025-01-15 03:52:04.048249: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
+ "2025-01-15 03:52:05.924447: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
+ "INFO:gguf.vocab:Adding 151387 merge(s).\n",
+ "INFO:gguf.vocab:Setting special token type eos to 151645\n",
+ "INFO:gguf.vocab:Setting special token type pad to 151665\n",
+ "INFO:gguf.vocab:Setting special token type bos to 151643\n",
+ "INFO:gguf.vocab:Setting add_bos_token to False\n",
+ "INFO:gguf.vocab:Setting chat_template to {%- if tools %}\n",
+ " {{- '<|im_start|>system\\n' }}\n",
+ " {%- if messages[0]['role'] == 'system' %}\n",
+ " {{- messages[0]['content'] }}\n",
+ " {%- else %}\n",
+ " {{- 'Please reason step by step, and put your final answer within \\\\boxed{}.' }}\n",
+ " {%- endif %}\n",
+ " {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n",
+ " {%- for tool in tools %}\n",
+ " {{- \"\\n\" }}\n",
+ " {{- tool | tojson }}\n",
+ " {%- endfor %}\n",
+ " {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n",
+ "{%- else %}\n",
+ " {%- if messages[0]['role'] == 'system' %}\n",
+ " {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n",
+ " {%- else %}\n",
+ " {{- '<|im_start|>system\\nPlease reason step by step, and put your final answer within \\\\boxed{}.<|im_end|>\\n' }}\n",
+ " {%- endif %}\n",
+ "{%- endif %}\n",
+ "{%- for message in messages %}\n",
+ " {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n",
+ " {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n",
+ " {%- elif message.role == \"assistant\" %}\n",
+ " {{- '<|im_start|>' + message.role }}\n",
+ " {%- if message.content %}\n",
+ " {{- '\\n' + message.content }}\n",
+ " {%- endif %}\n",
+ " {%- for tool_call in message.tool_calls %}\n",
+ " {%- if tool_call.function is defined %}\n",
+ " {%- set tool_call = tool_call.function %}\n",
+ " {%- endif %}\n",
+ " {{- '\\n\\n{\"name\": \"' }}\n",
+ " {{- tool_call.name }}\n",
+ " {{- '\", \"arguments\": ' }}\n",
+ " {{- tool_call.arguments | tojson }}\n",
+ " {{- '}\\n' }}\n",
+ " {%- endfor %}\n",
+ " {{- '<|im_end|>\\n' }}\n",
+ " {%- elif message.role == \"tool\" %}\n",
+ " {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n",
+ " {{- '<|im_start|>user' }}\n",
+ " {%- endif %}\n",
+ " {{- '\\n\\n' }}\n",
+ " {{- message.content }}\n",
+ " {{- '\\n' }}\n",
+ " {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n",
+ " {{- '<|im_end|>\\n' }}\n",
+ " {%- endif %}\n",
+ " {%- endif %}\n",
+ "{%- endfor %}\n",
+ "{%- if add_generation_prompt %}\n",
+ " {{- '<|im_start|>assistant\\n' }}\n",
+ "{%- endif %}\n",
+ "\n",
+ "INFO:hf-to-gguf:Set model quantization version\n",
+ "INFO:gguf.gguf_writer:Writing the following files:\n",
+ "INFO:gguf.gguf_writer:/content/model/unsloth.Q8_0.gguf: n_tensors = 338, total_size = 1.6G\n",
+ "Writing: 100%|██████████| 1.64G/1.64G [00:36<00:00, 44.6Mbyte/s]\n",
+ "INFO:hf-to-gguf:Model successfully exported to /content/model/unsloth.Q8_0.gguf\n",
+ "Unsloth: Conversion completed! Output location: /content/model/unsloth.Q8_0.gguf\n",
+ "Unsloth: Merging 4bit and LoRA weights to 16bit...\n",
+ "Unsloth: Will use up to 6.47 out of 12.67 RAM for saving.\n",
+ "Unsloth: Saving model... This might take 5 minutes ...\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 28/28 [00:00<00:00, 55.92it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/pytorch_model.bin...\n",
+ "Done.\n",
+ "==((====))== Unsloth: Conversion from QLoRA to GGUF information\n",
+ " \\\\ /| [0] Installing llama.cpp might take 3 minutes.\n",
+ "O^O/ \\_/ \\ [1] Converting HF to GGUF 16bits might take 3 minutes.\n",
+ "\\ / [2] Converting GGUF 16bits to ['q4_k_m'] might take 10 minutes each.\n",
+ " \"-____-\" In total, you will have to wait at least 16 minutes.\n",
+ "\n",
+ "Unsloth: Installing llama.cpp. This might take 3 minutes...\n",
+ "Unsloth: [1] Converting model at Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m into f16 GGUF format.\n",
+ "The output location will be /content/Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/unsloth.F16.gguf\n",
+ "This might take 3 minutes...\n",
+ "INFO:hf-to-gguf:Loading model: Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m\n",
+ "INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n",
+ "INFO:hf-to-gguf:Exporting model...\n",
+ "INFO:hf-to-gguf:gguf: loading model part 'pytorch_model.bin'\n",
+ "INFO:hf-to-gguf:token_embd.weight, torch.float16 --> F16, shape = {1536, 151936}\n",
+ "INFO:hf-to-gguf:blk.0.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.0.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.0.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.0.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.0.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.0.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.0.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.1.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.1.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.1.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.1.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.1.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.1.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.1.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.2.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.2.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.2.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.2.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.2.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.2.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.2.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.3.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.3.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.3.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.3.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.3.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.3.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.3.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.4.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.4.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.4.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.4.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.4.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.4.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.4.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.5.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.5.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.5.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.5.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.5.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.5.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.5.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.6.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.6.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.6.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.6.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.6.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.6.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.6.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.7.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.7.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.7.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.7.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.7.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.7.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.7.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.8.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.8.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.8.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.8.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.8.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.8.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.8.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.9.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.9.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.9.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.9.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.9.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.9.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.9.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.10.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.10.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.10.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.10.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.10.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.10.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.10.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.11.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.11.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.11.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.11.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.11.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.11.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.11.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.12.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.12.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.12.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.12.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.12.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.12.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.12.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.12.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.13.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.13.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.13.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.13.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.13.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.13.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.13.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.14.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.14.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.14.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.14.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.14.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.14.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.14.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.15.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.15.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.15.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.15.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.15.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.15.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.15.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.16.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.16.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.16.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.16.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.16.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.16.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.16.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.17.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.17.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.17.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.17.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.17.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.17.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.17.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.18.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.18.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.18.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.18.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.18.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.18.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.18.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.19.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.19.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.19.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.19.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.19.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.19.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.19.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.20.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.20.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.20.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.20.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.20.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.20.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.20.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.21.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.21.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.21.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.21.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.21.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.21.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.21.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.22.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.22.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.22.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.22.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.22.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.22.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.22.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.23.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.23.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.23.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.23.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.23.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.23.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.23.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.24.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.24.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.24.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.24.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.24.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.24.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.24.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.24.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.25.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.25.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.25.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.25.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.25.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.25.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.25.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.25.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.26.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.26.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.26.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.26.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.26.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.26.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.26.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.26.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.27.attn_q.bias, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.27.attn_q.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.27.attn_k.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.27.attn_k.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.27.attn_v.bias, torch.float16 --> F32, shape = {256}\n",
+ "INFO:hf-to-gguf:blk.27.attn_v.weight, torch.float16 --> F16, shape = {1536, 256}\n",
+ "INFO:hf-to-gguf:blk.27.attn_output.weight, torch.float16 --> F16, shape = {1536, 1536}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_gate.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_up.weight, torch.float16 --> F16, shape = {1536, 8960}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_down.weight, torch.float16 --> F16, shape = {8960, 1536}\n",
+ "INFO:hf-to-gguf:blk.27.attn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:output_norm.weight, torch.float16 --> F32, shape = {1536}\n",
+ "INFO:hf-to-gguf:Set meta model\n",
+ "INFO:hf-to-gguf:Set model parameters\n",
+ "INFO:hf-to-gguf:gguf: context length = 4096\n",
+ "INFO:hf-to-gguf:gguf: embedding length = 1536\n",
+ "INFO:hf-to-gguf:gguf: feed forward length = 8960\n",
+ "INFO:hf-to-gguf:gguf: head count = 12\n",
+ "INFO:hf-to-gguf:gguf: key-value head count = 2\n",
+ "INFO:hf-to-gguf:gguf: rope theta = 10000.0\n",
+ "INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-06\n",
+ "INFO:hf-to-gguf:gguf: file type = 1\n",
+ "INFO:hf-to-gguf:Set model tokenizer\n",
+ "INFO:numexpr.utils:NumExpr defaulting to 2 threads.\n",
+ "2025-01-15 03:53:24.715092: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
+ "2025-01-15 03:53:24.741806: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
+ "2025-01-15 03:53:24.752070: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
+ "2025-01-15 03:53:26.568906: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
+ "INFO:gguf.vocab:Adding 151387 merge(s).\n",
+ "INFO:gguf.vocab:Setting special token type eos to 151645\n",
+ "INFO:gguf.vocab:Setting special token type pad to 151665\n",
+ "INFO:gguf.vocab:Setting special token type bos to 151643\n",
+ "INFO:gguf.vocab:Setting add_bos_token to False\n",
+ "INFO:gguf.vocab:Setting chat_template to {%- if tools %}\n",
+ " {{- '<|im_start|>system\\n' }}\n",
+ " {%- if messages[0]['role'] == 'system' %}\n",
+ " {{- messages[0]['content'] }}\n",
+ " {%- else %}\n",
+ " {{- 'Please reason step by step, and put your final answer within \\\\boxed{}.' }}\n",
+ " {%- endif %}\n",
+ " {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n",
+ " {%- for tool in tools %}\n",
+ " {{- \"\\n\" }}\n",
+ " {{- tool | tojson }}\n",
+ " {%- endfor %}\n",
+ " {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n",
+ "{%- else %}\n",
+ " {%- if messages[0]['role'] == 'system' %}\n",
+ " {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n",
+ " {%- else %}\n",
+ " {{- '<|im_start|>system\\nPlease reason step by step, and put your final answer within \\\\boxed{}.<|im_end|>\\n' }}\n",
+ " {%- endif %}\n",
+ "{%- endif %}\n",
+ "{%- for message in messages %}\n",
+ " {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n",
+ " {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n",
+ " {%- elif message.role == \"assistant\" %}\n",
+ " {{- '<|im_start|>' + message.role }}\n",
+ " {%- if message.content %}\n",
+ " {{- '\\n' + message.content }}\n",
+ " {%- endif %}\n",
+ " {%- for tool_call in message.tool_calls %}\n",
+ " {%- if tool_call.function is defined %}\n",
+ " {%- set tool_call = tool_call.function %}\n",
+ " {%- endif %}\n",
+ " {{- '\\n\\n{\"name\": \"' }}\n",
+ " {{- tool_call.name }}\n",
+ " {{- '\", \"arguments\": ' }}\n",
+ " {{- tool_call.arguments | tojson }}\n",
+ " {{- '}\\n' }}\n",
+ " {%- endfor %}\n",
+ " {{- '<|im_end|>\\n' }}\n",
+ " {%- elif message.role == \"tool\" %}\n",
+ " {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n",
+ " {{- '<|im_start|>user' }}\n",
+ " {%- endif %}\n",
+ " {{- '\\n\\n' }}\n",
+ " {{- message.content }}\n",
+ " {{- '\\n' }}\n",
+ " {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n",
+ " {{- '<|im_end|>\\n' }}\n",
+ " {%- endif %}\n",
+ " {%- endif %}\n",
+ "{%- endfor %}\n",
+ "{%- if add_generation_prompt %}\n",
+ " {{- '<|im_start|>assistant\\n' }}\n",
+ "{%- endif %}\n",
+ "\n",
+ "INFO:hf-to-gguf:Set model quantization version\n",
+ "INFO:gguf.gguf_writer:Writing the following files:\n",
+ "INFO:gguf.gguf_writer:/content/Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/unsloth.F16.gguf: n_tensors = 338, total_size = 3.1G\n",
+ "Writing: 100%|ββββββββββ| 3.09G/3.09G [00:34<00:00, 89.6Mbyte/s]\n",
+ "INFO:hf-to-gguf:Model successfully exported to /content/Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/unsloth.F16.gguf\n",
+ "Unsloth: Conversion completed! Output location: /content/Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/unsloth.F16.gguf\n",
+ "Unsloth: [2] Converting GGUF 16bit into q4_k_m. This might take 20 minutes...\n",
+ "main: build = 4485 (f446c2cf)\n",
+ "main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n",
+ "main: quantizing '/content/Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/unsloth.F16.gguf' to '/content/Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/unsloth.Q4_K_M.gguf' as Q4_K_M using 4 threads\n",
+ "llama_model_loader: loaded meta data with 27 key-value pairs and 338 tensors from /content/Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/unsloth.F16.gguf (version GGUF V3 (latest))\n",
+ "llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n",
+ "llama_model_loader: - kv 0: general.architecture str = qwen2\n",
+ "llama_model_loader: - kv 1: general.type str = model\n",
+ "llama_model_loader: - kv 2: general.name str = Qwen2.5 Math 1.5b Instruct Bnb 4bit\n",
+ "llama_model_loader: - kv 3: general.organization str = Unsloth\n",
+ "llama_model_loader: - kv 4: general.finetune str = instruct-bnb-4bit\n",
+ "llama_model_loader: - kv 5: general.basename str = qwen2.5-math\n",
+ "llama_model_loader: - kv 6: general.size_label str = 1.5B\n",
+ "llama_model_loader: - kv 7: qwen2.block_count u32 = 28\n",
+ "llama_model_loader: - kv 8: qwen2.context_length u32 = 4096\n",
+ "llama_model_loader: - kv 9: qwen2.embedding_length u32 = 1536\n",
+ "llama_model_loader: - kv 10: qwen2.feed_forward_length u32 = 8960\n",
+ "llama_model_loader: - kv 11: qwen2.attention.head_count u32 = 12\n",
+ "llama_model_loader: - kv 12: qwen2.attention.head_count_kv u32 = 2\n",
+ "llama_model_loader: - kv 13: qwen2.rope.freq_base f32 = 10000.000000\n",
+ "llama_model_loader: - kv 14: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001\n",
+ "llama_model_loader: - kv 15: general.file_type u32 = 1\n",
+ "llama_model_loader: - kv 16: tokenizer.ggml.model str = gpt2\n",
+ "llama_model_loader: - kv 17: tokenizer.ggml.pre str = qwen2\n",
+ "llama_model_loader: - kv 18: tokenizer.ggml.tokens arr[str,151936] = [\"!\", \"\\\"\", \"#\", \"$\", \"%\", \"&\", \"'\", ...\n",
+ "llama_model_loader: - kv 19: tokenizer.ggml.token_type arr[i32,151936] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...\n",
+ "llama_model_loader: - kv 20: tokenizer.ggml.merges arr[str,151387] = [\"Δ Δ \", \"Δ Δ Δ Δ \", \"i n\", \"Δ t\",...\n",
+ "llama_model_loader: - kv 21: tokenizer.ggml.eos_token_id u32 = 151645\n",
+ "llama_model_loader: - kv 22: tokenizer.ggml.padding_token_id u32 = 151665\n",
+ "llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 151643\n",
+ "llama_model_loader: - kv 24: tokenizer.ggml.add_bos_token bool = false\n",
+ "llama_model_loader: - kv 25: tokenizer.chat_template str = {%- if tools %}\\n {{- '<|im_start|>...\n",
+ "llama_model_loader: - kv 26: general.quantization_version u32 = 2\n",
+ "llama_model_loader: - type f32: 141 tensors\n",
+ "llama_model_loader: - type f16: 197 tensors\n",
+ "[ 1/ 338] output_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 2/ 338] token_embd.weight - [ 1536, 151936, 1, 1], type = f16, converting to q6_K .. size = 445.12 MiB -> 182.57 MiB\n",
+ "[ 3/ 338] blk.0.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 4/ 338] blk.0.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 5/ 338] blk.0.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 6/ 338] blk.0.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 7/ 338] blk.0.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 8/ 338] blk.0.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 9/ 338] blk.0.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 10/ 338] blk.0.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n",
+ "[ 11/ 338] blk.0.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n",
+ "[ 12/ 338] blk.0.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 13/ 338] blk.0.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 14/ 338] blk.0.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 15/ 338] blk.1.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 16/ 338] blk.1.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 17/ 338] blk.1.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 18/ 338] blk.1.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 19/ 338] blk.1.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 20/ 338] blk.1.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 21/ 338] blk.1.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 22/ 338] blk.1.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n",
+ "[ 23/ 338] blk.1.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n",
+ "[ 24/ 338] blk.1.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 25/ 338] blk.1.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 26/ 338] blk.1.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 27/ 338] blk.2.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 28/ 338] blk.2.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 29/ 338] blk.2.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 30/ 338] blk.2.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 31/ 338] blk.2.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 32/ 338] blk.2.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 33/ 338] blk.2.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 34/ 338] blk.2.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n",
+ "[ 35/ 338] blk.2.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n",
+ "[ 36/ 338] blk.2.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 37/ 338] blk.2.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 38/ 338] blk.2.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 39/ 338] blk.3.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 40/ 338] blk.3.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 41/ 338] blk.3.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 42/ 338] blk.3.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 43/ 338] blk.3.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 44/ 338] blk.3.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 45/ 338] blk.3.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 46/ 338] blk.3.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 47/ 338] blk.3.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 48/ 338] blk.3.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 49/ 338] blk.3.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 50/ 338] blk.3.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 51/ 338] blk.4.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 52/ 338] blk.4.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 53/ 338] blk.4.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 54/ 338] blk.4.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 55/ 338] blk.4.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 56/ 338] blk.4.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 57/ 338] blk.4.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 58/ 338] blk.4.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 59/ 338] blk.4.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 60/ 338] blk.4.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 61/ 338] blk.4.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 62/ 338] blk.4.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 63/ 338] blk.5.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 64/ 338] blk.5.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 65/ 338] blk.5.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 66/ 338] blk.5.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 67/ 338] blk.5.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 68/ 338] blk.5.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 69/ 338] blk.5.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 70/ 338] blk.5.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n",
+ "[ 71/ 338] blk.5.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n",
+ "[ 72/ 338] blk.5.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 73/ 338] blk.5.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 74/ 338] blk.5.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 75/ 338] blk.6.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 76/ 338] blk.6.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 77/ 338] blk.6.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 78/ 338] blk.6.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 79/ 338] blk.6.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 80/ 338] blk.6.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 81/ 338] blk.6.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 82/ 338] blk.6.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 83/ 338] blk.6.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 84/ 338] blk.6.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 85/ 338] blk.6.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 86/ 338] blk.6.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 87/ 338] blk.7.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 88/ 338] blk.7.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 89/ 338] blk.7.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 90/ 338] blk.7.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 91/ 338] blk.7.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 92/ 338] blk.7.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 93/ 338] blk.7.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 94/ 338] blk.7.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 95/ 338] blk.7.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 96/ 338] blk.7.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 97/ 338] blk.7.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 98/ 338] blk.7.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 99/ 338] blk.8.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 100/ 338] blk.8.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 101/ 338] blk.8.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 102/ 338] blk.8.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 103/ 338] blk.8.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 104/ 338] blk.8.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 105/ 338] blk.8.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 106/ 338] blk.8.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n",
+ "[ 107/ 338] blk.8.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n",
+ "[ 108/ 338] blk.8.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 109/ 338] blk.8.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 110/ 338] blk.8.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 111/ 338] blk.9.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 112/ 338] blk.9.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 113/ 338] blk.9.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 114/ 338] blk.9.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 115/ 338] blk.9.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 116/ 338] blk.9.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 117/ 338] blk.9.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 118/ 338] blk.9.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 119/ 338] blk.9.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 120/ 338] blk.9.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 121/ 338] blk.9.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 122/ 338] blk.9.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 123/ 338] blk.10.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 124/ 338] blk.10.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 125/ 338] blk.10.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 126/ 338] blk.10.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 127/ 338] blk.10.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 128/ 338] blk.10.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 129/ 338] blk.10.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 130/ 338] blk.10.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 131/ 338] blk.10.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 132/ 338] blk.10.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 133/ 338] blk.10.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 134/ 338] blk.10.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 135/ 338] blk.11.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 136/ 338] blk.11.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 137/ 338] blk.11.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 138/ 338] blk.11.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 139/ 338] blk.11.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 140/ 338] blk.11.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 141/ 338] blk.11.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 142/ 338] blk.11.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n",
+ "[ 143/ 338] blk.11.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n",
+ "[ 144/ 338] blk.11.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 145/ 338] blk.11.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 146/ 338] blk.11.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 147/ 338] blk.12.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 148/ 338] blk.12.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 149/ 338] blk.12.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 150/ 338] blk.12.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 151/ 338] blk.12.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 152/ 338] blk.12.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 153/ 338] blk.12.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 154/ 338] blk.12.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 155/ 338] blk.12.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 156/ 338] blk.12.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 157/ 338] blk.12.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 158/ 338] blk.12.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 159/ 338] blk.13.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 160/ 338] blk.13.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 161/ 338] blk.13.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 162/ 338] blk.13.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 163/ 338] blk.13.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 164/ 338] blk.13.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 165/ 338] blk.13.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 166/ 338] blk.13.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 167/ 338] blk.13.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 168/ 338] blk.13.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 169/ 338] blk.13.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 170/ 338] blk.13.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 171/ 338] blk.14.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 172/ 338] blk.14.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 173/ 338] blk.14.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 174/ 338] blk.14.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 175/ 338] blk.14.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 176/ 338] blk.14.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 177/ 338] blk.14.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 178/ 338] blk.14.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n",
+ "[ 179/ 338] blk.14.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n",
+ "[ 180/ 338] blk.14.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 181/ 338] blk.14.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 182/ 338] blk.14.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 183/ 338] blk.15.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 184/ 338] blk.15.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 185/ 338] blk.15.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 186/ 338] blk.15.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 187/ 338] blk.15.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 188/ 338] blk.15.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 189/ 338] blk.15.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 190/ 338] blk.15.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 191/ 338] blk.15.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 192/ 338] blk.15.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 193/ 338] blk.15.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 194/ 338] blk.15.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 195/ 338] blk.16.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 196/ 338] blk.16.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 197/ 338] blk.16.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 198/ 338] blk.16.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 199/ 338] blk.16.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 200/ 338] blk.16.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 201/ 338] blk.16.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 202/ 338] blk.16.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 203/ 338] blk.16.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 204/ 338] blk.16.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 205/ 338] blk.16.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 206/ 338] blk.16.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 207/ 338] blk.17.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 208/ 338] blk.17.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 209/ 338] blk.17.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 210/ 338] blk.17.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 211/ 338] blk.17.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 212/ 338] blk.17.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 213/ 338] blk.17.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 214/ 338] blk.17.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n",
+ "[ 215/ 338] blk.17.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n",
+ "[ 216/ 338] blk.17.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 217/ 338] blk.17.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 218/ 338] blk.17.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 219/ 338] blk.18.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 220/ 338] blk.18.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 221/ 338] blk.18.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 222/ 338] blk.18.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 223/ 338] blk.18.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 224/ 338] blk.18.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 225/ 338] blk.18.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 226/ 338] blk.18.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 227/ 338] blk.18.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 228/ 338] blk.18.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 229/ 338] blk.18.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 230/ 338] blk.18.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 231/ 338] blk.19.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 232/ 338] blk.19.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 233/ 338] blk.19.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 234/ 338] blk.19.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 235/ 338] blk.19.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 236/ 338] blk.19.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 237/ 338] blk.19.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 238/ 338] blk.19.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 239/ 338] blk.19.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 240/ 338] blk.19.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 241/ 338] blk.19.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 242/ 338] blk.19.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 243/ 338] blk.20.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 244/ 338] blk.20.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 245/ 338] blk.20.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 246/ 338] blk.20.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 247/ 338] blk.20.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 248/ 338] blk.20.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 249/ 338] blk.20.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 250/ 338] blk.20.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n",
+ "[ 251/ 338] blk.20.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n",
+ "[ 252/ 338] blk.20.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 253/ 338] blk.20.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 254/ 338] blk.20.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 255/ 338] blk.21.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 256/ 338] blk.21.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 257/ 338] blk.21.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 258/ 338] blk.21.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 259/ 338] blk.21.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 260/ 338] blk.21.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 261/ 338] blk.21.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 262/ 338] blk.21.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 263/ 338] blk.21.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 264/ 338] blk.21.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 265/ 338] blk.21.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 266/ 338] blk.21.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 267/ 338] blk.22.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 268/ 338] blk.22.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 269/ 338] blk.22.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 270/ 338] blk.22.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 271/ 338] blk.22.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 272/ 338] blk.22.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 273/ 338] blk.22.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 274/ 338] blk.22.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 275/ 338] blk.22.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 276/ 338] blk.22.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 277/ 338] blk.22.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 278/ 338] blk.22.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 279/ 338] blk.23.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 280/ 338] blk.23.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 281/ 338] blk.23.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 282/ 338] blk.23.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 283/ 338] blk.23.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 284/ 338] blk.23.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 285/ 338] blk.23.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 286/ 338] blk.23.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n",
+ "[ 287/ 338] blk.23.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n",
+ "[ 288/ 338] blk.23.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 289/ 338] blk.23.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 290/ 338] blk.23.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 291/ 338] blk.24.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 292/ 338] blk.24.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 293/ 338] blk.24.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 294/ 338] blk.24.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 295/ 338] blk.24.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 296/ 338] blk.24.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 297/ 338] blk.24.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 298/ 338] blk.24.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n",
+ "[ 299/ 338] blk.24.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n",
+ "[ 300/ 338] blk.24.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 301/ 338] blk.24.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 302/ 338] blk.24.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 303/ 338] blk.25.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 304/ 338] blk.25.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 305/ 338] blk.25.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 306/ 338] blk.25.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 307/ 338] blk.25.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 308/ 338] blk.25.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 309/ 338] blk.25.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 310/ 338] blk.25.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n",
+ "[ 311/ 338] blk.25.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n",
+ "[ 312/ 338] blk.25.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 313/ 338] blk.25.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 314/ 338] blk.25.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 315/ 338] blk.26.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 316/ 338] blk.26.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 317/ 338] blk.26.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 318/ 338] blk.26.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 319/ 338] blk.26.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 320/ 338] blk.26.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 321/ 338] blk.26.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 322/ 338] blk.26.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n",
+ "[ 323/ 338] blk.26.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n",
+ "[ 324/ 338] blk.26.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 325/ 338] blk.26.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 326/ 338] blk.26.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 327/ 338] blk.27.attn_k.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 328/ 338] blk.27.attn_k.weight - [ 1536, 256, 1, 1], type = f16, converting to q4_K .. size = 0.75 MiB -> 0.21 MiB\n",
+ "[ 329/ 338] blk.27.attn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 330/ 338] blk.27.attn_output.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 331/ 338] blk.27.attn_q.bias - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 332/ 338] blk.27.attn_q.weight - [ 1536, 1536, 1, 1], type = f16, converting to q4_K .. size = 4.50 MiB -> 1.27 MiB\n",
+ "[ 333/ 338] blk.27.attn_v.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB\n",
+ "[ 334/ 338] blk.27.attn_v.weight - [ 1536, 256, 1, 1], type = f16, converting to q6_K .. size = 0.75 MiB -> 0.31 MiB\n",
+ "[ 335/ 338] blk.27.ffn_down.weight - [ 8960, 1536, 1, 1], type = f16, converting to q6_K .. size = 26.25 MiB -> 10.77 MiB\n",
+ "[ 336/ 338] blk.27.ffn_gate.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "[ 337/ 338] blk.27.ffn_norm.weight - [ 1536, 1, 1, 1], type = f32, size = 0.006 MB\n",
+ "[ 338/ 338] blk.27.ffn_up.weight - [ 1536, 8960, 1, 1], type = f16, converting to q4_K .. size = 26.25 MiB -> 7.38 MiB\n",
+ "llama_model_quantize_impl: model size = 2944.68 MB\n",
+ "llama_model_quantize_impl: quant size = 934.69 MB\n",
+ "\n",
+ "main: quantize time = 160895.28 ms\n",
+ "main: total time = 160895.28 ms\n",
+ "Unsloth: Conversion completed! Output location: /content/Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m/unsloth.Q4_K_M.gguf\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Save to 8bit Q8_0\n",
+ "if True: model.save_pretrained_gguf(\"model\", tokenizer,)\n",
+ "if False: model.push_to_hub_gguf(\"hf/model\", tokenizer, token = \"\")\n",
+ "\n",
+ "# Save to 16bit GGUF\n",
+ "if False: model.save_pretrained_gguf(\"model\", tokenizer, quantization_method = \"f16\")\n",
+ "if False: model.push_to_hub_gguf(\"hf/model\", tokenizer, quantization_method = \"f16\", token = \"\")\n",
+ "\n",
+ "# Save to q4_k_m GGUF\n",
+ "if True: model.save_pretrained_gguf(\"Qwen2.5-Math-1.5B-Instruc-ORPO-q4_k_m\", tokenizer, quantization_method = \"q4_k_m\")\n",
+ "if False: model.push_to_hub_gguf(\"hf/model\", tokenizer, quantization_method = \"q4_k_m\", token = \"\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "TL-PCYb1yMzx"
+ },
+ "source": [
+ "Now, use the `model-unsloth.gguf` file or `model-unsloth-Q4_K_M.gguf` file in `llama.cpp` or a UI based system like `GPT4All`. You can install GPT4All by going [here](https://gpt4all.io/index.html)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "XOFzC441vCtq"
+ },
+ "source": [
+ "\n",
+ "### Ollama Support\n",
+ "\n",
+ "[Unsloth](https://github.com/unslothai/unsloth) now allows you to automatically finetune and create a [Modelfile](https://github.com/ollama/ollama/blob/main/docs/modelfile.md), and export to [Ollama](https://ollama.com/)! This makes finetuning much easier and provides a seamless workflow from `Unsloth` to `Ollama`!\n",
+ "\n",
+ "Let's first install `Ollama`!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "NUxcyP_UfeLl",
+ "outputId": "52262c8e-341b-4dce-c967-cddc46aa659d"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ ">>> Installing ollama to /usr/local\n",
+ ">>> Downloading Linux amd64 bundle\n",
+ "############################################################################################# 100.0%\n",
+ ">>> Creating ollama user...\n",
+ ">>> Adding ollama user to video group...\n",
+ ">>> Adding current user to ollama group...\n",
+ ">>> Creating ollama systemd service...\n",
+ "\u001b[1m\u001b[31mWARNING:\u001b[m systemd is not running\n",
+ "\u001b[1m\u001b[31mWARNING:\u001b[m Unable to detect NVIDIA/AMD GPU. Install lspci or lshw to automatically detect and install GPU dependencies.\n",
+ ">>> The Ollama API is now available at 127.0.0.1:11434.\n",
+ ">>> Install complete. Run \"ollama\" from the command line.\n"
+ ]
+ }
+ ],
+ "source": [
+ "!curl -fsSL https://ollama.com/install.sh | sh"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "id": "mcP9omF_tN7Q"
+ },
+ "outputs": [],
+ "source": [
+ "import subprocess\n",
+ "\n",
+ "subprocess.Popen([\"ollama\", \"serve\"])\n",
+ "import time\n",
+ "\n",
+ "time.sleep(3) # Wait for a few seconds for Ollama to load!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "md3PExRLRhOc"
+ },
+ "source": [
+    "`Ollama` needs a `Modelfile`, which specifies the model's prompt format. Let's print Unsloth's auto-generated one:"
+ ]
+ },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 263
+    },
+    "id": "h82vfNigRhiz",
+    "outputId": "569d817b-a804-4ee8-fbf3-511a94afbdb6"
+   },
+   "outputs": [],
+   "source": [
+    "# Unsloth attaches an auto-generated Ollama Modelfile to the tokenizer after a\n",
+    "# GGUF export.  Guard the attribute access: on some tokenizer classes (e.g.\n",
+    "# Qwen2TokenizerFast) the attribute may be missing, and a bare access raises\n",
+    "# AttributeError, leaving the notebook with a crashing cell.\n",
+    "modelfile = getattr(tokenizer, \"_ollama_modelfile\", None)\n",
+    "if modelfile is None:\n",
+    "    print(\"No auto-generated Modelfile found on this tokenizer - re-run the GGUF export cell above first.\")\n",
+    "else:\n",
+    "    print(modelfile)"
+   ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "j6cipBJBudxv"
+ },
+ "source": [
+ "We now will create an `Ollama` model called `unsloth_model` using the `Modelfile` which we auto generated!"
+ ]
+ },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "SDTUJv_QiaVh",
+    "outputId": "ab427885-22c0-4786-eaf5-d76e97d765a4"
+   },
+   "outputs": [],
+   "source": [
+    "import os.path\n",
+    "\n",
+    "# Only attempt the build when the Modelfile actually exists; otherwise\n",
+    "# `ollama create` fails with \"specified Modelfile wasn't found\".\n",
+    "if os.path.exists(\"./model/Modelfile\"):\n",
+    "    !ollama create unsloth_model -f ./model/Modelfile\n",
+    "else:\n",
+    "    print(\"./model/Modelfile not found - generate it with the GGUF export / Modelfile cells above.\")"
+   ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-KSoKTKQukba"
+ },
+ "source": [
+ "And now we can do inference on it via `Ollama`!\n",
+ "\n",
+ "You can also upload to `Ollama` and try the `Ollama` Desktop app by heading to https://www.ollama.com/"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "rkp0uMrNpYaW",
+ "outputId": "38bb3bd7-4a29-4c81-e319-388dcd96a449"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:04.241326628Z\",\"message\":{\"role\":\"assistant\",\"content\":\"The\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:04.465575479Z\",\"message\":{\"role\":\"assistant\",\"content\":\" next\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:04.760101468Z\",\"message\":{\"role\":\"assistant\",\"content\":\" number\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:05.051240606Z\",\"message\":{\"role\":\"assistant\",\"content\":\" in\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:05.376545126Z\",\"message\":{\"role\":\"assistant\",\"content\":\" the\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:05.515751946Z\",\"message\":{\"role\":\"assistant\",\"content\":\" Fibonacci\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:05.658721744Z\",\"message\":{\"role\":\"assistant\",\"content\":\" sequence\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:05.795226527Z\",\"message\":{\"role\":\"assistant\",\"content\":\" after\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:05.923676364Z\",\"message\":{\"role\":\"assistant\",\"content\":\" \"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:06.053599585Z\",\"message\":{\"role\":\"assistant\",\"content\":\"8\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:06.187220374Z\",\"message\":{\"role\":\"assistant\",\"content\":\" is\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:06.316237671Z\",\"message\":{\"role\":\"assistant\",\"content\":\" \"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:06.448901764Z\",\"message\":{\"role\":\"assistant\",\"content\":\"13\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:06.585864644Z\",\"message\":{\"role\":\"assistant\",\"content\":\" (\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:06.712030586Z\",\"message\":{\"role\":\"assistant\",\"content\":\"the\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:06.835728964Z\",\"message\":{\"role\":\"assistant\",\"content\":\" sum\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:06.962898827Z\",\"message\":{\"role\":\"assistant\",\"content\":\" of\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:07.088064406Z\",\"message\":{\"role\":\"assistant\",\"content\":\" the\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:07.212942126Z\",\"message\":{\"role\":\"assistant\",\"content\":\" previous\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:07.336569966Z\",\"message\":{\"role\":\"assistant\",\"content\":\" two\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:07.46094096Z\",\"message\":{\"role\":\"assistant\",\"content\":\" numbers\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:07.593857726Z\",\"message\":{\"role\":\"assistant\",\"content\":\").\"},\"done\":false}\n",
+ "{\"model\":\"unsloth_model\",\"created_at\":\"2024-10-01T06:47:07.741203726Z\",\"message\":{\"role\":\"assistant\",\"content\":\"\"},\"done_reason\":\"stop\",\"done\":true,\"total_duration\":3741960321,\"load_duration\":48967410,\"prompt_eval_count\":47,\"prompt_eval_duration\":150430000,\"eval_count\":23,\"eval_duration\":3499634000}\n"
+ ]
+ }
+ ],
+ "source": [
+ "!curl http://localhost:11434/api/chat -d '{ \\\n",
+ " \"model\": \"unsloth_model\", \\\n",
+ " \"messages\": [ \\\n",
+ " { \"role\": \"user\", \"content\": \"Continue the Fibonacci sequence: 1, 1, 2, 3, 5, 8,\" } \\\n",
+ " ] \\\n",
+ " }'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "XnMbhp7KsKhr"
+ },
+ "source": [
+ "# ChatGPT interactive mode\n",
+ "\n",
+ "### Step 1: To run the finetuned model like in a ChatGPT style interface, first click the **| >_ |** button.\n",
+ "\n",
+ "\n",
+ "---\n",
+ "---\n",
+ "---\n",
+ "\n",
+ "### Step 2: Then, type `ollama run unsloth_model`\n",
+ "\n",
+ "\n",
+ "\n",
+ "---\n",
+ "---\n",
+ "---\n",
+ "### Step 3: And you have a ChatGPT style assistant!\n",
+ "\n",
+ "### Type any question you like and press `ENTER`. If you want to exit, hit `CTRL + D`\n",
+ "You can also use the `model-unsloth.gguf` file or `model-unsloth-Q4_K_M.gguf` file in llama.cpp or a UI based system like Jan or Open WebUI. You can install Jan [here](https://github.com/janhq/jan) and Open WebUI [here](https://github.com/open-webui/open-webui)\n",
+ "\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.13"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "00936bda447f46e5b1439913aeb730de": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "05096318093d4ded8f6aacf0d8baa5a1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_8e10bfda957d412592a2b5a7a33c81aa",
+ "IPY_MODEL_2157c5a847d24a9c88fe0a657fa4a7d5",
+ "IPY_MODEL_df90e41a4fc241c1ad61506089b59e83"
+ ],
+ "layout": "IPY_MODEL_7ec180f6335341d594e0a726c48777a3"
+ }
+ },
+ "0576ab6d4cce4cb09f0755a230d50980": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "061df7aa5cd44f8481ec3af743ffc442": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "06a7440478ee4c35a58bba8a434d9e69": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "075bc926c06742d399070029d705173f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4543871a84b646f286bd601896d5d8cc",
+ "max": 632,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_07bac781635f4da69b591e7579359c64",
+ "value": 632
+ }
+ },
+ "078aba59954f45d5bae22aacaa9a5a8b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "07bac781635f4da69b591e7579359c64": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "0a9c24d18391432f84fe2f89f69193b7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0aa69e1780cf4537ada828298340f744": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ce3e19e662e94dec89ee565ed892d571",
+ "max": 7522,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_3fa8e45950cd4a28b7314b6545f9bbc8",
+ "value": 7522
+ }
+ },
+ "0b5a520ef7ef4df7ac67d2fce9036a92": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "0c506dbd691d428082ed8151cfebd5e6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4698d859bc2648f8bd41de734d3e6b6f",
+ "placeholder": "β",
+ "style": "IPY_MODEL_30ed19bc86744c8b92b685ef97eff53d",
+ "value": "β34.1M/34.1Mβ[00:00<00:00,β39.3MB/s]"
+ }
+ },
+ "102875abfc5b4e6cab735f75bf200d50": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ad90ff35111d48d78a4a8595947ab8d3",
+ "max": 16000,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_a2d515ef793b4435b1fe6035de08585b",
+ "value": 16000
+ }
+ },
+ "1215442ba51b430d8de2358f44f7a9fa": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "13339cb1901644b9be473299832f6ced": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1596274edd804f1e858d2b5fb307300c",
+ "placeholder": "β",
+ "style": "IPY_MODEL_81654beebf7e425aa4f23504c1b71112",
+ "value": "β7.52k/7.52kβ[00:00<00:00,β379kB/s]"
+ }
+ },
+ "140f686ee529400b81876ccd4a37558c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_aafb1c4d69944584aaf0bc30e47df550",
+ "IPY_MODEL_e420266f780541e481cfbe4a80b2675c",
+ "IPY_MODEL_0c506dbd691d428082ed8151cfebd5e6"
+ ],
+ "layout": "IPY_MODEL_522c004154f0468e9057fdc2609a11d7"
+ }
+ },
+ "1596274edd804f1e858d2b5fb307300c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "16088f5268744b63a2fe9f2d22a0dcbb": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "16b2957328bf4093a00cb3bf2e655c24": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_50c5f5d69a164f8fa51721e1c44a7783",
+ "placeholder": "β",
+ "style": "IPY_MODEL_00936bda447f46e5b1439913aeb730de",
+ "value": "tokenizer_config.json:β100%"
+ }
+ },
+ "1a1777e3d39a4d98aa9bd0685925c52d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_83efa62da7c04b79829e69f81f229c56",
+ "placeholder": "β",
+ "style": "IPY_MODEL_7843d80c596749abae891f9855830943",
+ "value": "tokenizer.json:β100%"
+ }
+ },
+ "2157c5a847d24a9c88fe0a657fa4a7d5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e5dfd1e6fcf44da6bdb2957cf603710f",
+ "max": 16000,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6666615c6adc49bead154aff367c30e2",
+ "value": 16000
+ }
+ },
+ "216e1bd82a024ad895c7abd89025b771": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_61129d3bdae2454e997bf3263aa616b9",
+ "max": 7031863,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_b0732ed61af849678e127cdaaa67ce7d",
+ "value": 7031863
+ }
+ },
+ "22b564917f5f4c8e81a8347f90a3d557": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7927c572e90248c7a97ea2303fb9547e",
+ "placeholder": "β",
+ "style": "IPY_MODEL_738db5c70e8644d8825396a0c9f8309f",
+ "value": "model.safetensors:β100%"
+ }
+ },
+ "22dc4471a9b54eefafd7021177608984": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "25fe66aa53bc49d281a17ead8041ff1d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "28bf71c117fe456ba48b1107a22c1d16": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "29bdeef39f4f46ae910080a2b4547686": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_74363b66c6064709b98b95c4cd69662a",
+ "placeholder": "β",
+ "style": "IPY_MODEL_22dc4471a9b54eefafd7021177608984",
+ "value": "Map:β100%"
+ }
+ },
+ "2bdf072fd2b44e0abd3e2d5802a2054b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2cf07655e93b49d9823d460b68bfa553": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4e7b87ce3eec4994996c1d467bf6b2fb",
+ "max": 490,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_1215442ba51b430d8de2358f44f7a9fa",
+ "value": 490
+ }
+ },
+ "2d2844e25ace497aba29b9fb68173b0f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_35d8b168ec234ca7a045d47c348ca9a6",
+ "IPY_MODEL_c2fcb84d8f834b70880e7bfbe1e691c7",
+ "IPY_MODEL_fc4242c15c81477899b8446c6ad6b1a3"
+ ],
+ "layout": "IPY_MODEL_dd3f008a31b24d86af3ae7e9760cae8d"
+ }
+ },
+ "2dfbe287b29e4f6b9c84f95d751daffa": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "30ed19bc86744c8b92b685ef97eff53d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "326908aed9c64fc6abb1f767bbc0211a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_689e62ad5ee249bfa66e675e778b96f3",
+ "placeholder": "β",
+ "style": "IPY_MODEL_ff3791fe18f141e59d63056c5816b0f5",
+ "value": "vocab.json:β100%"
+ }
+ },
+ "336d3e3f8280423ab2a62e0cdf7631d8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_87cc4745ead94aaa8e41f4eed3cb1e59",
+ "placeholder": "β",
+ "style": "IPY_MODEL_6211db271a134ea9b2bd08089faf7d80",
+ "value": "β16000/16000β[00:01<00:00,β13001.59βexamples/s]"
+ }
+ },
+ "35d8b168ec234ca7a045d47c348ca9a6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_c4560decbda04d628fd815fa8f3953a0",
+ "placeholder": "β",
+ "style": "IPY_MODEL_af65219a68e644698d5a687095007316",
+ "value": "generation_config.json:β100%"
+ }
+ },
+ "365fac5b8cb14b3c9c85c21529551c7d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "3ccf6630013f4c4d950d30eca6d8e2be": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3d360b6d73704cf9956bd7279a0af53a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "3e308101e62f43c799ca27b6eaa744d4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3f85a717e1d242eaa21deda5944c658f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b5f3983270e54dd3911a35fe3128d9b7",
+ "placeholder": "β",
+ "style": "IPY_MODEL_9d7d05e6711149aa8adf3d03f3468e2f",
+ "value": "β7.03M/7.03Mβ[00:00<00:00,β21.5MB/s]"
+ }
+ },
+ "3fa8e45950cd4a28b7314b6545f9bbc8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "40a09d1d63ad49eca2a955e3e7d0cc72": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "41ed1216684a40cba7c0cf3e3aaf18c1": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "445862f7c16947d198aae928b2f3f6bb": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4543871a84b646f286bd601896d5d8cc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "463bddb985dc4b62a9ee0be8919cc5cf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "4698d859bc2648f8bd41de734d3e6b6f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "49a0e5d1681046c88e9bc1159d47d875": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fb7d899a343846f789adc4d9a3f54659",
+ "placeholder": "β",
+ "style": "IPY_MODEL_87d15d738ba4471cb86fa002620042a8",
+ "value": "special_tokens_map.json:β100%"
+ }
+ },
+ "4a82e48b80de44f29cef8b1724ef85e1": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4e7b87ce3eec4994996c1d467bf6b2fb": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "50c5f5d69a164f8fa51721e1c44a7783": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "512462794d674769b6629ad8c8c254b7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_16088f5268744b63a2fe9f2d22a0dcbb",
+ "placeholder": "β",
+ "style": "IPY_MODEL_b95d504adfd742c89a3fb277233fac5b",
+ "value": "β1.14G/1.14Gβ[00:08<00:00,β422MB/s]"
+ }
+ },
+ "522c004154f0468e9057fdc2609a11d7": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "54faaf670f5e43aa9f0cada481be066c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d7535f7fad1d416a8936735cd0f23099",
+ "max": 613,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_061df7aa5cd44f8481ec3af743ffc442",
+ "value": 613
+ }
+ },
+ "56a1c1654c304b05876f0e3664a091ed": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "595074903827402b8a1246c101340eff": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "5b214ea52c60495bb35f4b89439c70b6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "5cd4555da19a47c8beb1c0c7d782aa10": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e5e50e29698645a1b0bc68d7821d27a7",
+ "placeholder": "β",
+ "style": "IPY_MODEL_3d360b6d73704cf9956bd7279a0af53a",
+ "value": "Map:β100%"
+ }
+ },
+ "5d53d4ee8b5747f4a29d3efcabc2a324": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "5e7b3dcce78a463eb14e42cd3d2a0b0b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_22b564917f5f4c8e81a8347f90a3d557",
+ "IPY_MODEL_6b993dd64aa7489c8ca8c2f5d728178e",
+ "IPY_MODEL_512462794d674769b6629ad8c8c254b7"
+ ],
+ "layout": "IPY_MODEL_d8c30b721eae4c33a8fa2746c91f6ebe"
+ }
+ },
+ "61129d3bdae2454e997bf3263aa616b9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "6211db271a134ea9b2bd08089faf7d80": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6666615c6adc49bead154aff367c30e2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "675bd5d813834814b6f9c8273a026e0c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ac97da47a07441f8829640460203fa34",
+ "IPY_MODEL_7b532513494f4b35b74573e89140dda6",
+ "IPY_MODEL_c8e9096bd7764591aa44e8a2e1852c74"
+ ],
+ "layout": "IPY_MODEL_d394bbf04b904c0eb87ce8bd19bc4788"
+ }
+ },
+ "689e62ad5ee249bfa66e675e778b96f3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "69eb1c785b6f46cfab022b650e0ccc7b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "6b993dd64aa7489c8ca8c2f5d728178e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "danger",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3ccf6630013f4c4d950d30eca6d8e2be",
+ "max": 1143327678,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_b956ad4e4ffb47908402a0d9fce147fb",
+ "value": 1143327569
+ }
+ },
+ "738db5c70e8644d8825396a0c9f8309f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "73a0c53397be44c7974c4c9b1ad0423c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "73b62457b51e413dadc7bfca227abe39": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "74069380475f434bb34afec56314fa4e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f3d89e8a8407433db10d775dec82099f",
+ "placeholder": "β",
+ "style": "IPY_MODEL_c57b3e2abc4d4147a7410fdeb16f318e",
+ "value": "added_tokens.json:β100%"
+ }
+ },
+ "74363b66c6064709b98b95c4cd69662a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "76ea4577ebc34368a1f220104a0778af": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_16b2957328bf4093a00cb3bf2e655c24",
+ "IPY_MODEL_0aa69e1780cf4537ada828298340f744",
+ "IPY_MODEL_13339cb1901644b9be473299832f6ced"
+ ],
+ "layout": "IPY_MODEL_ae2e31a363ae4051ac40375bfec4dd42"
+ }
+ },
+ "7843d80c596749abae891f9855830943": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "7927c572e90248c7a97ea2303fb9547e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7b1d17800e4540ffb099bdef9eb8ae6c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_74069380475f434bb34afec56314fa4e",
+ "IPY_MODEL_075bc926c06742d399070029d705173f",
+ "IPY_MODEL_81300c1080894e34a3a5d88eeac33801"
+ ],
+ "layout": "IPY_MODEL_28bf71c117fe456ba48b1107a22c1d16"
+ }
+ },
+ "7b532513494f4b35b74573e89140dda6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f308b42d29b74c6fa49b9558dfe63409",
+ "max": 16000,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_d3b092ed193c49e0ba26aeae08111b4f",
+ "value": 16000
+ }
+ },
+ "7b9032352b0a4ae4b7bacfd217e29451": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b272574e4eab4313a3ea5f97c1d5c359",
+ "IPY_MODEL_102875abfc5b4e6cab735f75bf200d50",
+ "IPY_MODEL_b1011b504fc145af88d96667cafef7d4"
+ ],
+ "layout": "IPY_MODEL_d9d77e5d7b014d6883b8dafa3a685dc2"
+ }
+ },
+ "7bf13dd9534d4e9197bd1874687862ec": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "7ec180f6335341d594e0a726c48777a3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7f565d6c60064b1f86a60e9037651f14": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "8088d38d4d374574b9e52fd7356e2ea6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_326908aed9c64fc6abb1f767bbc0211a",
+ "IPY_MODEL_bc86d9704e2f460bb7cfd04b664eed12",
+ "IPY_MODEL_9e0ea9e88df94b4d9967ff1a7ed1bdd6"
+ ],
+ "layout": "IPY_MODEL_b47cd2b42af746d4ab10c7d0c640ba7a"
+ }
+ },
+ "811c37a344f84daabaa1e3e7eb985769": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_1a1777e3d39a4d98aa9bd0685925c52d",
+ "IPY_MODEL_216e1bd82a024ad895c7abd89025b771",
+ "IPY_MODEL_3f85a717e1d242eaa21deda5944c658f"
+ ],
+ "layout": "IPY_MODEL_ad45988fb01248ac96ff1b127149fafb"
+ }
+ },
+ "81300c1080894e34a3a5d88eeac33801": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_894dcbc8129c4f979581aa8c05efb747",
+ "placeholder": "β",
+ "style": "IPY_MODEL_0b5a520ef7ef4df7ac67d2fce9036a92",
+ "value": "β632/632β[00:00<00:00,β54.1kB/s]"
+ }
+ },
+ "81654beebf7e425aa4f23504c1b71112": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "81c0046c66224f7582d72c1dbf919d33": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "83efa62da7c04b79829e69f81f229c56": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "850136c0706a448eb98a32181f4f192d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b88a783124404b9b807b83d6f7023285",
+ "max": 16000,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_f596811361d34a75b178553520aa3400",
+ "value": 16000
+ }
+ },
+ "85dba50a8bc04ccebec42f560bbc1b79": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "87cc4745ead94aaa8e41f4eed3cb1e59": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "87d15d738ba4471cb86fa002620042a8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "894dcbc8129c4f979581aa8c05efb747": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "8e10bfda957d412592a2b5a7a33c81aa": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_bfbfd237e36d46f9be546d2416b7bb36",
+ "placeholder": "β",
+ "style": "IPY_MODEL_73a0c53397be44c7974c4c9b1ad0423c",
+ "value": "Generatingβtrainβsplit:β100%"
+ }
+ },
+ "8f46cd3d936343aca34b328df6b3dea0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_5cd4555da19a47c8beb1c0c7d782aa10",
+ "IPY_MODEL_850136c0706a448eb98a32181f4f192d",
+ "IPY_MODEL_d4ba51ee7f864afb822a7e4076496ab4"
+ ],
+ "layout": "IPY_MODEL_2dfbe287b29e4f6b9c84f95d751daffa"
+ }
+ },
+ "8fc45c66242648cc94f51cf08ff18f25": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "91d3a3d009f84b05b62f54353dc955eb": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "93b2a2bd27dc49d28acc3b6643cfc022": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_29bdeef39f4f46ae910080a2b4547686",
+ "IPY_MODEL_98ba361adff74799a8aeecfd313e3beb",
+ "IPY_MODEL_336d3e3f8280423ab2a62e0cdf7631d8"
+ ],
+ "layout": "IPY_MODEL_4a82e48b80de44f29cef8b1724ef85e1"
+ }
+ },
+ "98ba361adff74799a8aeecfd313e3beb": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f4f28c070f7241539ec87dd82a701a7e",
+ "max": 16000,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_ab5f2943fb674f03bb003d11304b3188",
+ "value": 16000
+ }
+ },
+ "9c676e3f4e204349ae41edc8ae770ce3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "9d7d05e6711149aa8adf3d03f3468e2f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "9e0ea9e88df94b4d9967ff1a7ed1bdd6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_445862f7c16947d198aae928b2f3f6bb",
+ "placeholder": "β",
+ "style": "IPY_MODEL_c2ca493b2d8a487c8db0defbeea297cc",
+ "value": "β2.78M/2.78Mβ[00:00<00:00,β13.7MB/s]"
+ }
+ },
+ "a2d515ef793b4435b1fe6035de08585b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "a47c4b141e244e44b068a2d79f9ac3f8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d98b7e8cc525466598fe34609e41fa54",
+ "placeholder": "β",
+ "style": "IPY_MODEL_463bddb985dc4b62a9ee0be8919cc5cf",
+ "value": "β490/490β[00:00<00:00,β33.1kB/s]"
+ }
+ },
+ "a8e9032fb3e34ca08f0a742c20c15848": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "aafb1c4d69944584aaf0bc30e47df550": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3e308101e62f43c799ca27b6eaa744d4",
+ "placeholder": "β",
+ "style": "IPY_MODEL_0576ab6d4cce4cb09f0755a230d50980",
+ "value": "dpo_fixed.jsonl:β100%"
+ }
+ },
+ "ab5f2943fb674f03bb003d11304b3188": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "aba1e58a13744fbea62c85ced2969652": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ac97da47a07441f8829640460203fa34": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7f565d6c60064b1f86a60e9037651f14",
+ "placeholder": "β",
+ "style": "IPY_MODEL_f67d2789738a4b3dac5dfdd91c588c5d",
+ "value": "Map:β100%"
+ }
+ },
+ "ad2b896b04fb42c18839380b8a40d19b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "ad42cefa580c4e84ba5201b704218c34": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "ad45988fb01248ac96ff1b127149fafb": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ad90ff35111d48d78a4a8595947ab8d3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ae2e31a363ae4051ac40375bfec4dd42": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "af65219a68e644698d5a687095007316": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "b0732ed61af849678e127cdaaa67ce7d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "b1011b504fc145af88d96667cafef7d4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9c676e3f4e204349ae41edc8ae770ce3",
+ "placeholder": "β",
+ "style": "IPY_MODEL_ad2b896b04fb42c18839380b8a40d19b",
+ "value": "β16000/16000β[02:10<00:00,β100.14βexamples/s]"
+ }
+ },
+ "b272574e4eab4313a3ea5f97c1d5c359": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2bdf072fd2b44e0abd3e2d5802a2054b",
+ "placeholder": "β",
+ "style": "IPY_MODEL_f8973d77047c4106987ae37a4c6a97ec",
+ "value": "Map:β100%"
+ }
+ },
+ "b47cd2b42af746d4ab10c7d0c640ba7a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b5f3983270e54dd3911a35fe3128d9b7": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b88a783124404b9b807b83d6f7023285": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b956ad4e4ffb47908402a0d9fce147fb": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "b95d504adfd742c89a3fb277233fac5b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "ba57df115cbd4571913b8399d3bdf2e0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "bc86d9704e2f460bb7cfd04b664eed12": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_8fc45c66242648cc94f51cf08ff18f25",
+ "max": 2776833,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_7bf13dd9534d4e9197bd1874687862ec",
+ "value": 2776833
+ }
+ },
+ "bf846b25033d48bd818ae306e671b61b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "bfbfd237e36d46f9be546d2416b7bb36": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c2ca493b2d8a487c8db0defbeea297cc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "c2fcb84d8f834b70880e7bfbe1e691c7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_41ed1216684a40cba7c0cf3e3aaf18c1",
+ "max": 161,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_365fac5b8cb14b3c9c85c21529551c7d",
+ "value": 161
+ }
+ },
+ "c4560decbda04d628fd815fa8f3953a0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c46dbb7e089a46c5be6f9947c79cf23f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_85dba50a8bc04ccebec42f560bbc1b79",
+ "placeholder": "β",
+ "style": "IPY_MODEL_a8e9032fb3e34ca08f0a742c20c15848",
+ "value": "β1.67M/1.67Mβ[00:00<00:00,β6.40MB/s]"
+ }
+ },
+ "c57b3e2abc4d4147a7410fdeb16f318e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "c8e9096bd7764591aa44e8a2e1852c74": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_56a1c1654c304b05876f0e3664a091ed",
+ "placeholder": "β",
+ "style": "IPY_MODEL_0a9c24d18391432f84fe2f89f69193b7",
+ "value": "β16000/16000β[00:01<00:00,β10752.11βexamples/s]"
+ }
+ },
+ "cb7eb0355b084370a2ba96125d6aa939": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "ce3e19e662e94dec89ee565ed892d571": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d196c1273a72403e926266ebde2ff551": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d25dfa432ab84d04b346b8cd58c89720": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d2f03b20726d489287e5ebce211303c4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_aba1e58a13744fbea62c85ced2969652",
+ "placeholder": "β",
+ "style": "IPY_MODEL_ad42cefa580c4e84ba5201b704218c34",
+ "value": "merges.txt:β100%"
+ }
+ },
+ "d394bbf04b904c0eb87ce8bd19bc4788": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d3b092ed193c49e0ba26aeae08111b4f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "d47f521581c8423682ed590854f55702": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d196c1273a72403e926266ebde2ff551",
+ "placeholder": "β",
+ "style": "IPY_MODEL_cb7eb0355b084370a2ba96125d6aa939",
+ "value": "README.md:β100%"
+ }
+ },
+ "d4ba51ee7f864afb822a7e4076496ab4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_06a7440478ee4c35a58bba8a434d9e69",
+ "placeholder": "β",
+ "style": "IPY_MODEL_81c0046c66224f7582d72c1dbf919d33",
+ "value": "β16000/16000β[00:01<00:00,β10769.01βexamples/s]"
+ }
+ },
+ "d7535f7fad1d416a8936735cd0f23099": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d8c30b721eae4c33a8fa2746c91f6ebe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d98b7e8cc525466598fe34609e41fa54": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d9d77e5d7b014d6883b8dafa3a685dc2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "dd3f008a31b24d86af3ae7e9760cae8d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "df90e41a4fc241c1ad61506089b59e83": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_69eb1c785b6f46cfab022b650e0ccc7b",
+ "placeholder": "β",
+ "style": "IPY_MODEL_91d3a3d009f84b05b62f54353dc955eb",
+ "value": "β16000/16000β[00:00<00:00,β72576.29βexamples/s]"
+ }
+ },
+ "e420266f780541e481cfbe4a80b2675c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_40a09d1d63ad49eca2a955e3e7d0cc72",
+ "max": 34098509,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_25fe66aa53bc49d281a17ead8041ff1d",
+ "value": 34098509
+ }
+ },
+ "e5dfd1e6fcf44da6bdb2957cf603710f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e5e50e29698645a1b0bc68d7821d27a7": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e6683a89074d456e949a5072a55b1602": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_bf846b25033d48bd818ae306e671b61b",
+ "max": 1671853,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_595074903827402b8a1246c101340eff",
+ "value": 1671853
+ }
+ },
+ "e7dc3433d02642728f805d18ab987102": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e7f156806cf84f80bc6562cc7fe24e08": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e7dc3433d02642728f805d18ab987102",
+ "placeholder": "β",
+ "style": "IPY_MODEL_5d53d4ee8b5747f4a29d3efcabc2a324",
+ "value": "β613/613β[00:00<00:00,β42.9kB/s]"
+ }
+ },
+ "ec2052c4d20f4273b2fc9d20b9f6cc04": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_d47f521581c8423682ed590854f55702",
+ "IPY_MODEL_2cf07655e93b49d9823d460b68bfa553",
+ "IPY_MODEL_a47c4b141e244e44b068a2d79f9ac3f8"
+ ],
+ "layout": "IPY_MODEL_078aba59954f45d5bae22aacaa9a5a8b"
+ }
+ },
+ "f308b42d29b74c6fa49b9558dfe63409": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f3d89e8a8407433db10d775dec82099f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f4f28c070f7241539ec87dd82a701a7e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f596811361d34a75b178553520aa3400": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "f602392931b1423e867b9e37dad27300": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_d2f03b20726d489287e5ebce211303c4",
+ "IPY_MODEL_e6683a89074d456e949a5072a55b1602",
+ "IPY_MODEL_c46dbb7e089a46c5be6f9947c79cf23f"
+ ],
+ "layout": "IPY_MODEL_d25dfa432ab84d04b346b8cd58c89720"
+ }
+ },
+ "f67d2789738a4b3dac5dfdd91c588c5d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "f8973d77047c4106987ae37a4c6a97ec": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "fb7d899a343846f789adc4d9a3f54659": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fc4242c15c81477899b8446c6ad6b1a3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_73b62457b51e413dadc7bfca227abe39",
+ "placeholder": "β",
+ "style": "IPY_MODEL_5b214ea52c60495bb35f4b89439c70b6",
+ "value": "β161/161β[00:00<00:00,β11.9kB/s]"
+ }
+ },
+ "fe4e7cd444b4449292b704253c574c2d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_49a0e5d1681046c88e9bc1159d47d875",
+ "IPY_MODEL_54faaf670f5e43aa9f0cada481be066c",
+ "IPY_MODEL_e7f156806cf84f80bc6562cc7fe24e08"
+ ],
+ "layout": "IPY_MODEL_ba57df115cbd4571913b8399d3bdf2e0"
+ }
+ },
+ "ff3791fe18f141e59d63056c5816b0f5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
|