Commit 29ae975

TorchTune --> torchtune (pytorch#1007)
1 parent 27f6872 commit 29ae975

21 files changed: +61 −61 lines changed

README.md (+1 −1)

@@ -175,7 +175,7 @@ And should see the following output:
 ```bash
 usage: tune [-h] {ls,cp,download,run,validate} ...

-Welcome to the TorchTune CLI!
+Welcome to the torchtune CLI!

 options:
 -h, --help show this help message and exit

docs/source/api_ref_utilities.rst (+2 −2)

@@ -10,7 +10,7 @@ torchtune.utils
 Checkpointing
 -------------

-TorchTune offers checkpointers to allow seamless transitioning between checkpoint formats for training and interoperability with the rest of the ecosystem. For a comprehensive overview of
+torchtune offers checkpointers to allow seamless transitioning between checkpoint formats for training and interoperability with the rest of the ecosystem. For a comprehensive overview of
 checkpointing, please see the :ref:`checkpointing deep-dive <understand_checkpointer>`.

 .. autosummary::
@@ -68,7 +68,7 @@ Utilities to reduce memory consumption during training.
 Performance and Profiling
 -------------------------

-TorchTune provides utilities to profile and debug the performance
+torchtune provides utilities to profile and debug the performance
 of your finetuning job.

 .. autosummary::
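The checkpointing blurb touched above is prose-only in the docs. As a rough illustration of how these checkpointers are typically wired up, here is a minimal sketch; the constructor arguments, shard file names, and the `"model"` key in the returned dict are assumptions based on torchtune's documented checkpointing pattern, not something this commit changes.

```python
# Minimal sketch (assumptions noted above): load an HF-format Llama2 checkpoint
# through a torchtune checkpointer and push it into a torchtune model.
from torchtune import utils
from torchtune.models.llama2 import llama2_7b

checkpointer = utils.FullModelHFCheckpointer(
    checkpoint_dir="/tmp/Llama-2-7b-hf",   # hypothetical local path
    checkpoint_files=[                     # hypothetical shard names
        "pytorch_model-00001-of-00002.bin",
        "pytorch_model-00002-of-00002.bin",
    ],
    model_type="LLAMA2",
    output_dir="/tmp/finetune-output",
)

# load_checkpoint converts the HF weights into torchtune's key format and
# returns a dict of state dicts keyed by component (e.g. "model").
ckpt = checkpointer.load_checkpoint()

model = llama2_7b()
model.load_state_dict(ckpt["model"])
```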

docs/source/conf.py (+2 −2)

@@ -84,7 +84,7 @@
 # Get TORCHTUNE_VERSION_DOCS during the build.
 torchtune_version_docs = os.environ.get("TORCHTUNE_VERSION_DOCS", None)
 print(f"torchtune_version_docs: {torchtune_version_docs}")
-project = "TorchTune"
+project = "torchtune"

 # The code below will cut version displayed in the dropdown like this:
 # By default, set to "main".
@@ -111,7 +111,7 @@
 master_doc = "index"

 # General information about the project.
-copyright = "2023-present, TorchTune Contributors"
+copyright = "2023-present, torchtune Contributors"
 author = "Torch Contributors"

 # The language for content autogenerated by Sphinx. Refer to documentation

docs/source/index.rst (+1 −1)

@@ -58,7 +58,7 @@ torchtune tutorials.
    :tags: finetuning,llama2,lora

 .. customcarditem::
-   :header: Understanding QLoRA in TorchTune
+   :header: Understanding QLoRA in torchtune
    :card_description: Using QLoRA to quantize base model weights and maximize memory savings
    :image: _static/img/generic-pytorch-logo.png
    :link: tutorials/qlora_finetune.html

docs/source/install.rst (+1 −1)

@@ -29,7 +29,7 @@ And should see the following output:

 usage: tune [-h] {download,ls,cp,run,validate} ...

-Welcome to the TorchTune CLI!
+Welcome to the torchtune CLI!

 options:
 -h, --help show this help message and exit

docs/source/tutorials/e2e_flow.rst (+1 −1)

@@ -391,7 +391,7 @@ Modify ``custom_generation_config.yaml`` to include the following changes.
 .. code-block:: yaml

     checkpointer:
-      # we need to use the custom TorchTune checkpointer
+      # we need to use the custom torchtune checkpointer
       # instead of the HF checkpointer for loading
       # quantized models
       _component_: torchtune.utils.FullModelTorchTuneCheckpointer

docs/source/tutorials/llama3.rst (+2 −2)

@@ -103,7 +103,7 @@ For example, on two devices:

     tune run --nproc_per_node 2 lora_finetune_distributed --config llama3/8B_lora

-Finally, if we want to use even less memory, we can leverage TorchTune's QLoRA recipe via:
+Finally, if we want to use even less memory, we can leverage torchtune's QLoRA recipe via:

 .. code-block:: bash

@@ -299,7 +299,7 @@ First, we'll make one more change to our ``custom_generation_config.yaml``.
 .. code-block:: yaml

     checkpointer:
-      # we need to use the custom TorchTune checkpointer
+      # we need to use the custom torchtune checkpointer
       # instead of the HF checkpointer for loading
       # quantized models
       _component_: torchtune.utils.FullModelTorchTuneCheckpointer

recipes/__init__.py (+1 −1)

@@ -18,6 +18,6 @@

 # TODO: Add proper link to pytorch.org/torchtune/... when the docs are live.
 raise ModuleNotFoundError(
-    "The TorchTune recipes directory isn't a package and you should not import anything from here. "
+    "The torchtune recipes directory isn't a package and you should not import anything from here. "
     "Refer to our docs for detailed instructions on how to use recipes!"
 )

tests/test_import_recipes.py (+1 −1)

@@ -9,6 +9,6 @@

 def test_import_recipes():
     with pytest.raises(
-        ModuleNotFoundError, match="The TorchTune recipes directory isn't a package"
+        ModuleNotFoundError, match="The torchtune recipes directory isn't a package"
     ):
         import recipes  # noqa

tests/torchtune/_cli/test_tune.py (+1 −1)

@@ -21,4 +21,4 @@ def test_tune_without_args_returns_help(self, capsys, monkeypatch):
         captured = capsys.readouterr()
         output = captured.out.rstrip("\n")

-        assert "Welcome to the TorchTune CLI!" in output
+        assert "Welcome to the torchtune CLI!" in output

tests/torchtune/utils/test_checkpointer.py (+5 −5)

@@ -39,7 +39,7 @@ def weight_dtype(self):
     def state_dict_1(self, weight_dtype):
         """
         State dict for a HF format checkpoint. This state dict is "complete" and
-        can be loaded into a TorchTune model once correctly converted.
+        can be loaded into a torchtune model once correctly converted.
         """
         state_dict = {
             "model.embed_tokens.weight": randn(_VOCAB_SIZE, _DIM, dtype=weight_dtype),
@@ -189,7 +189,7 @@ def test_load_save_checkpoint_single_file(
         We test:
         * ``load_checkpoint`` loads the right sets of keys
         * Internal state of the checkpointer is correctly updated
-        * Converted checkpoint can be loaded into the llama2 TorchTune implementation
+        * Converted checkpoint can be loaded into the llama2 torchtune implementation
         * Saved checkpoint keys match the original checkpoint
         """
         # Read the state dict directly from file using torch.load. This will be the state
@@ -243,7 +243,7 @@ def test_save_load_checkpoint_multiple_file(
         We test:
         * ``load_checkpoint`` loads the right sets of keys
         * Internal state of the checkpointer is correctly updated
-        * Converted checkpoint can be loaded into the llama2 TorchTune implementation
+        * Converted checkpoint can be loaded into the llama2 torchtune implementation
         """
         # Read the state dict directly from files
         checkpoint_file_1, checkpoint_file_2 = llama2_hf_checkpoints
@@ -303,7 +303,7 @@ def weight_dtype(self):
     def state_dict(self, weight_dtype):
         """
         State dict for a HF format mistral reward model checkpoint. This state dict is
-        "complete" and can be loaded into a TorchTune model once correctly converted.
+        "complete" and can be loaded into a torchtune model once correctly converted.
         """
         state_dict = {
             "model.embed_tokens.weight": randn(_VOCAB_SIZE, _DIM, dtype=weight_dtype),
@@ -398,7 +398,7 @@ def test_load_save_checkpoint_single_file(
         We test:
         * ``load_checkpoint`` loads the right sets of keys
         * Internal state of the checkpointer is correctly updated
-        * Converted checkpoint can be loaded into the `mistral_classifier` TorchTune implementation
+        * Converted checkpoint can be loaded into the `mistral_classifier` torchtune implementation
         * Saved checkpoint keys match the original checkpoint
         """
         # Read the state dict directly from file using torch.load. This will be the state

torchtune/_cli/tune.py (+1 −1)

@@ -20,7 +20,7 @@ def __init__(self):
         # Initialize the top-level parser
         self._parser = argparse.ArgumentParser(
             prog="tune",
-            description="Welcome to the TorchTune CLI!",
+            description="Welcome to the torchtune CLI!",
             add_help=True,
         )
         # Default command is to print help

torchtune/config/_instantiate.py (+1 −1)

@@ -44,7 +44,7 @@ def instantiate(
     """
     Given a DictConfig with a _component_ field specifying the object to instantiate and
     additional fields for keyword arguments, create an instance of the specified object.
-    You can use this function to create the exact instance of a TorchTune object you want
+    You can use this function to create the exact instance of a torchtune object you want
    to use in your recipe using the specification from the config.

    This function also supports passing in positional args and keyword args within the

torchtune/models/convert_weights.py (+10 −10)

@@ -11,7 +11,7 @@
 import torch


-# state dict key mappings from Meta's format to TorchTune's format
+# state dict key mappings from Meta's format to torchtune's format
 _FROM_META = {
     "tok_embeddings.weight": "tok_embeddings.weight",
     "norm.weight": "norm.scale",
@@ -27,7 +27,7 @@
     "layers.{}.feed_forward.w3.weight": "layers.{}.mlp.w3.weight",
 }

-# state dict key mappings from HF's format to TorchTune's format
+# state dict key mappings from HF's format to torchtune's format
 _FROM_HF = {
     "model.embed_tokens.weight": "tok_embeddings.weight",
     "model.layers.{}.self_attn.q_proj.weight": "layers.{}.attn.q_proj.weight",
@@ -66,7 +66,7 @@ def get_mapped_key(key: str, mapping_dict: Dict[str, str]) -> str:


 def meta_to_tune(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
     """
-    Convert a state dict from Meta's format to TorchTune's format. State dicts
+    Convert a state dict from Meta's format to torchtune's format. State dicts
     from multiple checkpoint files should be consolidated into a single state dict
     before calling this function.

@@ -77,7 +77,7 @@ def meta_to_tune(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]
         state_dict (Dict[str, torch.Tensor]): State dict in Meta's format.

     Returns:
-        Dict[str, torch.Tensor]: State dict in TorchTune's format.
+        Dict[str, torch.Tensor]: State dict in torchtune's format.
     """
     converted_state_dict = {}
     for key, value in state_dict.items():
@@ -90,12 +90,12 @@ def meta_to_tune(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]


 def tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
     """
-    Convert a state dict from TorchTune's format to Meta's format. This function
+    Convert a state dict from torchtune's format to Meta's format. This function
     doesn't handle any sharding or splitting of state dicts. It follows the
     state_dict IN -> state_dict OUT pattern.

     Args:
-        state_dict (Dict[str, torch.Tensor]): State dict in TorchTune's format.
+        state_dict (Dict[str, torch.Tensor]): State dict in torchtune's format.

     Returns:
         Dict[str, torch.Tensor]: State dict in Meta's format.
@@ -118,7 +118,7 @@ def hf_to_tune(
     head_dim: int = None,
 ) -> Dict[str, torch.Tensor]:
     """
-    Convert a state dict from HF's format to TorchTune's format. State dicts
+    Convert a state dict from HF's format to torchtune's format. State dicts
     from multiple checkpoint files should be consolidated into a single state dict
     before calling this function.

@@ -134,7 +134,7 @@ def hf_to_tune(
             as dim // num_heads.

     Returns:
-        Dict[str, torch.Tensor]: State dict in TorchTune's format.
+        Dict[str, torch.Tensor]: State dict in torchtune's format.
     """
     converted_state_dict = {}
     if head_dim is None:
@@ -165,12 +165,12 @@ def tune_to_hf(
     dim: int = 4096,
 ):
     """
-    Convert a state dict from TorchTune's format to HF's format. This function
+    Convert a state dict from torchtune's format to HF's format. This function
     doesn't handle any sharding or splitting of state dicts. It follows the
     state_dict IN -> state_dict OUT pattern.

     Args:
-        state_dict (Dict[str, torch.Tensor]): State dict in TorchTune's format.
+        state_dict (Dict[str, torch.Tensor]): State dict in torchtune's format.
         num_heads (int): Number of heads in the model.
         num_kv_heads (int): Number of heads in the key/value projection layers.
         dim (int): Dimension of the model.
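The converters documented above never appear with a call site in this diff. The following is a minimal usage sketch; the checkpoint path, the model builder, and the load/save calls around the converters are assumptions for illustration, while `meta_to_tune` and `tune_to_meta` themselves come from this file.

```python
# Usage sketch (paths and surrounding calls are hypothetical; the converters are real).
import torch

from torchtune.models import convert_weights
from torchtune.models.llama2 import llama2_7b

# A consolidated Meta-format checkpoint: one state dict, not sharded files.
meta_state_dict = torch.load(
    "/tmp/llama2-7b/consolidated.00.pth", map_location="cpu", weights_only=True
)

# Remap keys (e.g. "norm.weight" -> "norm.scale") into torchtune's naming scheme.
tune_state_dict = convert_weights.meta_to_tune(meta_state_dict)

model = llama2_7b()
model.load_state_dict(tune_state_dict)

# And the inverse direction, e.g. before handing weights back to Meta-format tooling.
meta_again = convert_weights.tune_to_meta(model.state_dict())
```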

torchtune/models/gemma/_component_builders.py (+1 −1)

@@ -23,7 +23,7 @@
 """
 Component builders for the Gemma 2B models and popular variants such as LoRA.

-TorchTune provides composable building blocks. Builder functions help
+torchtune provides composable building blocks. Builder functions help
 stitch these building blocks into higher-level components. This design has
 two benefits:
 - The building blocks themselves are very flexible. For example, ``CausalSelfAttention``

torchtune/models/llama2/_component_builders.py (+1 −1)

@@ -27,7 +27,7 @@
 """
 Component builders for the Llama2 model and popular variants such as LoRA.

-TorchTune provides composable building blocks. Builder functions help
+torchtune provides composable building blocks. Builder functions help
 stitch these building blocks into higher-level components. This design has
 two benefits:
 - The building blocks themselves are very flexible. For example, ``CausalSelfAttention``

torchtune/models/llama3/_component_builders.py (+1 −1)

@@ -28,7 +28,7 @@
 """
 Component builders for the Llama3 model and popular variants such as LoRA.

-TorchTune provides composable building blocks. Builder functions help
+torchtune provides composable building blocks. Builder functions help
 stitch these building blocks into higher-level components. This design has
 two benefits:
 - The building blocks themselves are very flexible. For example, ``CausalSelfAttention``

torchtune/models/mistral/_component_builders.py (+1 −1)

@@ -24,7 +24,7 @@
 """
 Component builders for the Mistral 7B models and popular variants such as LoRA.

-TorchTune provides composable building blocks. Builder functions help
+torchtune provides composable building blocks. Builder functions help
 stitch these building blocks into higher-level components. This design has
 two benefits:
 - The building blocks themselves are very flexible. For example, ``CausalSelfAttention``
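The four component-builder docstrings above describe the builder-function pattern only in prose. Below is a deliberately simplified, self-contained sketch of that pattern; the layer classes and names are stand-ins built from plain `torch.nn` modules, not torchtune's actual builders.

```python
# Illustrative sketch of the builder pattern: small builders that stitch
# flexible building blocks into higher-level components. Not torchtune's API.
from torch import nn


def simple_transformer_layer(dim: int, num_heads: int) -> nn.Module:
    """Builder for one block; any piece here could be swapped (e.g. for a LoRA variant)."""
    return nn.TransformerEncoderLayer(d_model=dim, nhead=num_heads, batch_first=True)


def simple_encoder(dim: int = 512, num_heads: int = 8, num_layers: int = 4) -> nn.Module:
    """Higher-level builder composed entirely out of the smaller builder above."""
    layers = [simple_transformer_layer(dim, num_heads) for _ in range(num_layers)]
    return nn.Sequential(*layers)


model = simple_encoder()
```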

torchtune/models/mistral/_convert_weights.py (+5 −5)

@@ -34,7 +34,7 @@ def mistral_reward_hf_to_tune(
     head_dim: int = None,
 ) -> Dict[str, torch.Tensor]:
     """
-    Convert a state dict from HF's format to TorchTune's format, which contains the weights
+    Convert a state dict from HF's format to torchtune's format, which contains the weights
     of a Mistral reward model.
     State dicts from multiple checkpoint files should be consolidated into a single state dict
     before calling this function.
@@ -52,7 +52,7 @@ def mistral_reward_hf_to_tune(
             as dim // num_heads.

     Returns:
-        Dict[str, torch.Tensor]: State dict in TorchTune's format.
+        Dict[str, torch.Tensor]: State dict in torchtune's format.
     """
     converted_state_dict = {}
     if head_dim is None:
@@ -83,14 +83,14 @@ def mistral_reward_tune_to_hf(
     dim: int = 4096,
 ) -> Dict[str, torch.Tensor]:
     """
-    Convert a state dict from TorchTune's format to Hugging Face's format for a Mistral reward model.
+    Convert a state dict from torchtune's format to Hugging Face's format for a Mistral reward model.

-    This function takes a state dictionary in TorchTune's format, which contains the weights of a Mistral reward model,
+    This function takes a state dictionary in torchtune's format, which contains the weights of a Mistral reward model,
     and converts it into a format that can be loaded into a Hugging Face model.
     The logic is identical to :func:`~torchtune.models.convert_weights.tune_to_hf`, but with a different mapping.

     Args:
-        state_dict (Dict[str, torch.Tensor]): State dict in TorchTune's format.
+        state_dict (Dict[str, torch.Tensor]): State dict in torchtune's format.
         num_heads (int, optional): Number of heads in the model. Defaults to 32.
         num_kv_heads (int, optional): Number of heads in the key/value projection layers. Defaults to 32.
         dim (int, optional): Dimension of the model. Defaults to 4096.

torchtune/recipe_interfaces.py (+2 −2)

@@ -11,7 +11,7 @@ class FTRecipeInterface(Protocol):
     """
     This class provides a loose structure which every LLM fine-tuning recipe
     should follow. Please note that the interface itself should not be a vehicle for
-    code reuse. TorchTune strictly prohibits implementation inheritance in the codebase.
+    code reuse. torchtune strictly prohibits implementation inheritance in the codebase.

     A few notes about the design and the need for this interface:
     - This interface is meant to help recipe-writers organize their code in a way
@@ -64,7 +64,7 @@ class EvalRecipeInterface(Protocol):
     """
     This class provides a loose structure which every LLM evaluation recipe
     should follow. Please note that the interface itself should not be a vehicle for
-    code reuse. TorchTune strictly prohibits implementation inheritance in the codebase.
+    code reuse. torchtune strictly prohibits implementation inheritance in the codebase.
     """

     def load_checkpoint(self, **kwargs) -> None:
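Both interfaces above are typing `Protocol` classes, so a recipe conforms by matching their method signatures rather than by inheriting from them (which the docstring explicitly prohibits). Here is a minimal sketch of what structural conformance looks like; any method other than `load_checkpoint` is an assumed placeholder, since this diff only shows that one method.

```python
# Structural-typing sketch: conform to the Protocol by shape, not by inheritance.
from torchtune.recipe_interfaces import EvalRecipeInterface


class ToyEvalRecipe:
    """Never subclasses EvalRecipeInterface, yet can be used wherever it is expected."""

    def load_checkpoint(self, **kwargs) -> None:
        print(f"loading checkpoint with {kwargs}")

    def evaluate(self, **kwargs) -> None:  # assumed extra method, for illustration only
        print(f"running evaluation with {kwargs}")


# The annotation is where the Protocol earns its keep: a static type checker
# verifies that ToyEvalRecipe provides the methods the interface requires.
recipe: EvalRecipeInterface = ToyEvalRecipe()
recipe.load_checkpoint(checkpoint_path="/tmp/ckpt.pt")
```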
