@@ -29,7 +29,6 @@ def __init__(self, name, filename):
29
29
30
30
def read_metadata():
    # Return the raw metadata dict stored in the safetensors header of
    # this network file (closure over `filename`).
    return sd_models.read_metadata_from_safetensors(filename)
35
34
@@ -117,6 +116,12 @@ def __init__(self, net: Network, weights: NetworkWeights):
117
116
118
117
if hasattr (self .sd_module , 'weight' ):
119
118
self .shape = self .sd_module .weight .shape
119
+ elif isinstance (self .sd_module , nn .MultiheadAttention ):
120
+ # For now, only self-attn use Pytorch's MHA
121
+ # So assume all qkvo proj have same shape
122
+ self .shape = self .sd_module .out_proj .weight .shape
123
+ else :
124
+ self .shape = None
120
125
121
126
self .ops = None
122
127
self .extra_kwargs = {}
@@ -146,6 +151,9 @@ def __init__(self, net: Network, weights: NetworkWeights):
146
151
self .alpha = weights .w ["alpha" ].item () if "alpha" in weights .w else None
147
152
self .scale = weights .w ["scale" ].item () if "scale" in weights .w else None
148
153
154
+ self .dora_scale = weights .w .get ("dora_scale" , None )
155
+ self .dora_norm_dims = len (self .shape ) - 1
156
+
149
157
def multiplier (self ):
150
158
if 'transformer' in self .sd_key [:20 ]:
151
159
return self .network .te_multiplier
@@ -160,6 +168,27 @@ def calc_scale(self):
160
168
161
169
return 1.0
162
170
171
def apply_weight_decompose(self, updown, orig_weight):
    """DoRA weight decomposition.

    Rescales the merged weight (orig_weight + updown) so each input-channel
    slice has magnitude given by `self.dora_scale`, then returns the delta
    relative to the original weight.
    """
    # Move everything onto the update's dtype and the original weight's device.
    orig_weight = orig_weight.to(updown.dtype)
    dora_scale = self.dora_scale.to(device=orig_weight.device, dtype=updown.dtype)
    updown = updown.to(orig_weight.device)

    merged = orig_weight + updown

    # Per-input-channel L2 norm of the merged weight, reshaped so it
    # broadcasts back against `merged` (dim 1 is the input-channel axis).
    flattened = merged.transpose(0, 1).reshape(merged.shape[1], -1)
    channel_norm = flattened.norm(dim=1, keepdim=True)
    channel_norm = (
        channel_norm
        .reshape(merged.shape[1], *[1] * self.dora_norm_dims)
        .transpose(0, 1)
    )

    # Renormalize to the learned DoRA magnitude, then express as a delta.
    renormalized = merged * (dora_scale / channel_norm)
    return renormalized - orig_weight
191
+
163
192
def finalize_updown (self , updown , orig_weight , output_shape , ex_bias = None ):
164
193
if self .bias is not None :
165
194
updown = updown .reshape (self .bias .shape )
@@ -175,6 +204,9 @@ def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None):
175
204
if ex_bias is not None :
176
205
ex_bias = ex_bias * self .multiplier ()
177
206
207
+ if self .dora_scale is not None :
208
+ updown = self .apply_weight_decompose (updown , orig_weight )
209
+
178
210
return updown * self .calc_scale () * self .multiplier (), ex_bias
179
211
180
212
def calc_updown (self , target ):
0 commit comments