adamkarvonen · adamkarvonen · Feb 21, 2025 · Feb 20, 2025
diff --git a/sae_bench/custom_saes/batch_topk_sae.py b/sae_bench/custom_saes/batch_topk_sae.py
@@ -8,6 +8,9 @@
 
 
 class BatchTopKSAE(base_sae.BaseSAE):
+    threshold: torch.Tensor
+    k: torch.Tensor
+
     def __init__(
         self,
         d_in: int,
@@ -47,7 +50,7 @@ def encode(self, x: torch.Tensor):
             )
             return encoded_acts_BF
 
-        post_topk = post_relu_feat_acts_BF.topk(self.k, sorted=False, dim=-1)
+        post_topk = post_relu_feat_acts_BF.topk(self.k, sorted=False, dim=-1)  # type: ignore
 
         tops_acts_BK = post_topk.values
         top_indices_BK = post_topk.indices

diff --git a/sae_bench/custom_saes/topk_sae.py b/sae_bench/custom_saes/topk_sae.py
@@ -8,6 +8,9 @@
 
 
 class TopKSAE(base_sae.BaseSAE):
+    threshold: torch.Tensor
+    k: torch.Tensor
+
     def __init__(
         self,
         d_in: int,
@@ -49,7 +52,7 @@ def encode(self, x: torch.Tensor):
             )
             return encoded_acts_BF
 
-        post_topk = post_relu_feat_acts_BF.topk(self.k, sorted=False, dim=-1)
+        post_topk = post_relu_feat_acts_BF.topk(self.k, sorted=False, dim=-1)  # type: ignore
 
         tops_acts_BK = post_topk.values
         top_indices_BK = post_topk.indices

diff --git a/sae_bench/evals/unlearning/utils/feature_activation.py b/sae_bench/evals/unlearning/utils/feature_activation.py
@@ -95,7 +95,7 @@ def gather_target_act_hook(mod, inputs, outputs):
         target_act = outputs[0]
         return outputs
 
-    handle = model.model.layers[target_layer].register_forward_hook(
+    handle = model.model.layers[target_layer].register_forward_hook(  # type: ignore
         gather_target_act_hook
     )
     _ = model.forward(inputs)  # type: ignore

diff --git a/sae_bench/sae_bench_utils/activation_collection.py b/sae_bench/sae_bench_utils/activation_collection.py
@@ -80,7 +80,7 @@ def activation_hook(resid_BLD: torch.Tensor, hook):
 
         if mask_bos_pad_eos_tokens:
             attn_mask_BL = get_bos_pad_eos_mask(tokens_BL, model.tokenizer)
-            acts_BLD = acts_BLD * attn_mask_BL[:, :, None]
+            acts_BLD = acts_BLD * attn_mask_BL[:, :, None]  # type: ignore
 
         all_acts_BLD.append(acts_BLD)
 
@@ -375,7 +375,7 @@ def encode_precomputed_activations(
                 sae_act_BLF = sae_act_BLF[:, :, selected_latents]
 
             if mask_bos_pad_eos_tokens:
-                attn_mask_BL = get_bos_pad_eos_mask(tokens_BL, sae.model.tokenizer)
+                attn_mask_BL = get_bos_pad_eos_mask(tokens_BL, sae.model.tokenizer)  # type: ignore
             else:
                 attn_mask_BL = torch.ones_like(tokens_BL, dtype=torch.bool)
 

diff --git a/tests/unit/evals/absorption/test_k_sparse_probing.py b/tests/unit/evals/absorption/test_k_sparse_probing.py
@@ -13,9 +13,9 @@
 
 def test_train_sparse_multi_probe_results_in_many_zero_weights():
     torch.set_grad_enabled(True)
-    x = torch.rand(1000, 100)
+    x = torch.rand(1000, 200)
     y = torch.randint(2, (1000, 3))
-    probe1 = train_sparse_multi_probe(x, y, l1_decay=0.01, device=torch.device("cpu"))
+    probe1 = train_sparse_multi_probe(x, y, l1_decay=0.03, device=torch.device("cpu"))
     probe2 = train_sparse_multi_probe(x, y, l1_decay=0.1, device=torch.device("cpu"))
 
     probe1_zero_weights = (probe1.weights.abs() < 1e-5).sum()