From 2b2a6d36b6a1a1f561cc86ecdd0f425568bd2378 Mon Sep 17 00:00:00 2001
From: Johnny Lin <hijohnnylin@gmail.com>
Date: Mon, 27 Jan 2025 12:45:07 -0800
Subject: [PATCH] fix: Update JSON schema files

---
 ...output_schema_absorption_first_letter.json | 56 ++++++++++++---
 .../eval_output_schema_autointerp.json        |  8 ++-
 .../evals/core/eval_output_schema_core.json   | 72 ++++++++++++++++++-
 sae_bench/evals/generate_json_schemas.py      |  4 +-
 .../eval_output_schema_sparse_probing.json    | 11 ++-
 5 files changed, 134 insertions(+), 17 deletions(-)

diff --git a/sae_bench/evals/absorption/eval_output_schema_absorption_first_letter.json b/sae_bench/evals/absorption/eval_output_schema_absorption_first_letter.json
index 609c027..ba56f35 100644
--- a/sae_bench/evals/absorption/eval_output_schema_absorption_first_letter.json
+++ b/sae_bench/evals/absorption/eval_output_schema_absorption_first_letter.json
@@ -74,9 +74,15 @@
     },
     "AbsorptionMeanMetrics": {
       "properties": {
-        "mean_absorption_score": {
-          "description": "Average of the absorption scores across all letters",
-          "title": "Mean Absorption Score",
+        "mean_absorption_fraction_score": {
+          "description": "Average of the absorption fraction scores across all letters",
+          "title": "Mean Absorption Fraction Score",
+          "type": "number",
+          "ui_default_display": true
+        },
+        "mean_full_absorption_score": {
+          "description": "Average of the full absorption scores across all letters",
+          "title": "Mean Full Absorption Score",
           "type": "number",
           "ui_default_display": true
         },
@@ -85,11 +91,33 @@
           "title": "Mean Number of Split Features",
           "type": "number",
           "ui_default_display": true
+        },
+        "std_dev_absorption_fraction_score": {
+          "description": "Standard deviation of the absorption fraction scores across all letters",
+          "title": "Standard Deviation of Absorption Fraction Score",
+          "type": "number",
+          "ui_default_display": true
+        },
+        "std_dev_full_absorption_score": {
+          "description": "Standard deviation of the full absorption scores across all letters",
+          "title": "Standard Deviation of Full Absorption Score",
+          "type": "number",
+          "ui_default_display": true
+        },
+        "std_dev_num_split_features": {
+          "description": "Standard deviation of the number of split features across all letters",
+          "title": "Standard Deviation of Number of Split Features",
+          "type": "number",
+          "ui_default_display": true
         }
       },
       "required": [
-        "mean_absorption_score",
-        "mean_num_split_features"
+        "mean_absorption_fraction_score",
+        "mean_full_absorption_score",
+        "mean_num_split_features",
+        "std_dev_absorption_fraction_score",
+        "std_dev_full_absorption_score",
+        "std_dev_num_split_features"
       ],
       "title": "AbsorptionMeanMetrics",
       "type": "object"
@@ -116,14 +144,19 @@
           "title": "First Letter",
           "type": "string"
         },
-        "absorption_rate": {
+        "mean_absorption_fraction": {
+          "description": "",
+          "title": "Mean Absorption Fraction",
+          "type": "number"
+        },
+        "full_absorption_rate": {
           "description": "",
-          "title": "Absorption Rate",
+          "title": "Rate of Full Absorption",
           "type": "number"
         },
-        "num_absorption": {
+        "num_full_absorption": {
           "description": "",
-          "title": "Num Absorption",
+          "title": "Num Full Absorption",
           "type": "integer"
         },
         "num_probe_true_positives": {
@@ -139,8 +172,9 @@
       },
       "required": [
         "first_letter",
-        "absorption_rate",
-        "num_absorption",
+        "mean_absorption_fraction",
+        "full_absorption_rate",
+        "num_full_absorption",
         "num_probe_true_positives",
         "num_split_features"
       ],
diff --git a/sae_bench/evals/autointerp/eval_output_schema_autointerp.json b/sae_bench/evals/autointerp/eval_output_schema_autointerp.json
index 3ac70db..df4e31a 100644
--- a/sae_bench/evals/autointerp/eval_output_schema_autointerp.json
+++ b/sae_bench/evals/autointerp/eval_output_schema_autointerp.json
@@ -164,10 +164,16 @@
           "title": "AutoInterp Score",
           "type": "number",
           "ui_default_display": true
+        },
+        "autointerp_std_dev": {
+          "description": "AutoInterp detection score standard deviation over all tested features",
+          "title": "AutoInterp Standard Deviation",
+          "type": "number"
         }
       },
       "required": [
-        "autointerp_score"
+        "autointerp_score",
+        "autointerp_std_dev"
       ],
       "title": "AutoInterpMetrics",
       "type": "object"
diff --git a/sae_bench/evals/core/eval_output_schema_core.json b/sae_bench/evals/core/eval_output_schema_core.json
index add9156..47fa525 100644
--- a/sae_bench/evals/core/eval_output_schema_core.json
+++ b/sae_bench/evals/core/eval_output_schema_core.json
@@ -133,6 +133,16 @@
           "description": "Cosine similarity between encoder and decoder weights for each feature",
           "title": "Encoder-Decoder Cosine Similarity",
           "type": "number"
+        },
+        "max_decoder_cosine_sim": {
+          "description": "Maximum cosine similarity with any other feature's decoder weights",
+          "title": "Max Decoder Cosine Similarity",
+          "type": "number"
+        },
+        "max_encoder_cosine_sim": {
+          "description": "Maximum cosine similarity with any other feature's encoder weights",
+          "title": "Max Encoder Cosine Similarity",
+          "type": "number"
         }
       },
       "required": [
@@ -141,7 +151,9 @@
         "consistent_activation_heuristic",
         "encoder_bias",
         "encoder_norm",
-        "encoder_decoder_cosine_sim"
+        "encoder_decoder_cosine_sim",
+        "max_decoder_cosine_sim",
+        "max_encoder_cosine_sim"
       ],
       "title": "CoreFeatureMetric",
       "type": "object"
@@ -177,6 +189,11 @@
           "$ref": "#/$defs/TokenStatsMetrics",
           "description": "Statistics about the number of tokens used in evaluation",
           "title": "Token Statistics"
+        },
+        "misc_metrics": {
+          "$ref": "#/$defs/MiscMetrics",
+          "description": "Miscellaneous metrics",
+          "title": "Miscellaneous Metrics"
         }
       },
       "required": [
@@ -185,11 +202,62 @@
         "reconstruction_quality",
         "shrinkage",
         "sparsity",
-        "token_stats"
+        "token_stats",
+        "misc_metrics"
       ],
       "title": "CoreMetricCategories",
       "type": "object"
     },
+    "MiscMetrics": {
+      "properties": {
+        "freq_over_1_percent": {
+          "description": "Proportion of features that activate on more than 1% of tokens",
+          "title": "Activation Frequency Over 1%",
+          "type": "number"
+        },
+        "freq_over_10_percent": {
+          "description": "Proportion of features that activate on more than 10% of tokens",
+          "title": "Activation Frequency Over 10%",
+          "type": "number"
+        },
+        "normalized_freq_over_1_percent": {
+          "description": "Sum of > 1% activation frequency probabilities, normalized by the sum of all feature probabilities",
+          "title": "Normalized Activation Frequency Over 1%",
+          "type": "number"
+        },
+        "normalized_freq_over_10_percent": {
+          "description": "Sum of > 10% activation frequency probabilities, normalized by the sum of all feature probabilities",
+          "title": "Normalized Activation Frequency Over 10%",
+          "type": "number"
+        },
+        "average_max_encoder_cosine_sim": {
+          "description": "Average of the maximum cosine similarity with any other feature's encoder weights",
+          "title": "Average Max Encoder Cosine Similarity",
+          "type": "number"
+        },
+        "average_max_decoder_cosine_sim": {
+          "description": "Average of the maximum cosine similarity with any other feature's decoder weights",
+          "title": "Average Max Decoder Cosine Similarity",
+          "type": "number"
+        },
+        "frac_alive": {
+          "description": "Fraction of features that fired at least once during evaluation. This is likely an underestimate because only a limited number of tokens is evaluated",
+          "title": "Fraction of Alive Features",
+          "type": "number"
+        }
+      },
+      "required": [
+        "freq_over_1_percent",
+        "freq_over_10_percent",
+        "normalized_freq_over_1_percent",
+        "normalized_freq_over_10_percent",
+        "average_max_encoder_cosine_sim",
+        "average_max_decoder_cosine_sim",
+        "frac_alive"
+      ],
+      "title": "MiscMetrics",
+      "type": "object"
+    },
     "ModelBehaviorPreservationMetrics": {
       "properties": {
         "kl_div_score": {
diff --git a/sae_bench/evals/generate_json_schemas.py b/sae_bench/evals/generate_json_schemas.py
index 695919c..46e83ac 100644
--- a/sae_bench/evals/generate_json_schemas.py
+++ b/sae_bench/evals/generate_json_schemas.py
@@ -21,7 +21,9 @@ def main():
             if file == "eval_output.py":
                 print(file)
                 module_path = os.path.relpath(os.path.join(root, file), base_dir)
-                module_name = module_path.replace("/", ".").replace(".py", "")
+                module_name = "sae_bench." + module_path.replace("/", ".").replace(
+                    ".py", ""
+                )
 
                 try:
                     module = __import__(module_name, fromlist=[""])
diff --git a/sae_bench/evals/sparse_probing/eval_output_schema_sparse_probing.json b/sae_bench/evals/sparse_probing/eval_output_schema_sparse_probing.json
index 67e0b5c..8cc50b7 100644
--- a/sae_bench/evals/sparse_probing/eval_output_schema_sparse_probing.json
+++ b/sae_bench/evals/sparse_probing/eval_output_schema_sparse_probing.json
@@ -41,10 +41,17 @@
           "type": "integer"
         },
         "llm_batch_size": {
+          "anyOf": [
+            {
+              "type": "integer"
+            },
+            {
+              "type": "null"
+            }
+          ],
           "default": null,
           "description": "LLM batch size. This is set by default in the main script, or it can be set with a command line argument.",
-          "title": "LLM Batch Size",
-          "type": "integer"
+          "title": "LLM Batch Size"
         },
         "llm_dtype": {
           "default": "",