
Commit b57dede

feat: 🚩 add lpips+ and lpips-vgg+
1 parent 2c1de94

6 files changed (+55, -17 lines)

README.md (+1)

@@ -36,6 +36,7 @@ This is a image quality assessment toolbox with **pure python and pytorch**. We
 ---
 
 ### :triangular_flag_on_post: Updates/Changelog
+- 🔥**Aug, 2024**. Add `lpips+` and `lpips-vgg+` proposed in our paper [TOPIQ](https://arxiv.org/abs/2308.03060).
 - 🔥**June, 2024**. Add `arniqa` and its variances trained on different datasets, refer to official repo [here](https://github.com/miccunifi/ARNIQA). Thanks for the contribution from [Lorenzo Agnolucci](https://github.com/LorenzoAgnolucci) 🤗.
 - **Apr 24, 2024**. Add `inception_score` and console entry point with `pyiqa` command.
 - **Mar 11, 2024**. Add `unique`, refer to official repo [here](https://github.com/zwx8981/UNIQUE). Thanks for the contribution from [Weixia Zhang](https://github.com/zwx8981) 🤗.

benchmark_results.py (+3, -5)

@@ -126,9 +126,9 @@ def main():
         for data in dataloader:
             gt_labels += flatten_list(data['mos_label'].cpu().tolist())
             if metric_mode == 'FR':
-                iqa_score = iqa_model(data['img'], data['ref_img']).cpu().tolist()
+                iqa_score = iqa_model(data['img'], data['ref_img']).squeeze().cpu().tolist()
             else:
-                iqa_score = iqa_model(data['img']).cpu().tolist()
+                iqa_score = iqa_model(data['img']).squeeze().cpu().tolist()
             result_scores += flatten_list(iqa_score)
             pbar.update(1)
         pbar.close()
@@ -151,14 +151,12 @@ def main():
         if save_result_path is not None:
             csv_writer.writerow(results_row)
 
-
-
     if save_result_path is not None:
         csv_file.close()
 
     if update_benchmark_file is not None:
+        benchmark = benchmark.sort_values(by=benchmark.columns[0], key=lambda x: x.str.split('/').str[0].astype(float))
         benchmark.to_csv(update_benchmark_file)
 
-
 if __name__ == '__main__':
     main()
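
The `.squeeze()` added above matters because IQA models typically return scores with trailing singleton dimensions, so `tolist()` yields nested lists rather than plain floats. A minimal sketch of the shape behavior (tensor values are illustrative):

```python
import torch

# An FR metric such as LPIPS returns one score per image, often with
# trailing singleton dims, e.g. shape (N, 1, 1, 1) when keepdim=True.
scores = torch.rand(4, 1, 1, 1)

print(scores.cpu().tolist())            # deeply nested: [[[[0.83]]], [[[0.12]]], ...]
print(scores.squeeze().cpu().tolist())  # flat: [0.83, 0.12, 0.55, 0.94]
```

With the flattened output, `flatten_list` no longer has to unwrap several levels of nesting before the scores are accumulated in `result_scores`.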

pyiqa/archs/lpips_arch.py (+20, -5)

@@ -4,6 +4,14 @@
 
 Modified by: Jiadi Mo (https://github.com/JiadiMo)
 
+Reference:
+    Zhang, Richard, et al. "The unreasonable effectiveness of deep features as
+    a perceptual metric." Proceedings of the IEEE conference on computer vision
+    and pattern recognition. 2018.
+
+    TOPIQ: A Top-down Approach from Semantics to Distortions for Image Quality Assessment.
+    Chaofeng Chen, Jiadi Mo, Jingwen Hou, Haoning Wu, Liang Liao, Wenxiu Sun, Qiong Yan, Weisi Lin.
+    Transactions on Image Processing, 2024.
 """
 
 import torch
@@ -63,10 +71,6 @@ class LPIPS(nn.Module):
         pnet_tune (Boolean): Whether to tune the base/trunk network.
         use_dropout (Boolean): Whether to use dropout when training linear layers.
 
-    Reference:
-        Zhang, Richard, et al. "The unreasonable effectiveness of deep features as
-        a perceptual metric." Proceedings of the IEEE conference on computer vision
-        and pattern recognition. 2018.
 
     """
@@ -81,6 +85,7 @@ def __init__(self,
                  use_dropout=True,
                  pretrained_model_path=None,
                  eval_mode=True,
+                 semantic_weight_layer=-1,
                  **kwargs):
 
         super(LPIPS, self).__init__()
@@ -93,6 +98,8 @@ def __init__(self,
         self.version = version
         self.scaling_layer = ScalingLayer()
 
+        self.semantic_weight_layer = semantic_weight_layer
+
         if (self.pnet_type in ['vgg', 'vgg16']):
             net_type = vgg16
             self.chns = [64, 128, 256, 512, 512]
@@ -156,8 +163,16 @@ def forward(self, in1, in0, retPerLayer=False, normalize=True):
             diffs[kk] = (feats0[kk] - feats1[kk])**2
 
         if (self.lpips):
-            if (self.spatial):
+            if self.spatial:
                 res = [upsample(self.lins[kk](diffs[kk]), out_HW=in0.shape[2:]) for kk in range(self.L)]
+            elif self.semantic_weight_layer >= 0:
+                res = []
+                semantic_feat = outs0[self.semantic_weight_layer]
+                for kk in range(self.L):
+                    diff_score = self.lins[kk](diffs[kk])
+                    semantic_weight = torch.nn.functional.interpolate(semantic_feat, size=diff_score.shape[2:], mode='bilinear', align_corners=False)
+                    avg_score = torch.sum(diff_score * semantic_weight, dim=[1, 2, 3], keepdim=True) / torch.sum(semantic_weight, dim=[1, 2, 3], keepdim=True)
+                    res.append(avg_score)
             else:
                 res = [spatial_average(self.lins[kk](diffs[kk]), keepdim=True) for kk in range(self.L)]
         else:
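
The new `elif` branch swaps plain spatial averaging for a semantic-weighted average: a feature map from one backbone stage of the first input serves as a spatial weight over each layer's difference scores. A standalone sketch of that pooling step, with hypothetical tensor shapes standing in for `self.lins[kk](diffs[kk])` and `outs0[self.semantic_weight_layer]`:

```python
import torch
import torch.nn.functional as F

# Hypothetical shapes: a per-layer difference score map (N, 1, H, W) and a
# semantic feature map (N, C, H', W') from an earlier backbone stage.
diff_score = torch.rand(2, 1, 28, 28)
semantic_feat = torch.rand(2, 256, 56, 56)

# Resize the semantic features to the score map's spatial resolution.
semantic_weight = F.interpolate(semantic_feat, size=diff_score.shape[2:],
                                mode='bilinear', align_corners=False)

# Weighted spatial average: locations with stronger semantic activations
# contribute more; the product broadcasts over the channel dimension.
avg_score = (torch.sum(diff_score * semantic_weight, dim=[1, 2, 3], keepdim=True)
             / torch.sum(semantic_weight, dim=[1, 2, 3], keepdim=True))
print(avg_score.shape)  # torch.Size([2, 1, 1, 1])
```

Only the pooling over spatial locations changes; the per-layer linear heads are untouched, so the existing pretrained LPIPS weights can be reused.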

pyiqa/archs/topiq_arch.py (+1, -1)

@@ -2,7 +2,7 @@
 
 TOPIQ: A Top-down Approach from Semantics to Distortions for Image Quality Assessment.
 Chaofeng Chen, Jiadi Mo, Jingwen Hou, Haoning Wu, Liang Liao, Wenxiu Sun, Qiong Yan, Weisi Lin.
-Arxiv 2023.
+Transactions on Image Processing, 2024.
 
 Paper link: https://arxiv.org/abs/2308.03060
 
pyiqa/default_model_configs.py (+20)

@@ -31,6 +31,26 @@
         'metric_mode': 'FR',
         'lower_better': True,
     },
+    'lpips+': {
+        'metric_opts': {
+            'type': 'LPIPS',
+            'net': 'alex',
+            'version': '0.1',
+            'semantic_weight_layer': 2,
+        },
+        'metric_mode': 'FR',
+        'lower_better': True,
+    },
+    'lpips-vgg+': {
+        'metric_opts': {
+            'type': 'LPIPS',
+            'net': 'vgg',
+            'version': '0.1',
+            'semantic_weight_layer': 2,
+        },
+        'metric_mode': 'FR',
+        'lower_better': True,
+    },
     'stlpips': {
         'metric_opts': {
             'type': 'STLPIPS',
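
With these entries registered, the new metrics should be reachable through pyiqa's usual factory function. A minimal usage sketch, assuming the standard `pyiqa.create_metric` entry point and 4-D image tensors in [0, 1]:

```python
import torch
import pyiqa

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 'lpips+' uses the AlexNet backbone, 'lpips-vgg+' the VGG backbone; both
# set semantic_weight_layer=2 and are full-reference, lower-is-better.
metric = pyiqa.create_metric('lpips+', device=device)

dist = torch.rand(1, 3, 224, 224, device=device)  # distorted image
ref = torch.rand(1, 3, 224, 224, device=device)   # reference image
print(metric(dist, ref))  # lower score = perceptually closer to the reference
```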

tests/FR_benchmark_results.csv (+10, -6)

@@ -1,13 +1,17 @@
 Metric name,csiq(PLCC/SRCC/KRCC),live(PLCC/SRCC/KRCC),tid2008(PLCC/SRCC/KRCC),tid2013(PLCC/SRCC/KRCC)
-psnr,0.7857/0.8087/0.5989,0.7633/0.8013/0.5964,0.489/0.5245/0.3696,0.6601/0.6869/0.4958
+cw_ssim,0.6078/0.7588/0.5562,0.5714/0.7681/0.5673,0.5965/0.6473/0.4625,0.5815/0.6533/0.4715
 ssim,0.765/0.8367/0.6323,0.7369/0.8509/0.6547,0.6003/0.6242/0.4521,0.6558/0.6269/0.455
 ms_ssim,0.7717/0.9125/0.7372,0.679/0.9027/0.7227,0.7894/0.8531/0.6555,0.7814/0.7852/0.6033
-cw_ssim,0.6078/0.7588/0.5562,0.5714/0.7681/0.5673,0.5965/0.6473/0.4625,0.5815/0.6533/0.4715
+psnr,0.7857/0.8087/0.5989,0.7633/0.8013/0.5964,0.489/0.5245/0.3696,0.6601/0.6869/0.4958
 fsim,0.8207/0.9309/0.7683,0.7747/0.9204/0.7515,0.8341/0.884/0.6991,0.8322/0.8509/0.6665
-vif,0.9219/0.9194/0.7532,0.9409/0.9526/0.8067,0.7769/0.7491/0.5861,0.7336/0.677/0.5148
-lpips,0.9005/0.9233/0.7499,0.7672/0.869/0.6768,0.711/0.7151/0.5221,0.7529/0.7445/0.5477
-dists,0.9324/0.9296/0.7644,0.8392/0.9051/0.7283,0.7032/0.6648/0.4861,0.7538/0.7077/0.5212
-pieapp,0.838/0.8968/0.7109,0.8577/0.9182/0.7491,0.6443/0.7971/0.6089,0.7195/0.8438/0.6571
+stlpips,0.823/0.8952/0.7094,0.813/0.8826/0.6931,0.624/0.6404/0.454,0.7147/0.7365/0.5387
 ahiq,0.8234/0.8273/0.6168,0.8039/0.8967/0.7066,0.6772/0.6807/0.4842,0.7379/0.7075/0.5127
+pieapp,0.838/0.8968/0.7109,0.8577/0.9182/0.7491,0.6443/0.7971/0.6089,0.7195/0.8438/0.6571
+lpips,0.9005/0.9233/0.7499,0.7672/0.869/0.6768,0.711/0.7151/0.5221,0.7529/0.7445/0.5477
+lpips+,0.9041/0.9285/0.7575,0.8455/0.9248/0.7546,0.7318/0.7379/0.5424,0.7656/0.7622/0.5639
+lpips-vgg,0.9043/0.883/0.6968,0.9336/0.9318/0.7646,0.6974/0.6536/0.4822,0.7324/0.6696/0.497
 wadiqam_fr,0.9087/0.922/0.7461,0.9163/0.9308/0.7584,0.8221/0.8222/0.6245,0.8424/0.8264/0.628
+lpips-vgg+,0.9169/0.894/0.7128,0.9499/0.9503/0.7983,0.7406/0.6869/0.5113,0.7606/0.6913/0.5152
+vif,0.9219/0.9194/0.7532,0.9409/0.9526/0.8067,0.7769/0.7491/0.5861,0.7336/0.677/0.5148
+dists,0.9324/0.9296/0.7644,0.8392/0.9051/0.7283,0.7032/0.6648/0.4861,0.7538/0.7077/0.5212
 topiq_fr,0.9589/0.9674/0.8379,0.9542/0.9759/0.8617,0.9044/0.9226/0.7554,0.9158/0.9165/0.7441
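
The reordering above is a direct effect of the new `sort_values` call in `benchmark_results.py`: rows are sorted ascending by the PLCC value of the first dataset column, parsed out of the `PLCC/SRCC/KRCC` string. A small pandas sketch with a made-up three-row excerpt, mirroring the call from the commit:

```python
import pandas as pd

# Made-up excerpt: the first data column holds 'PLCC/SRCC/KRCC' strings.
benchmark = pd.DataFrame(
    {'csiq(PLCC/SRCC/KRCC)': ['0.9324/0.9296/0.7644',
                              '0.7857/0.8087/0.5989',
                              '0.9005/0.9233/0.7499']},
    index=['dists', 'psnr', 'lpips'])

# Sort rows by the leading PLCC value (the part before the first '/').
benchmark = benchmark.sort_values(
    by=benchmark.columns[0],
    key=lambda x: x.str.split('/').str[0].astype(float))

print(benchmark.index.tolist())  # ['psnr', 'lpips', 'dists']
```

That is why lower-scoring metrics such as `cw_ssim` now sit at the top of the CSV while `topiq_fr` stays at the bottom.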
