From 7ef6a7cc2bf7a6fc76591daf8d3429c990206fed Mon Sep 17 00:00:00 2001 From: Nyakku Shigure Date: Tue, 23 Jul 2024 16:20:10 +0800 Subject: [PATCH] Use tensor in `window_reverse` to avoid precision issue (#617) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 暂时避免触发 scale OP,将 `window_size` 转为 Tensor 以触发 --- deploy/groundingdino/predict.py | 2 +- paddlemix/examples/groundingdino/run_predict.py | 2 +- paddlemix/models/audioldm2/clap_module/htsat_model.py | 2 +- paddlemix/models/groundingdino/backbone/swin_transformer.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deploy/groundingdino/predict.py b/deploy/groundingdino/predict.py index b2d11ff94..b49a41e0e 100644 --- a/deploy/groundingdino/predict.py +++ b/deploy/groundingdino/predict.py @@ -123,7 +123,7 @@ def plot_boxes_to_image(image_pil, tgt): # draw boxes and masks for box, label in zip(boxes, labels): # from 0..1 to 0..W, 0..H - box = box * paddle.to_tensor([W, H, W, H]) + box = box * paddle.to_tensor([W, H, W, H]).astype(paddle.float32) # from xywh to xyxy box[:2] -= box[2:] / 2 box[2:] += box[:2] diff --git a/paddlemix/examples/groundingdino/run_predict.py b/paddlemix/examples/groundingdino/run_predict.py index dc61f0324..8641fe3df 100644 --- a/paddlemix/examples/groundingdino/run_predict.py +++ b/paddlemix/examples/groundingdino/run_predict.py @@ -40,7 +40,7 @@ def plot_boxes_to_image(image_pil, tgt): # draw boxes and masks for box, label in zip(boxes, labels): # from 0..1 to 0..W, 0..H - box = box * paddle.to_tensor([W, H, W, H]) + box = box * paddle.to_tensor([W, H, W, H]).astype(paddle.float32) # from xywh to xyxy box[:2] -= box[2:] / 2 box[2:] += box[:2] diff --git a/paddlemix/models/audioldm2/clap_module/htsat_model.py b/paddlemix/models/audioldm2/clap_module/htsat_model.py index cb654ccdf..5005878d0 100644 --- a/paddlemix/models/audioldm2/clap_module/htsat_model.py +++ b/paddlemix/models/audioldm2/clap_module/htsat_model.py @@ -214,7 +214,7 @@ def window_reverse(windows, window_size, H, W): Returns: x: (B, H, W, C) """ - B = int(windows.shape[0] / (H * W / window_size / window_size)) + B = int(windows.shape[0] / (H * W / paddle.to_tensor(window_size * window_size))) x = windows.reshape([B, H // window_size, W // window_size, window_size, window_size, -1]) x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([B, H, W, -1]) return x diff --git a/paddlemix/models/groundingdino/backbone/swin_transformer.py b/paddlemix/models/groundingdino/backbone/swin_transformer.py index 76ef99777..7e8cac3cd 100644 --- a/paddlemix/models/groundingdino/backbone/swin_transformer.py +++ b/paddlemix/models/groundingdino/backbone/swin_transformer.py @@ -164,7 +164,7 @@ def window_reverse(windows, window_size, H, W): Returns: x: (B, H, W, C) """ - B = int(windows.shape[0] / (H * W / window_size / window_size)) + B = int(windows.shape[0] / (H * W / paddle.to_tensor(window_size * window_size))) x = windows.reshape([B, H // window_size, W // window_size, window_size, window_size, -1]) x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([B, H, W, -1]) return x