@@ -31,6 +31,7 @@ class TestCLIPImageTransform:
         [
             {
                 "image_size": (100, 400, 3),
+                "image_type": "PIL.Image",
                 "expected_shape": torch.Size([2, 3, 224, 224]),
                 "resize_to_max_canvas": False,
                 "expected_tile_means": [0.2230, 0.1763],
@@ -40,6 +41,7 @@ class TestCLIPImageTransform:
             },
             {
                 "image_size": (1000, 300, 3),
+                "image_type": "PIL.Image",
                 "expected_shape": torch.Size([4, 3, 224, 224]),
                 "resize_to_max_canvas": True,
                 "expected_tile_means": [0.5007, 0.4995, 0.5003, 0.1651],
@@ -49,6 +51,7 @@ class TestCLIPImageTransform:
             },
             {
                 "image_size": (200, 200, 3),
+                "image_type": "PIL.Image",
                 "expected_shape": torch.Size([4, 3, 224, 224]),
                 "resize_to_max_canvas": True,
                 "expected_tile_means": [0.5012, 0.5020, 0.5011, 0.4991],
@@ -59,6 +62,48 @@ class TestCLIPImageTransform:
             },
             {
                 "image_size": (600, 200, 3),
+                "image_type": "torch.Tensor",
+                "expected_shape": torch.Size([3, 3, 224, 224]),
+                "resize_to_max_canvas": False,
+                "expected_tile_means": [0.4473, 0.4469, 0.3032],
+                "expected_tile_max": [1.0, 1.0, 1.0],
+                "expected_tile_min": [0.0, 0.0, 0.0],
+                "expected_aspect_ratio": [3, 1],
+            },
+            {
+                "image_size": (100, 400, 3),
+                "image_type": "torch.Tensor",
+                "expected_shape": torch.Size([2, 3, 224, 224]),
+                "resize_to_max_canvas": False,
+                "expected_tile_means": [0.2230, 0.1763],
+                "expected_tile_max": [1.0, 1.0],
+                "expected_tile_min": [0.0, 0.0],
+                "expected_aspect_ratio": [1, 2],
+            },
+            {
+                "image_size": (1000, 300, 3),
+                "image_type": "torch.Tensor",
+                "expected_shape": torch.Size([4, 3, 224, 224]),
+                "resize_to_max_canvas": True,
+                "expected_tile_means": [0.5007, 0.4995, 0.5003, 0.1651],
+                "expected_tile_max": [0.9705, 0.9694, 0.9521, 0.9314],
+                "expected_tile_min": [0.0353, 0.0435, 0.0528, 0.0],
+                "expected_aspect_ratio": [4, 1],
+            },
+            {
+                "image_size": (200, 200, 3),
+                "image_type": "torch.Tensor",
+                "expected_shape": torch.Size([4, 3, 224, 224]),
+                "resize_to_max_canvas": True,
+                "expected_tile_means": [0.5012, 0.5020, 0.5011, 0.4991],
+                "expected_tile_max": [0.9922, 0.9926, 0.9970, 0.9908],
+                "expected_tile_min": [0.0056, 0.0069, 0.0059, 0.0033],
+                "expected_aspect_ratio": [2, 2],
+                "pad_tiles": 1,
+            },
+            {
+                "image_size": (600, 200, 3),
+                "image_type": "torch.Tensor",
                 "expected_shape": torch.Size([3, 3, 224, 224]),
                 "resize_to_max_canvas": False,
                 "expected_tile_means": [0.4473, 0.4469, 0.3032],
@@ -99,7 +144,10 @@ def test_clip_image_transform(self, params):
             .reshape(image_size)
             .astype(np.uint8)
         )
-        image = PIL.Image.fromarray(image)
+        if params["image_type"] == "PIL.Image":
+            image = PIL.Image.fromarray(image)
+        elif params["image_type"] == "torch.Tensor":
+            image = torch.from_numpy(image).permute(2, 0, 1)
 
         # Apply the transformation
         output = image_transform({"image": image})
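For context on the new branch in `test_clip_image_transform`: a `PIL.Image` input carries the pixels as height x width x channels uint8, while the tensor path feeds the same pixels channels-first, which is what the `.permute(2, 0, 1)` accomplishes. A minimal standalone sketch of the two equivalent input forms (the seeded RNG and variable names here are illustrative, not from the PR):

```python
import numpy as np
import PIL.Image
import torch

# Build one deterministic uint8 image, then wrap it in both forms the
# parametrized test now exercises via "image_type".
image_size = (100, 400, 3)  # (H, W, C)
array = np.random.default_rng(0).integers(0, 256, size=image_size).astype(np.uint8)

pil_image = PIL.Image.fromarray(array)                   # HWC uint8 -> PIL image
tensor_image = torch.from_numpy(array).permute(2, 0, 1)  # HWC -> CHW

# Same pixel data, just a different layout.
assert tensor_image.shape == (3, 100, 400)
assert np.array_equal(np.asarray(pil_image), tensor_image.permute(1, 2, 0).numpy())
```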