pytorch
diff --git a/‎packaging/pre_build_script.sh
+7-6 b/‎packaging/pre_build_script.sh
+7-6
diff --git a/‎setup.py
+10-3 b/‎setup.py
+10-3
diff --git a/‎test/common_utils.py
+18 b/‎test/common_utils.py
+18
diff --git a/‎test/datasets_utils.py
+18-5 b/‎test/datasets_utils.py
+18-5
diff --git a/‎test/test_datasets.py
+42 b/‎test/test_datasets.py
+42
diff --git a/‎test/test_image.py
+36 b/‎test/test_image.py
+36
@@ -9,15 +9,16 @@ if [[ "$(uname)" == Darwin ]]; then
     brew uninstall --ignore-dependencies --force $pkg || true
   done
 
-  conda install -yq wget
+  conda install -y wget
 fi
 
 if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" ]]; then
-  conda install libpng libwebp -yq
+  conda install libpng libwebp -y
   # Installing webp also installs a non-turbo jpeg, so we uninstall jpeg stuff
   # before re-installing them
   conda uninstall libjpeg-turbo libjpeg -y
-  conda install -yq ffmpeg=4.2 libjpeg-turbo -c pytorch
+  conda install -y ffmpeg=4.2 -c pytorch
+  conda install -y libjpeg-turbo -c pytorch
 
   # Copy binaries to be included in the wheel distribution
   if [[ "$OSTYPE" == "msys" ]]; then
@@ -28,11 +29,11 @@ if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" ]]; then
 else
 
   if [[ "$ARCH" == "aarch64" ]]; then
-    conda install libpng -yq
-    conda install -yq ffmpeg=4.2 libjpeg-turbo -c pytorch-nightly
+    conda install libpng -y
+    conda install -y ffmpeg=4.2 libjpeg-turbo -c pytorch-nightly
   fi
 
-  conda install libwebp -yq
+  conda install libwebp -y
   conda install libjpeg-turbo -c pytorch
   yum install -y freetype gnutls
   pip install auditwheel
 
@@ -2,6 +2,7 @@
 import distutils.spawn
 import glob
 import os
+import shlex
 import shutil
 import subprocess
 import sys
@@ -95,8 +96,14 @@ def get_dist(pkgname):
             return None
 
     pytorch_dep = os.getenv("TORCH_PACKAGE_NAME", "torch")
-    if os.getenv("PYTORCH_VERSION"):
-        pytorch_dep += "==" + os.getenv("PYTORCH_VERSION")
+    if version_pin := os.getenv("PYTORCH_VERSION"):
+        pytorch_dep += "==" + version_pin
+    elif (version_pin_ge := os.getenv("PYTORCH_VERSION_GE")) and (version_pin_lt := os.getenv("PYTORCH_VERSION_LT")):
+        # This branch and the associated env vars exist to help third-party
+        # builds such as https://github.com/pytorch/vision/pull/8936. This is
+        # supported on a best-effort basis: we don't guarantee that it won't
+        # eventually break, and we don't test it. Both PYTORCH_VERSION_GE and
+        # PYTORCH_VERSION_LT must be set for the range pin to be applied.
+        pytorch_dep += f">={version_pin_ge},<{version_pin_lt}"
 
     requirements = [
         "numpy",
@@ -123,7 +130,7 @@ def get_macros_and_flags():
             if NVCC_FLAGS is None:
                 nvcc_flags = []
             else:
-                nvcc_flags = NVCC_FLAGS.split(" ")
+                nvcc_flags = shlex.split(NVCC_FLAGS)
         extra_compile_args["nvcc"] = nvcc_flags
 
     if sys.platform == "win32":
 
@@ -423,6 +423,7 @@ def sample_position(values, max_value):
     h, w = [torch.randint(1, s, (num_boxes,)) for s in canvas_size]
     y = sample_position(h, canvas_size[0])
     x = sample_position(w, canvas_size[1])
+    r = -360 * torch.rand((num_boxes,)) + 180
 
     if format is tv_tensors.BoundingBoxFormat.XYWH:
         parts = (x, y, w, h)
@@ -435,6 +436,23 @@ def sample_position(values, max_value):
         cx = x + w / 2
         cy = y + h / 2
         parts = (cx, cy, w, h)
+    elif format is tv_tensors.BoundingBoxFormat.XYWHR:
+        parts = (x, y, w, h, r)
+    elif format is tv_tensors.BoundingBoxFormat.CXCYWHR:
+        cx = x + w / 2
+        cy = y + h / 2
+        parts = (cx, cy, w, h, r)
+    elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
+        r_rad = r * torch.pi / 180.0
+        cos, sin = torch.cos(r_rad), torch.sin(r_rad)
+        x1, y1 = x, y
+        x3 = x1 + w * cos
+        y3 = y1 - w * sin
+        x2 = x3 + h * sin
+        y2 = y3 + h * cos
+        x4 = x1 + h * sin
+        y4 = y1 + h * cos
+        parts = (x1, y1, x3, y3, x2, y2, x4, y4)
     else:
         raise ValueError(f"Format {format} is not supported")
 
 
@@ -611,6 +611,7 @@ class ImageDatasetTestCase(DatasetTestCase):
     """
 
     FEATURE_TYPES = (PIL.Image.Image, int)
+    SUPPORT_TV_IMAGE_DECODE: bool = False
 
     @contextlib.contextmanager
     def create_dataset(
@@ -632,22 +633,34 @@ def create_dataset(
             # This problem only occurs during testing since some tests, e.g. DatasetTestCase.test_feature_types open an
             # image, but never use the underlying data. During normal operation it is reasonable to assume that the
             # user wants to work with the image he just opened rather than deleting the underlying file.
-            with self._force_load_images():
+            with self._force_load_images(loader=(config or {}).get("loader", None)):
                 yield dataset, info
 
     @contextlib.contextmanager
-    def _force_load_images(self):
-        open = PIL.Image.open
+    def _force_load_images(self, loader: Optional[Callable[[str], Any]] = None):
+        """Patch the image-opening function so that PIL images are loaded eagerly.
+
+        Args:
+            loader: Function to patch instead of ``PIL.Image.open``. When it is
+                ``None`` (or otherwise falsy), ``PIL.Image.open`` is patched.
+
+        The patch target is the dotted name built from the function's
+        ``__module__`` and ``__qualname__``, so the function has to be
+        resolvable under that name. The patch is active for the duration of
+        the ``with`` block of the caller.
+        """
+        # NOTE: this local name shadows the builtin ``open`` inside this method.
+        open = loader or PIL.Image.open
 
         def new(fp, *args, **kwargs):
             image = open(fp, *args, **kwargs)
-            if isinstance(fp, (str, pathlib.Path)):
+            # Force-load only when the file was given as a path and the result is a
+            # PIL image; anything else returned by the loader is passed through as is.
+            if isinstance(fp, (str, pathlib.Path)) and isinstance(image, PIL.Image.Image):
                 image.load()
             return image
 
-        with unittest.mock.patch("PIL.Image.open", new=new):
+        with unittest.mock.patch(open.__module__ + "." + open.__qualname__, new=new):
             yield
 
+    def test_tv_decode_image_support(self):
+        """Check that the dataset yields tensors when ``torchvision.io.decode_image`` is the loader.
+
+        The test is skipped unless the test case sets ``SUPPORT_TV_IMAGE_DECODE``
+        to a truthy value.
+        """
+        if not self.SUPPORT_TV_IMAGE_DECODE:
+            pytest.skip(f"{self.DATASET_CLASS.__name__} does not support torchvision.io.decode_image.")
+
+        with self.create_dataset(
+            config=dict(
+                loader=torchvision.io.decode_image,
+            )
+        ) as (dataset, _):
+            # Only the first element of the first sample is inspected.
+            image = dataset[0][0]
+            assert isinstance(image, torch.Tensor)
+
 
 class VideoDatasetTestCase(DatasetTestCase):
     """Abstract base class for video dataset testcases.
 
@@ -24,6 +24,7 @@
 import torch.nn.functional as F
 from common_utils import combinations_grid
 from torchvision import datasets
+from torchvision.io import decode_image
 from torchvision.transforms import v2
 
 
@@ -405,6 +406,8 @@ class ImageNetTestCase(datasets_utils.ImageDatasetTestCase):
     REQUIRED_PACKAGES = ("scipy",)
     ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val"))
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir, config):
         tmpdir = pathlib.Path(tmpdir)
 
@@ -1173,6 +1176,8 @@ class SBUTestCase(datasets_utils.ImageDatasetTestCase):
     DATASET_CLASS = datasets.SBU
     FEATURE_TYPES = (PIL.Image.Image, str)
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir, config):
         num_images = 3
 
@@ -1411,6 +1416,8 @@ class Flickr8kTestCase(datasets_utils.ImageDatasetTestCase):
     _IMAGES_FOLDER = "images"
     _ANNOTATIONS_FILE = "captions.html"
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def dataset_args(self, tmpdir, config):
         tmpdir = pathlib.Path(tmpdir)
         root = tmpdir / self._IMAGES_FOLDER
@@ -1480,6 +1487,8 @@ class Flickr30kTestCase(Flickr8kTestCase):
 
     _ANNOTATIONS_FILE = "captions.token"
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def _image_file_name(self, idx):
         return f"{idx}.jpg"
 
@@ -1940,6 +1949,8 @@ class LFWPeopleTestCase(datasets_utils.DatasetTestCase):
     _IMAGES_DIR = {"original": "lfw", "funneled": "lfw_funneled", "deepfunneled": "lfw-deepfunneled"}
     _file_id = {"10fold": "", "train": "DevTrain", "test": "DevTest"}
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir, config):
         tmpdir = pathlib.Path(tmpdir) / "lfw-py"
         os.makedirs(tmpdir, exist_ok=True)
@@ -1976,6 +1987,18 @@ def _create_random_id(self):
         part2 = datasets_utils.create_random_string(random.randint(4, 7))
         return f"{part1}_{part2}"
 
+    def test_tv_decode_image_support(self):
+        """Check that the dataset yields tensors when ``decode_image`` is passed as the loader.
+
+        The test is skipped unless ``SUPPORT_TV_IMAGE_DECODE`` is truthy.
+        """
+        # NOTE(review): this duplicates ImageDatasetTestCase.test_tv_decode_image_support,
+        # presumably because this test case derives from DatasetTestCase directly - confirm.
+        if not self.SUPPORT_TV_IMAGE_DECODE:
+            pytest.skip(f"{self.DATASET_CLASS.__name__} does not support torchvision.io.decode_image.")
+
+        with self.create_dataset(
+            config=dict(
+                loader=decode_image,
+            )
+        ) as (dataset, _):
+            # Only the first element of the first sample is inspected.
+            image = dataset[0][0]
+            assert isinstance(image, torch.Tensor)
+
 
 class LFWPairsTestCase(LFWPeopleTestCase):
     DATASET_CLASS = datasets.LFWPairs
@@ -2308,6 +2331,7 @@ def inject_fake_data(self, tmpdir, config):
 class EuroSATTestCase(datasets_utils.ImageDatasetTestCase):
     DATASET_CLASS = datasets.EuroSAT
     FEATURE_TYPES = (PIL.Image.Image, int)
+    SUPPORT_TV_IMAGE_DECODE = True
 
     def inject_fake_data(self, tmpdir, config):
         data_folder = os.path.join(tmpdir, "eurosat", "2750")
@@ -2332,6 +2356,8 @@ class Food101TestCase(datasets_utils.ImageDatasetTestCase):
 
     ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test"))
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir: str, config):
         root_folder = pathlib.Path(tmpdir) / "food-101"
         image_folder = root_folder / "images"
@@ -2368,6 +2394,7 @@ class FGVCAircraftTestCase(datasets_utils.ImageDatasetTestCase):
     ADDITIONAL_CONFIGS = combinations_grid(
         split=("train", "val", "trainval", "test"), annotation_level=("variant", "family", "manufacturer")
     )
+    SUPPORT_TV_IMAGE_DECODE = True
 
     def inject_fake_data(self, tmpdir: str, config):
         split = config["split"]
@@ -2417,6 +2444,8 @@ def inject_fake_data(self, tmpdir: str, config):
 class SUN397TestCase(datasets_utils.ImageDatasetTestCase):
     DATASET_CLASS = datasets.SUN397
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir: str, config):
         data_dir = pathlib.Path(tmpdir) / "SUN397"
         data_dir.mkdir()
@@ -2448,6 +2477,8 @@ class DTDTestCase(datasets_utils.ImageDatasetTestCase):
     DATASET_CLASS = datasets.DTD
     FEATURE_TYPES = (PIL.Image.Image, int)
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     ADDITIONAL_CONFIGS = combinations_grid(
         split=("train", "test", "val"),
         # There is no need to test the whole matrix here, since each fold is treated exactly the same
@@ -2608,6 +2639,7 @@ class CLEVRClassificationTestCase(datasets_utils.ImageDatasetTestCase):
     FEATURE_TYPES = (PIL.Image.Image, (int, type(None)))
 
     ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test"))
+    SUPPORT_TV_IMAGE_DECODE = True
 
     def inject_fake_data(self, tmpdir, config):
         data_folder = pathlib.Path(tmpdir) / "clevr" / "CLEVR_v1.0"
@@ -2705,6 +2737,8 @@ class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase):
     REQUIRED_PACKAGES = ("scipy",)
     ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test"))
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir, config):
         import scipy.io as io
         from numpy.core.records import fromarrays
@@ -2749,6 +2783,8 @@ class Country211TestCase(datasets_utils.ImageDatasetTestCase):
 
     ADDITIONAL_CONFIGS = combinations_grid(split=("train", "valid", "test"))
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir: str, config):
         split_folder = pathlib.Path(tmpdir) / "country211" / config["split"]
         split_folder.mkdir(parents=True, exist_ok=True)
@@ -2777,6 +2813,8 @@ class Flowers102TestCase(datasets_utils.ImageDatasetTestCase):
     ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test"))
     REQUIRED_PACKAGES = ("scipy",)
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir: str, config):
         base_folder = pathlib.Path(tmpdir) / "flowers-102"
 
@@ -2835,6 +2873,8 @@ class RenderedSST2TestCase(datasets_utils.ImageDatasetTestCase):
     ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test"))
     SPLIT_TO_FOLDER = {"train": "train", "val": "valid", "test": "test"}
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     def inject_fake_data(self, tmpdir: str, config):
         root_folder = pathlib.Path(tmpdir) / "rendered-sst2"
         image_folder = root_folder / self.SPLIT_TO_FOLDER[config["split"]]
@@ -3495,6 +3535,8 @@ class ImagenetteTestCase(datasets_utils.ImageDatasetTestCase):
     DATASET_CLASS = datasets.Imagenette
     ADDITIONAL_CONFIGS = combinations_grid(split=["train", "val"], size=["full", "320px", "160px"])
 
+    SUPPORT_TV_IMAGE_DECODE = True
+
     _WNIDS = [
         "n01440764",
         "n02102040",
 
@@ -623,6 +623,42 @@ def test_encode_jpeg_cuda(img_path, scripted, contiguous):
     assert abs_mean_diff < 3
 
 
+@needs_cuda
+def test_encode_jpeg_cuda_sync():
+    """
+    Non-regression test for https://github.com/pytorch/vision/issues/8587.
+    Attempts to reproduce an intermittent CUDA stream synchronization bug
+    by randomly creating images and round-tripping them via encode_jpeg
+    and decode_jpeg on the GPU. Fails if the mean difference in uint8 range
+    exceeds 5.
+    """
+    torch.manual_seed(42)
+
+    # Manual testing showed the bug usually surfacing somewhere between
+    # iterations 50 and 100. Because it is a synchronization bug, it cannot be
+    # reproduced reliably.
+    max_iterations = 100
+    threshold = 5.0  # in [0..255]
+
+    device = torch.device("cuda")
+
+    for iteration in range(max_iterations):
+        # A new random image size is drawn on every iteration (seeded above).
+        height, width = torch.randint(4000, 5000, size=(2,))
+
+        # Build a single-channel ramp from 0 to 1 and expand it to 3 channels.
+        image = torch.linspace(0, 1, steps=height * width, device=device)
+        image = image.view(1, height, width).expand(3, -1, -1)
+
+        image = (image * 255).clamp(0, 255).to(torch.uint8)
+        jpeg_bytes = encode_jpeg(image, quality=100)
+
+        # The encoded bytes are moved to the CPU before being decoded onto `device`.
+        decoded_image = decode_jpeg(jpeg_bytes.cpu(), device=device)
+        mean_difference = (image.float() - decoded_image.float()).abs().mean().item()
+
+        assert mean_difference <= threshold, (
+            f"Encode/decode mismatch at iteration={iteration}, "
+            f"size={height}x{width}, mean diff={mean_difference:.2f}"
+        )
+
+
 @pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("scripted", (True, False))
 @pytest.mark.parametrize("contiguous", (True, False))