diff --git a/.gitignore b/.gitignore index 462b3f2..9391735 100644 --- a/.gitignore +++ b/.gitignore @@ -50,9 +50,6 @@ coverage.xml .hypothesis/ .pytest_cache/ -# Unit test files -bioio/tests/resources/ - # Translations *.mo *.pot diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e7ed4a8..ce440df 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -25,7 +25,7 @@ Ready to contribute? Here's how to set up `bioio` for local development. ```bash cd bioio/ - just setup-dev + just install ``` 4. Create a branch for local development: diff --git a/Justfile b/Justfile index ec7b66c..a832a68 100644 --- a/Justfile +++ b/Justfile @@ -23,6 +23,7 @@ clean: # install with all deps install: pip install -e .[lint,test,docs,dev] + pre-commit install # lint, format, and check all files lint: @@ -37,12 +38,6 @@ build: just lint just test -# install dependencies, setup pre-commit, download test resources -setup-dev: - just install - pre-commit install - python scripts/download_test_resources.py - # generate Sphinx HTML documentation generate-docs: rm -f docs/bioio*.rst diff --git a/bioio/bio_image.py b/bioio/bio_image.py index 5f18284..551f5f7 100644 --- a/bioio/bio_image.py +++ b/bioio/bio_image.py @@ -11,6 +11,7 @@ import xarray as xr from ome_types import OME +from .ome_utils import generate_ome_channel_id from .plugins import plugins_by_ext ############################################################################### @@ -176,36 +177,6 @@ def determine_reader( ), ) - @staticmethod - def _generate_ome_channel_id(image_id: str, channel_id: Union[str, int]) -> str: - """ - Naively generates the standard OME channel ID using a provided ID. - - Parameters - ---------- - image_id: str - An image id to pull the image specific index from. - See: `generate_ome_image_id` for more details. - channel_id: Union[str, int] - A string or int representing the ID for a channel. - In the context of the usage of this function, this is usually used with the - index of the channel. - - Returns - ------- - ome_channel_id: str - The OME standard for channel IDs. - - Notes - ----- - ImageIds are usually: "Image:0", "Image:1", or "Image:N", - ChannelIds are usually the combination of image index + channel index -- - "Channel:0:0" for the first channel of the first image for example. - """ - # Remove the prefix 'Image:' to get just the index - image_index = image_id.replace("Image:", "") - return f"Channel:{image_index}:{channel_id}" - def __init__( self, image: biob.types.ImageLike, @@ -349,7 +320,7 @@ def _transform_data_array_to_bioio_image_standard( # Add channel coordinate plane because it is required in BioImage if biob.dimensions.DimensionNames.Channel not in coords: coords[biob.dimensions.DimensionNames.Channel] = [ - BioImage._generate_ome_channel_id( + generate_ome_channel_id( image_id=self.current_scene, channel_id=0, ) diff --git a/bioio/ome_utils.py b/bioio/ome_utils.py new file mode 100644 index 0000000..4e9463c --- /dev/null +++ b/bioio/ome_utils.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import logging +import typing + +import numpy as np +from ome_types.model.simple_types import PixelType + +############################################################################### + +log = logging.getLogger(__name__) + +############################################################################### + + +def generate_ome_channel_id(image_id: str, channel_id: typing.Union[str, int]) -> str: + """ + Naively generates the standard OME channel ID using a provided ID. 
+ + Parameters + ---------- + image_id: str + An image id to pull the image specific index from. + See: `generate_ome_image_id` for more details. + channel_id: Union[str, int] + A string or int representing the ID for a channel. + In the context of the usage of this function, this is usually used with the + index of the channel. + + Returns + ------- + ome_channel_id: str + The OME standard for channel IDs. + + Notes + ----- + ImageIds are usually: "Image:0", "Image:1", or "Image:N", + ChannelIds are usually the combination of image index + channel index -- + "Channel:0:0" for the first channel of the first image for example. + """ + # Remove the prefix 'Image:' to get just the index + image_index = image_id.replace("Image:", "") + return f"Channel:{image_index}:{channel_id}" + + +def generate_ome_image_id(image_id: typing.Union[str, int]) -> str: + """ + Naively generates the standard OME image ID using a provided ID. + + Parameters + ---------- + image_id: Union[str, int] + A string or int representing the ID for an image. + In the context of the usage of this function, this is usually used with the + index of the scene / image. + + Returns + ------- + ome_image_id: str + The OME standard for image IDs. + """ + return f"Image:{image_id}" + + +def dtype_to_ome_type(npdtype: np.dtype) -> PixelType: + """ + Convert numpy dtype to OME PixelType + + Parameters + ---------- + npdtype: numpy.dtype + A numpy datatype. + + Returns + ------- + ome_type: PixelType + One of the supported OME Pixels types + + Raises + ------ + ValueError + No matching pixel type for provided numpy type. + """ + ometypedict = { + np.dtype(np.int8): PixelType.INT8, + np.dtype(np.int16): PixelType.INT16, + np.dtype(np.int32): PixelType.INT32, + np.dtype(np.uint8): PixelType.UINT8, + np.dtype(np.uint16): PixelType.UINT16, + np.dtype(np.uint32): PixelType.UINT32, + np.dtype(np.float32): PixelType.FLOAT, + np.dtype(np.float64): PixelType.DOUBLE, + np.dtype(np.complex64): PixelType.COMPLEXFLOAT, + np.dtype(np.complex128): PixelType.COMPLEXDOUBLE, + } + ptype = ometypedict.get(npdtype) + if ptype is None: + raise ValueError(f"Ome utils can't resolve pixel type: {npdtype.name}") + return ptype + + +def ome_to_numpy_dtype(ome_type: PixelType) -> np.dtype: + """ + Convert OME PixelType to numpy dtype + + Parameters + ---------- + ome_type: PixelType + One of the supported OME Pixels types + + Returns + ------- + npdtype: numpy.dtype + A numpy datatype. + + Raises + ------ + ValueError + No matching numpy type for the provided pixel type. 
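+
+    Examples
+    --------
+    A minimal round-trip sketch (illustrative; the output reprs assume
+    standard numpy and ome-types behavior):
+
+    >>> ome_to_numpy_dtype(PixelType.UINT16)
+    dtype('uint16')
+    >>> dtype_to_ome_type(np.dtype(np.uint16))
+    <PixelType.UINT16: 'uint16'>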
+ """ + ometypedict: typing.Dict[PixelType, np.dtype] = { + PixelType.INT8: np.dtype(np.int8), + PixelType.INT16: np.dtype(np.int16), + PixelType.INT32: np.dtype(np.int32), + PixelType.UINT8: np.dtype(np.uint8), + PixelType.UINT16: np.dtype(np.uint16), + PixelType.UINT32: np.dtype(np.uint32), + PixelType.FLOAT: np.dtype(np.float32), + PixelType.DOUBLE: np.dtype(np.float64), + PixelType.COMPLEXFLOAT: np.dtype(np.complex64), + PixelType.COMPLEXDOUBLE: np.dtype(np.complex128), + } + nptype = ometypedict.get(ome_type) + if nptype is None: + raise ValueError(f"Ome utils can't resolve pixel type: {ome_type.value}") + return nptype diff --git a/bioio/tests/conftest.py b/bioio/tests/conftest.py index bcdf977..f1cdce8 100644 --- a/bioio/tests/conftest.py +++ b/bioio/tests/conftest.py @@ -16,19 +16,40 @@ https://docs.pytest.org/en/latest/plugins.html#requiring-loading-plugins-in-a-test-module-or-conftest-file """ +import logging import pathlib import typing +import dask.array as da +import numpy as np import pytest -LOCAL_RESOURCES_DIR = pathlib.Path(__file__).parent / "resources" -LOCAL_RESOURCES_WRITE_DIR = pathlib.Path(__file__).parent / "writer_products" +############################################################################### +log = logging.getLogger(__name__) -def get_resource_full_path(filename: str) -> typing.Union[str, pathlib.Path]: - return LOCAL_RESOURCES_DIR / filename +############################################################################### @pytest.fixture -def data_dir() -> pathlib.Path: - return pathlib.Path(__file__).parent / "data" +def sample_text_file(tmp_path: pathlib.Path) -> pathlib.Path: + example_file = tmp_path / "temp-example.txt" + example_file.write_text("just some example text here") + return example_file + + +def np_random_from_shape( + shape: typing.Tuple[int, ...], **kwargs: typing.Any +) -> np.ndarray: + return np.random.randint(255, size=shape, **kwargs) + + +def da_random_from_shape( + shape: typing.Tuple[int, ...], **kwargs: typing.Any +) -> da.Array: + return da.random.randint(255, size=shape, **kwargs) + + +array_constructor = pytest.mark.parametrize( + "array_constructor", [np_random_from_shape, da_random_from_shape] +) diff --git a/bioio/tests/test_bio_image.py b/bioio/tests/test_bio_image.py index d68a849..069a9c6 100644 --- a/bioio/tests/test_bio_image.py +++ b/bioio/tests/test_bio_image.py @@ -1,30 +1,21 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import pathlib + import bioio_base as biob import pytest from bioio import BioImage -from .conftest import get_resource_full_path + +def test_bioimage_with_text_file(sample_text_file: pathlib.Path) -> None: + with pytest.raises(biob.exceptions.UnsupportedFileFormatError): + BioImage(sample_text_file) -@pytest.mark.parametrize( - "filename", - [ - pytest.param( - "example.txt", - marks=pytest.mark.xfail(raises=biob.exceptions.UnsupportedFileFormatError), - ), - pytest.param( - "does-not-exist-klafjjksdafkjl.bad", - marks=pytest.mark.xfail(raises=FileNotFoundError), - ), - ], -) -def test_bioimage( - filename: str, -) -> None: +def test_bioimage_with_missing_file(tmp_path: pathlib.Path) -> None: # Construct full filepath - uri = get_resource_full_path(filename) - BioImage(uri) + uri = tmp_path / "does-not-exist-klafjjksdafkjl.bad" + with pytest.raises(FileNotFoundError): + BioImage(uri) diff --git a/bioio/tests/writers/__init__.py b/bioio/tests/writers/__init__.py new file mode 100644 index 0000000..faa18be --- /dev/null +++ b/bioio/tests/writers/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/env 
python +# -*- coding: utf-8 -*- diff --git a/bioio/tests/writers/test_ome_tiff_writer.py b/bioio/tests/writers/test_ome_tiff_writer.py new file mode 100644 index 0000000..71eb6e1 --- /dev/null +++ b/bioio/tests/writers/test_ome_tiff_writer.py @@ -0,0 +1,478 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import pathlib +from typing import Callable, List, Optional, Tuple, Union + +import bioio_base as biob +import numpy as np +import pytest +import tifffile +from ome_types import to_xml +from ome_types.model import OME + +from bioio.writers import OmeTiffWriter + +from ..conftest import array_constructor + + +@array_constructor +@pytest.mark.parametrize( + "write_shape, write_dim_order, expected_read_shape, expected_read_dim_order", + [ + ((5, 16, 16), None, (5, 16, 16), "ZYX"), + ((5, 16, 16), "ZYX", (5, 16, 16), "ZYX"), + ((5, 16, 16), "CYX", (5, 16, 16), "CYX"), + ((10, 5, 16, 16), "ZCYX", (10, 5, 16, 16), "ZCYX"), + ((5, 10, 16, 16), "CZYX", (5, 10, 16, 16), "CZYX"), + ((16, 16), "YX", (16, 16), "YX"), + pytest.param( + (2, 3, 3), + "AYX", + None, + None, + marks=pytest.mark.xfail( + raises=biob.exceptions.InvalidDimensionOrderingError + ), + ), + pytest.param( + (2, 3, 3), + "YXZ", + None, + None, + marks=pytest.mark.xfail( + raises=biob.exceptions.InvalidDimensionOrderingError + ), + ), + pytest.param( + (2, 5, 16, 16), + "CYX", + None, + None, + marks=pytest.mark.xfail( + raises=biob.exceptions.InvalidDimensionOrderingError + ), + ), + ((1, 2, 3, 4, 5), None, (2, 3, 4, 5), "CZYX"), + ((2, 3, 4, 5, 6), "TCZYX", (2, 3, 4, 5, 6), "TCZYX"), + ((2, 3, 4, 5, 6), None, (2, 3, 4, 5, 6), "TCZYX"), + ((1, 2, 3, 4, 5, 3), None, (2, 3, 4, 5, 3), "CZYXS"), + # error 6D data doesn't work unless last dim is 3 or 4 + pytest.param( + (1, 2, 3, 4, 5, 6), + None, + (1, 2, 3, 4, 5, 6), + "TCZYXS", + marks=pytest.mark.xfail(raises=ValueError), + ), + ((5, 16, 16, 3), "ZYXS", (5, 16, 16, 3), "ZYXS"), + ((5, 16, 16, 4), "CYXS", (5, 16, 16, 4), "CYXS"), + ((3, 5, 16, 16, 4), "ZCYXS", (3, 5, 16, 16, 4), "ZCYXS"), + ], +) +@pytest.mark.parametrize("filename", ["e.ome.tiff"]) +def test_ome_tiff_writer_no_meta( + array_constructor: Callable, + write_shape: Tuple[int, ...], + write_dim_order: Optional[str], + expected_read_shape: Tuple[int, ...], + expected_read_dim_order: str, + filename: str, + tmp_path: pathlib.Path, +) -> None: + # Create array + arr = array_constructor(write_shape, dtype=np.uint8) + + # Construct save end point + save_uri = tmp_path / filename + + # Normal save + OmeTiffWriter.save(arr, save_uri, write_dim_order) + + fs, path = biob.io.pathlike_to_fs(save_uri) + with fs.open(path) as open_resource: + with tifffile.TiffFile(open_resource, is_mmstack=False) as tiff: + assert len(tiff.series) == 1 + scene = tiff.series[0] + assert scene.shape == expected_read_shape + assert scene.pages.axes == expected_read_dim_order + + +@array_constructor +@pytest.mark.parametrize( + "shape_to_create, ome_xml, expected_shape, expected_dim_order", + [ + # ok dims + ( + (1, 2, 3, 4, 5), + to_xml(OmeTiffWriter.build_ome([(1, 2, 3, 4, 5)], [np.dtype(np.uint8)])), + (2, 3, 4, 5), + "CZYX", + ), + ( + (1, 2, 3, 4, 5), + OmeTiffWriter.build_ome([(1, 2, 3, 4, 5)], [np.dtype(np.uint8)]), + (2, 3, 4, 5), + "CZYX", + ), + # with RGB data: + ( + (2, 2, 3, 4, 5, 3), + to_xml(OmeTiffWriter.build_ome([(2, 2, 3, 4, 5, 3)], [np.dtype(np.uint8)])), + (2, 2, 3, 4, 5, 3), + "TCZYXS", + ), + ( + (2, 2, 3, 4, 5, 3), + OmeTiffWriter.build_ome([(2, 2, 3, 4, 5, 3)], [np.dtype(np.uint8)]), + (2, 2, 3, 4, 5, 3), + "TCZYXS", + ), 
+ # wrong dtype + pytest.param( + (1, 2, 3, 4, 5), + to_xml(OmeTiffWriter.build_ome([(1, 2, 3, 4, 5)], [np.dtype(np.float32)])), + (2, 3, 4, 5), + "CZYX", + marks=pytest.mark.xfail(raises=ValueError), + ), + # wrong dtype + pytest.param( + (1, 2, 3, 4, 5), + OmeTiffWriter.build_ome([(1, 2, 3, 4, 5)], [np.dtype(np.float32)]), + (2, 3, 4, 5), + "CZYX", + marks=pytest.mark.xfail(raises=ValueError), + ), + # wrong dims + pytest.param( + (1, 2, 3, 4, 5), + to_xml(OmeTiffWriter.build_ome([(2, 2, 3, 4, 5)], [np.dtype(np.float32)])), + (2, 3, 4, 5), + "CZYX", + marks=pytest.mark.xfail(raises=ValueError), + ), + # wrong dims + pytest.param( + (1, 2, 3, 4, 5), + OmeTiffWriter.build_ome([(2, 2, 3, 4, 5)], [np.dtype(np.float32)]), + (2, 3, 4, 5), + "CZYX", + marks=pytest.mark.xfail(raises=ValueError), + ), + # just totally wrong but valid ome + pytest.param( + (1, 2, 3, 4, 5), + to_xml(OME()), + (2, 3, 4, 5), + "CZYX", + marks=pytest.mark.xfail(raises=ValueError), + ), + # just totally wrong but valid ome + pytest.param( + (1, 2, 3, 4, 5), + OME(), + (2, 3, 4, 5), + "CZYX", + marks=pytest.mark.xfail(raises=ValueError), + ), + # even more blatantly bad ome + pytest.param( + (1, 2, 3, 4, 5), + "bad ome string", + (2, 3, 4, 5), + "CZYX", + # raised from within ome-types + marks=pytest.mark.xfail(raises=ValueError), + ), + ], +) +@pytest.mark.parametrize("filename", ["e.ome.tiff"]) +def test_ome_tiff_writer_with_meta( + array_constructor: Callable, + shape_to_create: Tuple[int, ...], + ome_xml: Union[str, OME, None], + expected_shape: Tuple[int, ...], + expected_dim_order: Tuple[str, ...], + filename: str, + tmp_path: pathlib.Path, +) -> None: + # Create array + arr = array_constructor(shape_to_create, dtype=np.uint8) + + # Construct save end point + save_uri = tmp_path / filename + + # Normal save + OmeTiffWriter.save(arr, save_uri, dimension_order=None, ome_xml=ome_xml) + + # Check basics + fs, path = biob.io.pathlike_to_fs(save_uri) + with fs.open(path) as open_resource: + with tifffile.TiffFile(open_resource, is_mmstack=False) as tiff: + assert len(tiff.series) == 1 + scene = tiff.series[0] + assert scene.shape == tuple(expected_shape) + assert scene.pages.axes == expected_dim_order + + +@pytest.mark.parametrize( + "array_data, write_dim_order, read_shapes, read_dim_order", + [ + ([np.random.rand(5, 16, 16)], None, [(5, 16, 16)], ["ZYX"]), + ( + [np.random.rand(5, 16, 16), np.random.rand(4, 12, 12)], + None, + [(5, 16, 16), (4, 12, 12)], + ["ZYX", "ZYX"], + ), + ( + [np.random.rand(5, 16, 16, 3), np.random.rand(4, 12, 12, 3)], + None, + [(5, 16, 16, 3), (4, 12, 12, 3)], + ["CZYX", "CZYX"], + ), + ( + [np.random.rand(5, 16, 16, 3), np.random.rand(4, 12, 12, 3)], + ["ZYXS", "CYXS"], + [(5, 16, 16, 3), (4, 12, 12, 3)], + ["ZYXS", "CYXS"], + ), + # spread dim_order to each image written + ( + [np.random.rand(3, 10, 16, 16), np.random.rand(4, 12, 16, 16)], + "CZYX", + [(3, 10, 16, 16), (4, 12, 16, 16)], + ["CZYX", "CZYX"], + ), + # different dims, rgb last + ( + [np.random.rand(5, 16, 16), np.random.rand(4, 12, 12, 3)], + ["ZYX", "CYXS"], + [(5, 16, 16), (4, 12, 12, 3)], + ["ZYX", "CYXS"], + ), + # different dims, rgb first + ( + [np.random.rand(5, 16, 16, 3), np.random.rand(4, 12, 12)], + ["ZYXS", "CYX"], + [(5, 16, 16, 3), (4, 12, 12)], + ["ZYXS", "CYX"], + ), + # two scenes but only one dimension order as list + pytest.param( + [np.random.rand(5, 16, 16, 3), np.random.rand(4, 12, 12)], + ["ZYXS"], + None, + None, + marks=pytest.mark.xfail(raises=biob.exceptions.ConflictingArgumentsError), + ), + # bad 
dims + pytest.param( + [np.random.rand(2, 3, 3)], + "AYX", + None, + None, + marks=pytest.mark.xfail( + raises=biob.exceptions.InvalidDimensionOrderingError + ), + ), + ], +) +@pytest.mark.parametrize("filename", ["e.ome.tiff"]) +def test_ome_tiff_writer_multiscene( + array_data: List[biob.types.ArrayLike], + write_dim_order: List[Optional[str]], + read_shapes: List[Tuple[int, ...]], + read_dim_order: List[str], + filename: str, + tmp_path: pathlib.Path, +) -> None: + # Construct save end point + save_uri = tmp_path / filename + + # Normal save + OmeTiffWriter.save(array_data, save_uri, write_dim_order) + + # Check basics + fs, path = biob.io.pathlike_to_fs(save_uri) + with fs.open(path) as open_resource: + with tifffile.TiffFile(open_resource, is_mmstack=False) as tiff: + assert len(tiff.series) == len(read_shapes) + for i in range(len(tiff.series)): + scene = tiff.series[i] + assert scene.shape == tuple(read_shapes[i]) + assert scene.pages.axes == read_dim_order[i] + + +@pytest.mark.parametrize( + "array_data, " + "write_dim_order, " + "pixel_size, " + "channel_names, " + "channel_colors, " + "read_shapes, " + "read_dim_order", + [ + ( + np.random.rand(1, 2, 5, 16, 16), + "TCZYX", + None, + ["C0", "C1"], + None, + [(2, 5, 16, 16)], + ["CZYX"], + ), + ( + [np.random.rand(1, 2, 5, 16, 16), np.random.rand(1, 2, 4, 15, 15)], + "TCZYX", + None, + ["C0", "C1"], + None, + [(2, 5, 16, 16), (2, 4, 15, 15)], + ["CZYX", "CZYX"], + ), + ( + [np.random.rand(5, 16, 16)], + None, + [biob.types.PhysicalPixelSizes(1.0, 2.0, 3.0)], + ["C0"], + None, + [(5, 16, 16)], + ["ZYX"], + ), + ( + [np.random.rand(5, 16, 16)], + None, + [biob.types.PhysicalPixelSizes(None, 2.0, 3.0)], + ["C0"], + None, + [(5, 16, 16)], + ["ZYX"], + ), + ( + [np.random.rand(2, 16, 16), np.random.rand(2, 12, 12)], + "CYX", + [ + biob.types.PhysicalPixelSizes(1.0, 2.0, 3.0), + biob.types.PhysicalPixelSizes(4.0, 5.0, 6.0), + ], + [["C0", "C1"], None], + None, + [(2, 16, 16), (2, 12, 12)], + ["CYX", "CYX"], + ), + ( + np.random.rand(3, 16, 16), + "CYX", + biob.types.PhysicalPixelSizes(None, 1.0, 1.0), + ["C0", "C1", "C2"], + [[255, 0, 0], [0, 255, 0], [0, 0, 255]], + [(3, 16, 16)], + ["CYX"], + ), + pytest.param( + np.random.rand(3, 16, 16), + "CYX", + biob.types.PhysicalPixelSizes(None, 1.0, 1.0), + ["C0", "C1", "C2"], + [[255, 0, 0], [0, 255, 0], [0, 0, 255], [1, 1, 1]], + [(3, 16, 16)], + ["CYX"], + marks=pytest.mark.xfail(raises=ValueError), + ), + ( + [np.random.rand(3, 16, 16)], + ["CYX"], + [biob.types.PhysicalPixelSizes(None, 1.0, 1.0)], + [["C0", "C1", "C2"]], + [[[255, 0, 0], [0, 255, 0], [0, 0, 255]]], + [(3, 16, 16)], + ["CYX"], + ), + ( + [np.random.rand(3, 16, 16)], + ["CYX"], + [biob.types.PhysicalPixelSizes(None, 1.0, 1.0)], + [["C0", "C1", "C2"]], + [None], + [(3, 16, 16)], + ["CYX"], + ), + ( + [np.random.rand(3, 16, 16), np.random.rand(3, 16, 16)], + "CYX", + biob.types.PhysicalPixelSizes(None, 1.0, 1.0), + ["C0", "C1", "C2"], + [[255, 0, 0], [0, 255, 0], [0, 0, 255]], + [(3, 16, 16), (3, 16, 16)], + ["CYX", "CYX"], + ), + ( + [np.random.rand(3, 16, 16), np.random.rand(3, 4, 16, 16)], + ["CYX", "CZYX"], + [ + biob.types.PhysicalPixelSizes(None, 1.0, 1.0), + biob.types.PhysicalPixelSizes(1.0, 1.0, 1.0), + ], + [["C0", "C1", "C2"], ["C4", "C5", "C6"]], + [ + [[255, 0, 0], [0, 255, 0], [0, 0, 255]], + [[0, 255, 0], [0, 0, 255], [255, 0, 0]], + ], + [(3, 16, 16), (3, 4, 16, 16)], + ["CYX", "CZYX"], + ), + ( + [np.random.rand(3, 16, 16), np.random.rand(3, 4, 16, 16)], + ["CYX", "CZYX"], + [ + 
biob.types.PhysicalPixelSizes(None, 1.0, 1.0), + biob.types.PhysicalPixelSizes(1.0, 1.0, 1.0), + ], + [["C0", "C1", "C2"], ["C4", "C5", "C6"]], + [ + None, + [[0, 255, 0], [0, 0, 255], [255, 0, 0]], + ], + [(3, 16, 16), (3, 4, 16, 16)], + ["CYX", "CZYX"], + ), + ], +) +@pytest.mark.parametrize("filename", ["e.ome.tiff"]) +def test_ome_tiff_writer_common_metadata( + array_data: Union[biob.types.ArrayLike, List[biob.types.ArrayLike]], + write_dim_order: Union[Optional[str], List[Optional[str]]], + pixel_size: Union[ + biob.types.PhysicalPixelSizes, List[biob.types.PhysicalPixelSizes] + ], + channel_names: Union[List[str], List[Optional[List[str]]]], + channel_colors: Union[Optional[List[List[int]]], List[Optional[List[List[int]]]]], + read_shapes: List[Tuple[int, ...]], + read_dim_order: List[str], + filename: str, + tmp_path: pathlib.Path, +) -> None: + # Construct save end point + save_uri = tmp_path / filename + + # Normal save + OmeTiffWriter.save( + array_data, + save_uri, + write_dim_order, + channel_names=channel_names, + channel_colors=channel_colors, + physical_pixel_sizes=pixel_size, + ) + + # Check basics + fs, path = biob.io.pathlike_to_fs(save_uri) + with fs.open(path) as open_resource: + with tifffile.TiffFile(open_resource, is_mmstack=False) as tiff: + assert len(tiff.series) == len(read_shapes) + for i in range(len(tiff.series)): + scene = tiff.series[i] + assert scene.shape == read_shapes[i] + assert scene.pages.axes == read_dim_order[i] diff --git a/bioio/tests/writers/test_ome_zarr_writer.py b/bioio/tests/writers/test_ome_zarr_writer.py new file mode 100644 index 0000000..7d2dbad --- /dev/null +++ b/bioio/tests/writers/test_ome_zarr_writer.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import pathlib +import shutil +from typing import Callable, List, Optional, Tuple + +import bioio_base as biob +import numpy as np +import pytest +from ome_zarr.io import parse_url +from ome_zarr.reader import Reader + +from bioio.writers import OmeZarrWriter + +from ..conftest import array_constructor + + +@array_constructor +@pytest.mark.parametrize( + "write_shape, write_dim_order, expected_read_shape, expected_read_dim_order", + [ + ((1, 2, 3, 4, 5), None, (1, 2, 3, 4, 5), "TCZYX"), + ((1, 2, 3, 4, 5), "TCZYX", (1, 2, 3, 4, 5), "TCZYX"), + ((2, 3, 4, 5, 6), None, (2, 3, 4, 5, 6), "TCZYX"), + ((1, 1, 1, 1, 1), None, (1, 1, 1, 1, 1), "TCZYX"), + ((5, 16, 16), None, (5, 16, 16), "ZYX"), + ((5, 16, 16), "ZYX", (5, 16, 16), "ZYX"), + ((5, 16, 16), "CYX", (5, 16, 16), "CYX"), + ((5, 16, 16), "TYX", (5, 16, 16), "TYX"), + pytest.param( + (10, 5, 16, 16), + "ZCYX", + (10, 5, 16, 16), + "ZCYX", + marks=pytest.mark.xfail( + raises=biob.exceptions.InvalidDimensionOrderingError + ), + ), + ((5, 10, 16, 16), "CZYX", (5, 10, 16, 16), "CZYX"), + ((15, 16), "YX", (15, 16), "YX"), + pytest.param( + (2, 3, 3), + "AYX", + None, + None, + marks=pytest.mark.xfail( + raises=biob.exceptions.InvalidDimensionOrderingError + ), + ), + ((2, 3, 3), "YXZ", (2, 3, 3), "YXZ"), + pytest.param( + (2, 5, 16, 16), + "CYX", + None, + None, + marks=pytest.mark.xfail( + raises=biob.exceptions.InvalidDimensionOrderingError + ), + ), + # error 6D data doesn't work yet + pytest.param( + (1, 2, 3, 4, 5, 3), + None, + None, + None, + marks=pytest.mark.xfail( + raises=biob.exceptions.InvalidDimensionOrderingError + ), + ), + ], +) +@pytest.mark.parametrize("filename", ["e.zarr"]) +def test_ome_zarr_writer_dims( + array_constructor: Callable, + write_shape: Tuple[int, ...], + write_dim_order: Optional[str], 
+ expected_read_shape: Tuple[int, ...], + expected_read_dim_order: str, + filename: str, + tmp_path: pathlib.Path, +) -> None: + # Create array + arr = array_constructor(write_shape, dtype=np.uint8) + + # Construct save end point + save_uri = tmp_path / filename + # clear out anything left over + shutil.rmtree(save_uri, ignore_errors=True) + + # Normal save + writer = OmeZarrWriter(save_uri) + writer.write_image(arr, "", None, None, None, dimension_order=write_dim_order) + + # Read written result and check basics + reader = Reader(parse_url(save_uri)) + node = list(reader())[0] + num_levels = len(node.data) + assert num_levels == 1 + level = 0 + shape = node.data[level].shape + assert shape == expected_read_shape + axes = node.metadata["axes"] + dims = "".join([a["name"] for a in axes]).upper() + assert dims == expected_read_dim_order + + +@array_constructor +@pytest.mark.parametrize( + "write_shape, num_levels, scale, expected_read_shapes, expected_read_scales", + [ + ( + (2, 4, 8, 16, 32), + 2, + 2, + [(2, 4, 8, 16, 32), (2, 4, 8, 8, 16), (2, 4, 8, 4, 8)], + [ + [1.0, 1.0, 1.0, 1.0, 1.0], + [1.0, 1.0, 1.0, 2.0, 2.0], + [1.0, 1.0, 1.0, 4.0, 4.0], + ], + ), + ( + (16, 32), + 2, + 4, + [(16, 32), (4, 8), (1, 2)], + [ + [1.0, 1.0], + [4.0, 4.0], + [16.0, 16.0], + ], + ), + ], +) +@pytest.mark.parametrize("filename", ["f.zarr"]) +def test_ome_zarr_writer_scaling( + array_constructor: Callable, + write_shape: Tuple[int, ...], + num_levels: int, + scale: float, + expected_read_shapes: List[Tuple[int, ...]], + expected_read_scales: List[List[int]], + filename: str, + tmp_path: pathlib.Path, +) -> None: + # Create array + arr = array_constructor(write_shape, dtype=np.uint8) + + # Construct save end point + save_uri = tmp_path / filename + # clear out anything left over + shutil.rmtree(save_uri, ignore_errors=True) + + # Normal save + writer = OmeZarrWriter(save_uri) + writer.write_image( + arr, "", None, None, None, scale_num_levels=num_levels, scale_factor=scale + ) + + # Read written result and check basics + reader = Reader(parse_url(save_uri)) + node = list(reader())[0] + read_num_levels = len(node.data) + assert num_levels == read_num_levels + print(node.metadata) + for i in range(num_levels): + shape = node.data[i].shape + assert shape == expected_read_shapes[i] + xforms = node.metadata["coordinateTransformations"][i] + assert len(xforms) == 1 + assert xforms[0]["type"] == "scale" + assert xforms[0]["scale"] == expected_read_scales[i] diff --git a/bioio/tests/writers/test_timeseries_writer.py b/bioio/tests/writers/test_timeseries_writer.py new file mode 100644 index 0000000..0e6a974 --- /dev/null +++ b/bioio/tests/writers/test_timeseries_writer.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import pathlib +from typing import Callable, Tuple + +import bioio_base as biob +import imageio +import numpy as np +import pytest + +from bioio.writers.timeseries_writer import TimeseriesWriter +from bioio.writers.two_d_writer import TwoDWriter + +from ..conftest import array_constructor + + +@array_constructor +@pytest.mark.parametrize( + "write_shape, write_dim_order, read_shape", + [ + # TODO: Failing currently, needs work, + # see https://github.com/bioio-devs/bioio/issues/10 + # ((30, 100, 100), None, (30, 100, 100)), + # Note that files get saved out with RGBA, instead of just RGB + ((30, 100, 100, 3), None, (30, 100, 100, 4)), + # TODO: Failing currently, needs work, + # see https://github.com/bioio-devs/bioio/issues/10 + # ((100, 30, 100), "XTY", (30, 100, 100)), + # 
Note that files get saved out with RGBA, instead of just RGB + ((3, 100, 30, 100), "SYTX", (30, 100, 100, 4)), + pytest.param( + (1, 1), + None, + None, + marks=pytest.mark.xfail(raises=biob.exceptions.UnexpectedShapeError), + ), + pytest.param( + (1, 1, 1, 1, 1), + None, + None, + marks=pytest.mark.xfail(raises=biob.exceptions.UnexpectedShapeError), + ), + pytest.param( + (1, 1, 1, 1, 1, 1), + "STCZYX", + None, + marks=pytest.mark.xfail(raises=biob.exceptions.UnexpectedShapeError), + ), + pytest.param( + (1, 1, 1, 1), + "ABCD", + None, + marks=pytest.mark.xfail( + raises=biob.exceptions.InvalidDimensionOrderingError + ), + ), + ], +) +@pytest.mark.parametrize("filename", ["e.gif"]) +def test_timeseries_writer( + array_constructor: Callable, + write_shape: Tuple[int, ...], + write_dim_order: str, + read_shape: Tuple[int, ...], + filename: str, + tmp_path: pathlib.Path, +) -> None: + # Create array + arr = array_constructor(write_shape, dtype=np.uint8) + + # Construct save end point + save_uri = tmp_path / filename + + # Normal save + TimeseriesWriter.save(arr, save_uri, write_dim_order) + + # TODO: Actually uncovered bug in DefaultReader :( + # dask_a = aicsimageio.readers.default_reader.DefaultReader(save_uri).dask_data + # data = dask_a.compute() + # assert data.shape == read_shape + # assert reader.shape[-1] < + + # Read written result and check basics + fs, path = biob.io.pathlike_to_fs(save_uri) + extension, mode = TwoDWriter.get_extension_and_mode(path) + with fs.open(path) as open_resource: + with imageio.get_reader(open_resource, format=extension, mode=mode) as reader: + # Read and stack all frames + frames = [] + for frame in reader: + print(frame.shape) + frames.append(frame) + + data = np.stack(frames) + + # Check basics + assert data.shape == read_shape + assert data.shape[-1] <= 4 + + # Can't do "easy" testing because compression + shape mismatches on RGB data + + +@array_constructor +@pytest.mark.parametrize( + "write_shape, write_dim_order, read_shape", + [ + # We use 112 instead of 100 because FFMPEG block size warnings are annoying + ((30, 112, 112), None, (30, 112, 112, 3)), + # Note that files get saved out with RGBA, instead of just RGB + ((30, 112, 112, 3), None, (30, 112, 112, 3)), + ((112, 30, 112), "XTY", (30, 112, 112, 3)), + # Note that files get saved out with RGBA, instead of just RGB + ((3, 112, 30, 112), "SYTX", (30, 112, 112, 3)), + pytest.param( + (1, 1), + None, + None, + marks=pytest.mark.xfail(raises=biob.exceptions.UnexpectedShapeError), + ), + pytest.param( + (1, 1, 1, 1, 1), + None, + None, + marks=pytest.mark.xfail(raises=biob.exceptions.UnexpectedShapeError), + ), + pytest.param( + (1, 1, 1, 1, 1, 1), + "STCZYX", + None, + marks=pytest.mark.xfail(raises=biob.exceptions.UnexpectedShapeError), + ), + pytest.param( + (1, 1, 1, 1), + "ABCD", + None, + marks=pytest.mark.xfail( + raises=biob.exceptions.InvalidDimensionOrderingError + ), + ), + ], +) +@pytest.mark.parametrize("filename", ["f.mp4"]) +def test_timeseries_writer_ffmpeg( + array_constructor: Callable, + write_shape: Tuple[int, ...], + write_dim_order: str, + read_shape: Tuple[int, ...], + filename: str, + tmp_path: pathlib.Path, +) -> None: + # Create array + arr = array_constructor(write_shape, dtype=np.uint8) + + # Construct save end point + save_uri = tmp_path / filename + + # Normal save + TimeseriesWriter.save(arr, save_uri, write_dim_order) + + # Read written result and check basics + fs, path = biob.io.pathlike_to_fs(save_uri) + extension, mode = TwoDWriter.get_extension_and_mode(path) + 
with fs.open(path) as open_resource: + with imageio.get_reader(open_resource, format=extension, mode=mode) as reader: + # Read and stack all frames + frames = [] + for frame in reader: + frames.append(frame) + + data = np.stack(frames) + + # Check basics + assert data.shape == read_shape + assert data.shape[-1] <= 4 + + # Can't do "easy" testing because compression + shape mismatches on RGB data diff --git a/bioio/tests/writers/test_two_d_writer.py b/bioio/tests/writers/test_two_d_writer.py new file mode 100644 index 0000000..ea98df4 --- /dev/null +++ b/bioio/tests/writers/test_two_d_writer.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import pathlib +from typing import Callable, Tuple + +import bioio_base as biob +import imageio +import numpy as np +import pytest + +from bioio.writers.two_d_writer import TwoDWriter + +from ..conftest import array_constructor + + +@array_constructor +@pytest.mark.parametrize( + "write_shape, write_dim_order, read_shape", + [ + ((100, 100, 3), None, (100, 100, 3)), + ((100, 100), None, (100, 100)), + ((100, 100), "XY", (100, 100)), + ((3, 100, 100), "SYX", (100, 100, 3)), + ((100, 3, 100), "XSY", (100, 100, 3)), + pytest.param( + (1, 1, 1, 1), + None, + None, + marks=pytest.mark.xfail(raises=biob.exceptions.UnexpectedShapeError), + ), + pytest.param( + (1, 1, 1, 1, 1), + None, + None, + marks=pytest.mark.xfail(raises=biob.exceptions.UnexpectedShapeError), + ), + pytest.param( + (1, 1, 1, 1, 1, 1), + "STCZYX", + None, + marks=pytest.mark.xfail(raises=biob.exceptions.UnexpectedShapeError), + ), + pytest.param( + (1, 1), + "AB", + None, + marks=pytest.mark.xfail( + raises=biob.exceptions.InvalidDimensionOrderingError + ), + ), + ], +) +@pytest.mark.parametrize("filename", ["a.png", "d.bmp"]) +def test_two_d_writer( + array_constructor: Callable, + write_shape: Tuple[int, ...], + write_dim_order: str, + read_shape: Tuple[int, ...], + filename: str, + tmp_path: pathlib.Path, +) -> None: + # Create array + arr = array_constructor(write_shape, dtype=np.uint8) + + # Construct save end point + save_uri = tmp_path / filename + + # Save + TwoDWriter.save(arr, save_uri, write_dim_order) + + # Read written result and check basics + fs, path = biob.io.pathlike_to_fs(save_uri) + extension, mode = TwoDWriter.get_extension_and_mode(path) + with fs.open(path) as open_resource: + with imageio.get_reader(open_resource, format=extension, mode=mode) as reader: + data = np.asarray(reader.get_data(0)) + + # Check basics + assert data.shape == read_shape diff --git a/bioio/writers/__init__.py b/bioio/writers/__init__.py index 6a51895..c666beb 100644 --- a/bioio/writers/__init__.py +++ b/bioio/writers/__init__.py @@ -1,27 +1,10 @@ -# TODO: PLACEHOLDER +#!/usr/bin/env python +# -*- coding: utf-8 -*- -from typing import Any, Dict, List, Optional, Union +from .ome_tiff_writer import OmeTiffWriter +from .ome_zarr_writer import OmeZarrWriter -import bioio_base as bb -import ome_types - - -class OmeTiffWriter: - @staticmethod - def save( - data: Union[List[bb.types.ArrayLike], bb.types.ArrayLike], - uri: bb.types.PathLike, - dim_order: Optional[Union[str, List[Union[str, None]]]] = None, - ome_xml: Optional[Union[str, ome_types.model.OME]] = None, - channel_names: Optional[Union[List[str], List[Optional[List[str]]]]] = None, - image_name: Optional[Union[str, List[Union[str, None]]]] = None, - physical_pixel_sizes: Optional[ - Union[bb.types.PhysicalPixelSizes, List[bb.types.PhysicalPixelSizes]] - ] = None, - channel_colors: Optional[ - Union[List[List[int]], 
List[Optional[List[List[int]]]]]
-        ] = None,
-        fs_kwargs: Dict[str, Any] = {},
-        **kwargs: Any,
-    ) -> None:
-        raise NotImplementedError("TODO")
+__all__ = [
+    "OmeTiffWriter",
+    "OmeZarrWriter",
+]
diff --git a/bioio/writers/ome_tiff_writer.py b/bioio/writers/ome_tiff_writer.py
new file mode 100644
index 0000000..a9e6821
--- /dev/null
+++ b/bioio/writers/ome_tiff_writer.py
@@ -0,0 +1,732 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import importlib.metadata
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import bioio_base as biob
+import dask.array as da
+import numpy as np
+import tifffile
+from bioio_base.types import PhysicalPixelSizes
+from fsspec.implementations.local import LocalFileSystem
+from ome_types import from_xml, to_xml
+from ome_types.model import OME, Channel, Image, Pixels, TiffData
+from ome_types.model.simple_types import ChannelID, Color, PositiveFloat, PositiveInt
+from tifffile import TIFF
+
+from ..ome_utils import (
+    dtype_to_ome_type,
+    generate_ome_channel_id,
+    generate_ome_image_id,
+    ome_to_numpy_dtype,
+)
+from .writer import Writer
+
+# This is the threshold to use BigTiff; if it were the true 4GB boundary it would
+# be 2**32 bytes, but the libtiff writer was unable to handle a 2GB numpy array.
+# It would be great if we better understood exactly what this threshold is and how
+# to calculate it, but for now this is a stopgap working value.
+BIGTIFF_BYTE_LIMIT = 2**21
+
+
+class OmeTiffWriter(Writer):
+    @staticmethod
+    def save(
+        data: Union[List[biob.types.ArrayLike], biob.types.ArrayLike],
+        uri: biob.types.PathLike,
+        dim_order: Optional[Union[str, List[Union[str, None]]]] = None,
+        ome_xml: Optional[Union[str, OME]] = None,
+        channel_names: Optional[Union[List[str], List[Optional[List[str]]]]] = None,
+        image_name: Optional[Union[str, List[Union[str, None]]]] = None,
+        physical_pixel_sizes: Optional[
+            Union[biob.types.PhysicalPixelSizes, List[biob.types.PhysicalPixelSizes]]
+        ] = None,
+        channel_colors: Optional[
+            Union[List[List[int]], List[Optional[List[List[int]]]]]
+        ] = None,
+        fs_kwargs: Dict[str, Any] = {},
+        **kwargs: Any,
+    ) -> None:
+        """
+        Write a data array to a file.
+
+        Parameters
+        ----------
+        data: Union[List[biob.types.ArrayLike], biob.types.ArrayLike]
+            The array of data to store. Data arrays must have 2 to 6 dimensions. If a
+            list is provided, then it is understood to be multiple images written to
+            the ome-tiff file. All following metadata parameters will be expanded to
+            the length of this list.
+        uri: biob.types.PathLike
+            The URI or local path for where to save the data.
+            Note: OmeTiffWriter can only write to local file systems.
+        dim_order: Optional[Union[str, List[Union[str, None]]]]
+            The dimension order of the provided data.
+            Dimensions must be a list of T, C, Z, Y, X, and S (S=samples for rgb data).
+            Dimension strings must be same length as number of dimensions in the data.
+            If S is present it must be last and its data count must be 3 or 4.
+            Default: None.
+            If None is provided for any data array, we will guess dimensions based on a
+            TCZYX ordering.
+            In the None case, data will be assumed to be scalar, not RGB.
+        ome_xml: Optional[Union[str, OME]]
+            Provided OME metadata. The metadata can be an xml string or an OME object
+            from ome-types. A provided ome_xml will override any other provided
+            metadata arguments.
+            Default: None
+            The passed-in metadata will be validated against the current OME-XML
+            schema and an exception raised if invalid.
+            The ome_xml will also be compared against the dimensions of the input data.
+            If None is given, then OME-XML metadata will be generated from the data
+            array and any of the following metadata arguments.
+        channel_names: Optional[Union[List[str], List[Optional[List[str]]]]]
+            Lists of strings representing the names of the data channels
+            Default: None
+            If None is given, the list will be generated as a 0-indexed list of strings
+            of the form "Channel:image_index:channel_index"
+        image_name: Optional[Union[str, List[Union[str, None]]]]
+            List of strings representing the names of the images
+            Default: None
+            If None is given, the list will be generated as a 0-indexed list of strings
+            of the form "Image:image_index"
+        physical_pixel_sizes: Optional[Union[biob.types.PhysicalPixelSizes,
+                List[biob.types.PhysicalPixelSizes]]]
+            List of numbers representing the physical pixel sizes in Z, Y, X in microns
+            Default: None
+        channel_colors: Optional[Union[List[List[int]],
+                List[Optional[List[List[int]]]]]]
+            List of rgb color values per channel or a list of lists for each image.
+            These must be values compatible with the OME spec.
+            Default: None
+        fs_kwargs: Dict[str, Any]
+            Any specific keyword arguments to pass down to the fsspec created
+            filesystem.
+            Default: {}
+
+        Raises
+        ------
+        ValueError:
+            Non-local file system URI provided.
+
+        Examples
+        --------
+        Write a TCZYX data set to OME-Tiff
+
+        >>> image = numpy.ndarray([1, 10, 3, 1024, 2048])
+        ... OmeTiffWriter.save(image, "file.ome.tif")
+
+        Write data with a dimension order into OME-Tiff
+
+        >>> image = numpy.ndarray([10, 3, 1024, 2048])
+        ... OmeTiffWriter.save(image, "file.ome.tif", dim_order="ZCYX")
+
+        Write multi-scene data to OME-Tiff, specifying channel names
+
+        >>> image0 = numpy.ndarray([3, 10, 1024, 2048])
+        ... image1 = numpy.ndarray([3, 10, 512, 512])
+        ... OmeTiffWriter.save(
+        ...     [image0, image1],
+        ...     "file.ome.tif",
+        ...     dim_order="CZYX",  # this single value will be repeated to each image
+        ...     channel_names=[["C00","C01","C02"],["C10","C11","C12"]]
+        ... )
+        """
+        # Resolve final destination
+        fs, path = biob.io.pathlike_to_fs(uri, fs_kwargs=fs_kwargs)
+
+        # Catch non-local file system
+        if not isinstance(fs, LocalFileSystem):
+            raise ValueError(
+                f"Cannot write to non-local file system. "
+                f"Received URI: {uri}, which points to {type(fs)}."
+            )
+
+        # If metadata is attached as lists, enforce matching shape
+        if isinstance(data, list):
+            num_images = len(data)
+            if isinstance(dim_order, list):
+                if len(dim_order) != num_images:
+                    raise biob.exceptions.ConflictingArgumentsError(
+                        f"OmeTiffWriter received a list of arrays to use as scenes "
+                        f"but the provided list of dim_order is of different "
+                        f"length. "
+                        f"Number of provided scenes: {num_images}, "
+                        f"Number of provided dimension strings: "
+                        f"{len(dim_order)}"
+                    )
+            if isinstance(image_name, list):
+                if len(image_name) != num_images:
+                    raise biob.exceptions.ConflictingArgumentsError(
+                        f"OmeTiffWriter received a list of arrays to use as scenes "
+                        f"but the provided list of image_name is of different "
+                        f"length. "
+                        f"Number of provided scenes: {num_images}, "
+                        f"Number of provided image names: {len(image_name)}"
+                    )
+            if isinstance(physical_pixel_sizes, list):
+                if len(physical_pixel_sizes) != num_images:
+                    raise biob.exceptions.ConflictingArgumentsError(
+                        f"OmeTiffWriter received a list of arrays to use as scenes "
+                        f"but the provided list of physical_pixel_sizes is of "
+                        f"different length. "
+                        f"Number of provided scenes: {num_images}, "
+                        f"Number of provided physical pixel sizes: "
+                        f"{len(physical_pixel_sizes)}"
+                    )
+
+            if channel_names is not None:
+                if isinstance(channel_names[0], list):
+                    if len(channel_names) != num_images:
+                        raise biob.exceptions.ConflictingArgumentsError(
+                            f"OmeTiffWriter received a list of arrays to use as "
+                            f"scenes but the provided list of channel_names is of "
+                            f"different length. "
+                            f"Number of provided scenes: {num_images}, "
+                            f"Number of provided channel name lists: "
+                            f"{len(channel_names)}"
+                        )
+            if channel_colors is not None:
+                if isinstance(channel_colors[0], list):
+                    if not isinstance(channel_colors[0][0], int):
+                        if len(channel_colors) != num_images:
+                            raise biob.exceptions.ConflictingArgumentsError(
+                                f"OmeTiffWriter received a list of arrays to use as "
+                                f"scenes but the provided list of channel_colors is "
+                                f"of different length. "
+                                f"Number of provided scenes: {num_images}, "
+                                f"Number of provided channel color lists: "
+                                f"{len(channel_colors)}"
+                            )
+
+        # make sure data is a list
+        if not isinstance(data, list):
+            data = [data]
+        num_images = len(data)
+
+        # If metadata is attached as singles, expand to lists to match data
+        if dim_order is None or isinstance(dim_order, str):
+            dim_order = [dim_order] * num_images
+        if image_name is None or isinstance(image_name, str):
+            image_name = [image_name] * num_images
+        if isinstance(physical_pixel_sizes, tuple):
+            physical_pixel_sizes = [physical_pixel_sizes] * num_images
+        elif physical_pixel_sizes is None:
+            physical_pixel_sizes = [
+                biob.types.PhysicalPixelSizes(None, None, None)
+            ] * num_images
+        if channel_names is None or isinstance(channel_names[0], str):
+            channel_names = [channel_names] * num_images  # type: ignore
+
+        if channel_colors is not None:
+            if all(
+                [
+                    (
+                        channel_colors[img_idx] is None
+                        or isinstance(channel_colors[img_idx], list)
+                    )
+                    for img_idx in range(num_images)
+                ]
+            ):
+                single_image_channel_colors_provided = False
+            else:
+                single_image_channel_colors_provided = True
+
+            if (
+                channel_colors[0] is not None
+                and isinstance(channel_colors[0], list)
+                and isinstance(channel_colors[0][0], int)
+            ):
+                single_image_channel_colors_provided = True
+
+        if channel_colors is None or single_image_channel_colors_provided:
+            channel_colors = [channel_colors] * num_images  # type: ignore
+
+        xml = b""
+        # try to construct OME from params
+        if ome_xml is None:
+            ome_xml = OmeTiffWriter.build_ome(
+                [i.shape for i in data],
+                [i.dtype for i in data],
+                channel_names=channel_names,  # type: ignore
+                image_name=image_name,
+                physical_pixel_sizes=physical_pixel_sizes,
+                channel_colors=channel_colors,  # type: ignore
+                dimension_order=dim_order,
+            )
+        # else if string, then construct OME from string
+        elif isinstance(ome_xml, str):
+            ome_xml = from_xml(ome_xml)
+
+        # if we do not have an OME object now, something is wrong
+        if not isinstance(ome_xml, OME):
+            raise TypeError(
+                "Unknown OME-XML metadata passed in. "
+                "Use an OME object, an xml string, or None."
+            )
+
+        # validate ome
+        for scene_index in range(num_images):
+            OmeTiffWriter._check_ome_dims(
+                ome_xml, scene_index, data[scene_index].shape, data[scene_index].dtype
+            )
+
+        # convert to string for writing
+        xml = to_xml(ome_xml).encode()
+
+        # Save image to tiff!
+        with fs.open(path, "wb") as open_resource:
+            tif = tifffile.TiffWriter(
+                open_resource,
+                bigtiff=OmeTiffWriter._size_of_ndarray(data=data) > BIGTIFF_BYTE_LIMIT,
+            )
+
+            # now the heavy lifting.
assemble the raw data and write it + for scene_index in range(num_images): + image_data = data[scene_index] + # Assumption: if provided a dask array to save, it can fit into memory + if isinstance(image_data, da.core.Array): + image_data = data[scene_index].compute() + + description = xml if scene_index == 0 else None + # assume if first channel is rgb then all of it is + spp = ome_xml.images[scene_index].pixels.channels[0].samples_per_pixel + is_rgb = spp is not None and spp > 1 + photometric = ( + TIFF.PHOTOMETRIC.RGB if is_rgb else TIFF.PHOTOMETRIC.MINISBLACK + ) + planarconfig = TIFF.PLANARCONFIG.CONTIG if is_rgb else None + tif.write( + image_data, + description=description, + photometric=photometric, + metadata=None, + planarconfig=planarconfig, + compression=TIFF.COMPRESSION.ADOBE_DEFLATE, + ) + + tif.close() + + @staticmethod + def _resolve_OME_dimension_order( + shape: Tuple[int, ...], dimension_order: Union[str, None] + ) -> Tuple[str, bool]: + """ + Do some dimension validation and return an ome-compatible 5D dimension order + and whether the data is rgb multisample + + Parameters + ---------- + shape: Tuple[int, ...] + A data array shape + dimension_order: Union[str, None] + A dimension order string, composed of some subset of TCZYXS + + Returns + ------- + Tuple[str, bool] + An OME-compatible 5D dimension_order string and a boolean for whether the + data shape had rgb samples + """ + ndims = len(shape) + + if ndims > 5 and (shape[-1] != 3 and shape[-1] != 4): + raise ValueError( + f"Passed in greater than 5D data but last dimension is not 3 or 4: " + f"{shape[-1]}" + ) + + if dimension_order is not None and len(dimension_order) != ndims: + raise biob.exceptions.InvalidDimensionOrderingError( + f"Dimension order string has {len(dimension_order)} dims but data " + f"shape has {ndims} dims" + ) + + # data is rgb if last dimension is S and its size is 3 or 4 + is_rgb = False + if dimension_order is None: + # we will only guess rgb here if ndims > 5 + # I could make a better guess if I look at any ome-xml passed in + is_rgb = ndims > 5 and (shape[-1] == 3 or shape[-1] == 4) + dimension_order = ( + biob.dimensions.DEFAULT_DIMENSION_ORDER_WITH_SAMPLES + if is_rgb + else biob.dimensions.DEFAULT_DIMENSION_ORDER + ) + else: + is_rgb = bool( + dimension_order[-1] == biob.dimensions.DimensionNames.Samples + ) and (shape[-1] == 3 or shape[-1] == 4) + + if (ndims > 5 and not is_rgb) or ndims > 6 or ndims < 2: + raise ValueError( + f"Data array has unexpected number of dimensions: is_rgb = {is_rgb} " + f"and shape is {shape}" + ) + + # assert valid characters in dimension_order + if not ( + all( + d in biob.dimensions.DEFAULT_DIMENSION_ORDER_LIST_WITH_SAMPLES + for d in dimension_order + ) + ): + raise biob.exceptions.InvalidDimensionOrderingError( + f"Invalid dimension_order {dimension_order}" + ) + if ( + dimension_order.find(biob.dimensions.DimensionNames.Samples) > -1 + and not is_rgb + ): + raise biob.exceptions.InvalidDimensionOrderingError( + "Samples must be last dimension if present, and only S=3 or 4 is \ + supported." + ) + if dimension_order[-2:] != "YX" and dimension_order[-3:] != "YXS": + raise biob.exceptions.InvalidDimensionOrderingError( + f"Last characters of dimension_order {dimension_order} expected to \ + be YX or YXS. Please transpose your data." 
+ ) + + # remember whether S was a dim or not, and remove it for now + if is_rgb: + ndims = ndims - 1 + dimension_order = dimension_order[:-1] + + # expand to 5D and add appropriate dimensions + if len(dimension_order) == 2: + dimension_order = "TCZ" + dimension_order + + # expand to 5D and add appropriate dimensions + elif len(dimension_order) == 3: + # prepend either TC, TZ or CZ + if dimension_order[0] == biob.dimensions.DimensionNames.Time: + dimension_order = "CZ" + dimension_order + elif dimension_order[0] == biob.dimensions.DimensionNames.Channel: + dimension_order = "TZ" + dimension_order + elif dimension_order[0] == biob.dimensions.DimensionNames.SpatialZ: + dimension_order = "TC" + dimension_order + + # expand to 5D and add appropriate dimensions + elif len(dimension_order) == 4: + # prepend either T, C, or Z + first2 = dimension_order[:2] + if first2 == "TC" or first2 == "CT": + dimension_order = ( + biob.dimensions.DimensionNames.SpatialZ + dimension_order + ) + elif first2 == "TZ" or first2 == "ZT": + dimension_order = ( + biob.dimensions.DimensionNames.Channel + dimension_order + ) + elif first2 == "CZ" or first2 == "ZC": + dimension_order = biob.dimensions.DimensionNames.Time + dimension_order + + return str(dimension_order), is_rgb + + @staticmethod + def _size_of_ndarray(data: List[biob.types.ArrayLike]) -> int: + """ + Calculate the size of data to determine if we require bigtiff + + Parameters + ---------- + data: list of data arrays, one per image to be saved to tiff + + Returns + ------- + the total size of data in bytes + """ + size = 0 + for i in range(len(data)): + size += data[i].size * data[i].itemsize + return size + + @staticmethod + def _extend_data_shape(shape: Tuple[int, ...], num_dims: int) -> Tuple[int, ...]: + # extend data shape to be same len as dimension_order + if len(shape) < num_dims: + shape = tuple([1] * (num_dims - len(shape))) + shape + return shape + + @staticmethod + def _build_ome_image( + image_index: int = 0, + tiff_plane_offset: int = 0, + data_shape: Tuple[int, ...] = (1, 1, 1, 1, 1), + data_dtype: np.dtype = np.dtype(np.uint8), + is_rgb: bool = False, + dimension_order: str = biob.dimensions.DEFAULT_DIMENSION_ORDER, + image_name: Optional[str] = "I0", + physical_pixel_sizes: PhysicalPixelSizes = PhysicalPixelSizes(None, None, None), + channel_names: Optional[List[str]] = None, + channel_colors: Optional[List[List[int]]] = None, + ) -> Image: + if len(data_shape) < 2 or len(data_shape) > 6: + raise ValueError(f"Bad OME image shape length: {data_shape}") + + # extend data shape to be same len as dimension_order, accounting for rgb + if is_rgb: + data_shape = OmeTiffWriter._extend_data_shape( + data_shape, len(dimension_order) + 1 + ) + else: + data_shape = OmeTiffWriter._extend_data_shape( + data_shape, len(dimension_order) + ) + + def dim_or_1(dim: str) -> int: + idx = dimension_order.find(dim) + return 1 if idx == -1 else data_shape[idx] + + channel_count = dim_or_1(biob.dimensions.DimensionNames.Channel) + + if len(dimension_order) != 5: + raise ValueError(f"Unrecognized OME TIFF dimension order {dimension_order}") + for c in dimension_order: + if c not in biob.dimensions.DEFAULT_DIMENSION_ORDER: + raise ValueError(f"Unrecognized OME TIFF dimension {c}") + if isinstance(channel_names, list) and len(channel_names) != channel_count: + raise ValueError(f"Wrong number of channel names {len(channel_names)}") + if isinstance(channel_colors, list) and len(channel_colors) != channel_count: + raise ValueError( + f"Wrong number of channel colors. 
" + f"Received: {len(channel_colors)} ({channel_colors}) " + f"Expected: {channel_count}." + ) + + samples_per_pixel = 1 + if is_rgb: + samples_per_pixel = data_shape[-1] + + # dimension_order must be set to the *reverse* of what dimensionality + # the ome tif file is saved as + pixels = Pixels( + id=f"Pixels:{image_index}:0", + dimension_order=dimension_order[::-1], + type=dtype_to_ome_type(data_dtype), + size_t=dim_or_1(biob.dimensions.DimensionNames.Time), + size_c=channel_count * samples_per_pixel, + size_z=dim_or_1(biob.dimensions.DimensionNames.SpatialZ), + size_y=dim_or_1(biob.dimensions.DimensionNames.SpatialY), + size_x=dim_or_1(biob.dimensions.DimensionNames.SpatialX), + interleaved=True if samples_per_pixel > 1 else None, + ) + if physical_pixel_sizes.Z is None or physical_pixel_sizes.Z == 0: + pixels.physical_size_z = None + else: + pixels.physical_size_z = PositiveFloat(physical_pixel_sizes.Z) + if physical_pixel_sizes.Y is None or physical_pixel_sizes.Y == 0: + pixels.physical_size_y = None + else: + pixels.physical_size_y = PositiveFloat(physical_pixel_sizes.Y) + if physical_pixel_sizes.X is None or physical_pixel_sizes.X == 0: + pixels.physical_size_x = None + else: + pixels.physical_size_x = PositiveFloat(physical_pixel_sizes.X) + + # one single tiffdata indicating sequential tiff IFDs based on dimension_order + pixels.tiff_data_blocks = [ + TiffData( + plane_count=pixels.size_t * channel_count * pixels.size_z, + ifd=tiff_plane_offset, + ) + ] + + pixels.channels = [ + Channel(samples_per_pixel=samples_per_pixel) for i in range(channel_count) + ] + if channel_names is None: + for i in range(channel_count): + pixels.channels[i].id = ChannelID( + generate_ome_channel_id(str(image_index), i) + ) + pixels.channels[i].name = "C:" + str(i) + else: + for i in range(channel_count): + name = channel_names[i] + pixels.channels[i].id = ChannelID( + generate_ome_channel_id(str(image_index), i) + ) + pixels.channels[i].name = name + + if channel_colors is not None: + assert len(channel_colors) >= pixels.size_c + for i in range(channel_count): + this_channel_color_def = channel_colors[i] + if len(this_channel_color_def) != 3: + raise ValueError( + f"Expected RGB (3) color definition for channel color. " + f"Received {len(this_channel_color_def)} values " + f"({this_channel_color_def}) for image {image_index} " + f"channel {i}." + ) + else: + # Handle List[int] -> Tuple[int, int, int] for color def + # Naive cast of tuple(List[int]) generates type: Tuple[int, ...] 
+ this_channel_color = ( + this_channel_color_def[0], + this_channel_color_def[1], + this_channel_color_def[2], + ) + + pixels.channels[i].color = Color(this_channel_color) + + img = Image( + name=image_name, + id=generate_ome_image_id(str(image_index)), + pixels=pixels, + ) + return img + + @staticmethod + def build_ome( + data_shapes: List[Tuple[int, ...]], + data_types: List[np.dtype], + dimension_order: Optional[List[Optional[str]]] = None, + channel_names: Optional[List[Optional[List[str]]]] = None, + image_name: Optional[List[Optional[str]]] = None, + physical_pixel_sizes: Optional[List[biob.types.PhysicalPixelSizes]] = None, + channel_colors: Optional[List[Optional[List[List[int]]]]] = None, + ) -> OME: + """ + + Create the necessary metadata for an OME tiff image + + Parameters + ---------- + data_shapes: + A list of 5- or 6-d tuples + data_types: + A list of data types + dimension_order: + The order of dimensions in the data array, using + T,C,Z,Y,X and optionally S + channel_names: + The names for each channel to be put into the OME metadata + image_name: + The name of the image to be put into the OME metadata + physical_pixel_sizes: + Z,Y, and X physical dimensions of each pixel, + defaulting to microns + channel_colors: + List of all images channel colors to be put into the OME metadata + is_rgb: + is a S dimension present? S is expected to be the last dim in + the data shape + + Returns + ------- + OME + An OME object that can be converted to a valid OME-XML string + """ + num_images = len(data_shapes) + # resolve defaults that are None + if dimension_order is None: + dimension_order = [None] * num_images + if channel_names is None: + channel_names = [None] * num_images + if image_name is None: + image_name = [None] * num_images + if physical_pixel_sizes is None: + physical_pixel_sizes = [ + biob.types.PhysicalPixelSizes(None, None, None) + ] * num_images + if channel_colors is None: + channel_colors = [None] * num_images + + # assert all lists are same length + if ( + num_images != len(data_types) + or num_images != len(dimension_order) + or num_images != len(channel_names) + or num_images != len(image_name) + or num_images != len(physical_pixel_sizes) + or num_images != len(channel_colors) + ): + raise ValueError("Mismatched array counts in parameters to build_ome") + + images = [] + tiff_plane_offset = 0 + for image_index in range(len(data_shapes)): + # correct the dimension_order for ome + ome_dimension_order, is_rgb = OmeTiffWriter._resolve_OME_dimension_order( + data_shapes[image_index], dimension_order[image_index] + ) + img = OmeTiffWriter._build_ome_image( + image_index, + tiff_plane_offset, + data_shapes[image_index], + data_types[image_index], + is_rgb, + ome_dimension_order, + image_name[image_index], + physical_pixel_sizes[image_index], + channel_names[image_index], + channel_colors[image_index], + ) + # increment tiff_plane_offset for next image + tiff_plane_offset += ( + img.pixels.size_z * img.pixels.size_t * len(img.pixels.channels) + ) + images.append(img) + + ome_object = OME( + creator=f"bioio {importlib.metadata.version('bioio')}", images=images + ) + + # validate! (TODO: Is there a better api in ome-types for this?) 
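+        # Round-trip the OME object through XML: to_xml() serializes it and
+        # from_xml() re-parses the result, raising if the generated document
+        # does not conform to the OME schema.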
+ test = to_xml(ome_object) + from_xml(test) + + return ome_object + + @staticmethod + def _check_ome_dims( + ome_xml: OME, image_index: int, data_shape: Tuple, data_dtype: np.dtype + ) -> None: + if len(ome_xml.images) < 1: + raise ValueError("OME has no images") + + # look at number of samples from first channel only (possible bad assumption) + samples = ome_xml.images[image_index].pixels.channels[0].samples_per_pixel + + # reverse the OME dimension order to compare against numpy shape + dimension_order = ome_xml.images[image_index].pixels.dimension_order.value[::-1] + dims = { + biob.dimensions.DimensionNames.Time: ome_xml.images[ + image_index + ].pixels.size_t, + biob.dimensions.DimensionNames.Channel: ome_xml.images[ + image_index + ].pixels.size_c, + biob.dimensions.DimensionNames.SpatialZ: ome_xml.images[ + image_index + ].pixels.size_z, + biob.dimensions.DimensionNames.SpatialY: ome_xml.images[ + image_index + ].pixels.size_y, + biob.dimensions.DimensionNames.SpatialX: ome_xml.images[ + image_index + ].pixels.size_x, + } + if samples is not None and samples > 1: + dims[biob.dimensions.DimensionNames.Channel] = PositiveInt( + len(ome_xml.images[image_index].pixels.channels) + ) + dims[biob.dimensions.DimensionNames.Samples] = samples + dimension_order += biob.dimensions.DimensionNames.Samples + + expected_shape = tuple(dims[i] for i in dimension_order) + data_shape = OmeTiffWriter._extend_data_shape(data_shape, len(dimension_order)) + if expected_shape != data_shape: + raise ValueError( + f"OME shape {expected_shape} is not the same as data array shape: \ + {data_shape}" + ) + + expected_type = ome_to_numpy_dtype(ome_xml.images[image_index].pixels.type) + if expected_type != data_dtype: + raise ValueError( + f"OME pixel type {expected_type.name} is not the same as data \ + array type: {data_dtype.name}" + ) diff --git a/bioio/writers/ome_zarr_writer.py b/bioio/writers/ome_zarr_writer.py new file mode 100644 index 0000000..132c495 --- /dev/null +++ b/bioio/writers/ome_zarr_writer.py @@ -0,0 +1,356 @@ +import math +from typing import Dict, List, Optional, Tuple + +import bioio_base as biob +import zarr +from ome_zarr.io import parse_url +from ome_zarr.scale import Scaler +from ome_zarr.writer import write_image +from zarr.storage import default_compressor + +from ..ome_utils import generate_ome_channel_id + + +class OmeZarrWriter: + def __init__(self, uri: biob.types.PathLike): + """ + Constructor. + + Parameters + ---------- + uri: biob.types.PathLike + The URI or local path for where to save the data. + """ + # Resolve final destination + fs, path = biob.io.pathlike_to_fs(uri) + + # Save image to zarr store! 
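+        # NOTE: `fs` and `path` above are currently unused; parse_url is given
+        # the original URI and resolves local paths and remote stores itself.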
+        self.store = parse_url(uri, mode="w").store
+        self.root_group = zarr.group(store=self.store)
+
+    @staticmethod
+    def build_ome(
+        size_z: int,
+        image_name: str,
+        channel_names: List[str],
+        channel_colors: List[int],
+        channel_minmax: List[Tuple[float, float]],
+    ) -> Dict:
+        """
+        Create the "omero" display metadata for an OME-Zarr image.
+
+        Parameters
+        ----------
+        size_z:
+            Number of z planes
+        image_name:
+            The name of the image
+        channel_names:
+            The names for each channel
+        channel_colors:
+            List of all channel colors
+        channel_minmax:
+            List of all (min, max) pairs of channel intensities
+
+        Returns
+        -------
+        Dict
+            An "omero" metadata object suitable for writing to ome-zarr
+        """
+        ch = []
+        for i in range(len(channel_names)):
+            ch.append(
+                {
+                    "active": True,
+                    "coefficient": 1,
+                    "color": f"{channel_colors[i]:06x}",
+                    "family": "linear",
+                    "inverted": False,
+                    "label": channel_names[i],
+                    "window": {
+                        "end": float(channel_minmax[i][1]),
+                        "max": float(channel_minmax[i][1]),
+                        "min": float(channel_minmax[i][0]),
+                        "start": float(channel_minmax[i][0]),
+                    },
+                }
+            )
+
+        omero = {
+            "id": 1,  # ID in OMERO
+            "name": image_name,  # Name as shown in the UI
+            "version": "0.4",  # Current version
+            "channels": ch,
+            "rdefs": {
+                "defaultT": 0,  # First timepoint to show the user
+                "defaultZ": size_z // 2,  # Default Z section (the middle plane)
+                "model": "color",  # "color" or "greyscale"
+            },
+            # TODO: can we add more metadata here?
+            # # from here down this is all extra and not part of the ome-zarr spec
+            # "meta": {
+            #     "projectDescription": "20+ lines of gene edited cells etc",
+            #     "datasetName": "aics_hipsc_v2020.1",
+            #     "projectId": 2,
+            #     "imageDescription": "foo bar",
+            #     "imageTimestamp": 1277977808.0,
+            #     "imageId": 12,
+            #     "imageAuthor": "danielt",
+            #     "imageName": "AICS-12_143.ome.tif",
+            #     "datasetDescription": "variance dataset after QC",
+            #     "projectName": "aics cell variance project",
+            #     "datasetId": 3
+            # },
+        }
+        return omero
+
+    def write_image(
+        self,
+        # TODO how to pass in precomputed multiscales?
+        image_data: biob.types.ArrayLike,  # must be 2D to 5D
+        image_name: str,
+        physical_pixel_sizes: Optional[biob.types.PhysicalPixelSizes],
+        channel_names: Optional[List[str]],
+        channel_colors: Optional[List[int]],
+        scale_num_levels: int = 1,
+        scale_factor: float = 2.0,
+        dimension_order: Optional[str] = None,
+    ) -> None:
+        """
+        Write a data array to a file.
+        NOTE that this API is not yet finalized and will change in the future.
+
+        Parameters
+        ----------
+        image_data: biob.types.ArrayLike
+            The array of data to store. Data must have 2 to 5 dimensions,
+            matching `dimension_order`. Multi-scene data should be written
+            with one `write_image` call per scene (see the Examples section
+            below).
+        image_name: str
+            String representing the name of the image
+        physical_pixel_sizes: Optional[biob.types.PhysicalPixelSizes]
+            PhysicalPixelSizes object representing the physical pixel sizes in Z, Y, X
+            in microns.
+            Default: None
+        channel_names: Optional[List[str]]
+            List of strings representing the names of the data channels
+            Default: None
+            If None is given, the list will be generated as a 0-indexed list of strings
+            of the form "Channel:image_index:channel_index"
+        channel_colors: Optional[List[int]]
+            List of rgb color values, one packed int per channel.
+            These must be values compatible with the OME spec.
+            Default: None
+        scale_num_levels: int
+            Number of pyramid levels to use for the image.
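+            Each additional level is downsampled from the previous by
+            `scale_factor` in X and Y.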
+            Default: 1 (represents no downsampled levels)
+        scale_factor: float
+            The amount to downsample X and Y between pyramid levels.
+            Only active if scale_num_levels > 1.
+            Default: 2.0
+        dimension_order: Optional[str]
+            The dimension order of the data. If None is given, the dimension order will
+            be guessed from the number of dimensions in the data according to TCZYX
+            order.
+
+        Examples
+        --------
+        Write a TCZYX data set to OME-Zarr
+
+        >>> image = numpy.ndarray([1, 10, 3, 1024, 2048])
+        ... writer = OmeZarrWriter("/path/to/file.ome.zarr")
+        ... writer.write_image(image, "Image:0", None, None, None)
+
+        Write multi-scene data to OME-Zarr, specifying channel names
+
+        >>> image0 = numpy.ndarray([3, 10, 1024, 2048])
+        ... image1 = numpy.ndarray([3, 10, 512, 512])
+        ... writer = OmeZarrWriter("/path/to/file.ome.zarr")
+        ... writer.write_image(image0, "Image:0", None, ["C00", "C01", "C02"], None)
+        ... writer.write_image(image1, "Image:1", None, ["C10", "C11", "C12"], None)
+        """
+        ndims = len(image_data.shape)
+        if ndims < 2 or ndims > 5:
+            raise biob.exceptions.InvalidDimensionOrderingError(
+                f"Image data must have 2, 3, 4, or 5 dimensions. "
+                f"Received image data with shape: {image_data.shape}"
+            )
+        if dimension_order is None:
+            dimension_order = "TCZYX"[-ndims:]
+        if len(dimension_order) != ndims:
+            raise biob.exceptions.InvalidDimensionOrderingError(
+                f"Dimension order {dimension_order} does not match data "
+                f"shape: {image_data.shape}"
+            )
+        if (len(set(dimension_order) - set("TCZYX")) > 0) or len(
+            dimension_order
+        ) != len(set(dimension_order)):
+            raise biob.exceptions.InvalidDimensionOrderingError(
+                f"Dimension order {dimension_order} is invalid or "
+                "contains unexpected dimensions. Only TCZYX currently supported."
+            )
+        xdimindex = dimension_order.find("X")
+        ydimindex = dimension_order.find("Y")
+        zdimindex = dimension_order.find("Z")
+        cdimindex = dimension_order.find("C")
+        if cdimindex > min(i for i in [xdimindex, ydimindex, zdimindex] if i > -1):
+            raise biob.exceptions.InvalidDimensionOrderingError(
+                f"Dimension order {dimension_order} is invalid. Channel dimension "
+                "must be before X, Y, and Z."
+            )
+
+        if physical_pixel_sizes is None:
+            pixelsizes = (1.0, 1.0, 1.0)
+        else:
+            pixelsizes = (
+                physical_pixel_sizes.Z if physical_pixel_sizes.Z is not None else 1.0,
+                physical_pixel_sizes.Y if physical_pixel_sizes.Y is not None else 1.0,
+                physical_pixel_sizes.X if physical_pixel_sizes.X is not None else 1.0,
+            )
+        if channel_names is None:
+            # TODO this isn't generating a very pretty looking name but it will be
+            # unique
+            channel_names = (
+                [
+                    generate_ome_channel_id(image_id=image_name, channel_id=i)
+                    for i in range(image_data.shape[cdimindex])
+                ]
+                if cdimindex > -1
+                else [generate_ome_channel_id(image_id=image_name, channel_id=0)]
+            )
+        if channel_colors is None:
+            # TODO generate proper colors or confirm that the underlying lib can handle
+            # None
+            channel_colors = (
+                [i for i in range(image_data.shape[cdimindex])]
+                if cdimindex > -1
+                else [0]
+            )
+        scale_dim_map = {
+            "T": 1.0,
+            "C": 1.0,
+            "Z": pixelsizes[0],
+            "Y": pixelsizes[1],
+            "X": pixelsizes[2],
+        }
+        transforms = [
+            [
+                # the voxel size for the first scale level
+                {
+                    "type": "scale",
+                    "scale": [scale_dim_map[d] for d in dimension_order],
+                }
+            ]
+        ]
+        # TODO precompute sizes for downsampled levels also.
+        plane_size = (
+            image_data.shape[xdimindex]
+            * image_data.shape[ydimindex]
+            * image_data.itemsize
+        )
+        target_chunk_size = 16 * (1024 * 1024)  # 16 MB
+        # This assumes chunks hold whole XY planes.
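+        # e.g. a 1024x1024 uint16 plane is 2 MiB, so 8 whole planes fit in
+        # each 16 MB chunk.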
+ # TODO allow callers to configure chunk dims? + nplanes_per_chunk = int(math.ceil(target_chunk_size / plane_size)) + nplanes_per_chunk = ( + min(nplanes_per_chunk, image_data.shape[zdimindex]) if zdimindex > -1 else 1 + ) + chunk_dim_map = { + "T": 1, + "C": 1, + "Z": nplanes_per_chunk, + "Y": image_data.shape[ydimindex], + "X": image_data.shape[xdimindex], + } + chunk_dims = [ + dict( + chunks=tuple(chunk_dim_map[d] for d in dimension_order), + compressor=default_compressor, + ) + ] + lasty = image_data.shape[ydimindex] + lastx = image_data.shape[xdimindex] + # TODO scaler might want to use different method for segmentations than raw + # TODO allow custom scaler or pre-computed multiresolution levels + if scale_num_levels > 1: + # TODO As of this writing, this Scaler is not the most general + # implementation (it does things by xy plane) but it's code already + # written that also works with dask, so it's a good starting point. + scaler = Scaler() + scaler.method = "nearest" + scaler.max_layer = scale_num_levels - 1 + scaler.downscale = scale_factor if scale_factor is not None else 2 + for i in range(scale_num_levels - 1): + scale_dim_map["Y"] *= scaler.downscale + scale_dim_map["X"] *= scaler.downscale + transforms.append( + [ + { + "type": "scale", + "scale": [scale_dim_map[d] for d in dimension_order], + } + ] + ) + lasty = int(math.ceil(lasty / scaler.downscale)) + lastx = int(math.ceil(lastx / scaler.downscale)) + plane_size = lasty * lastx * image_data.itemsize + nplanes_per_chunk = int(math.ceil(target_chunk_size / plane_size)) + nplanes_per_chunk = ( + min(nplanes_per_chunk, image_data.shape[zdimindex]) + if zdimindex > -1 + else 1 + ) + chunk_dim_map["Z"] = nplanes_per_chunk + chunk_dim_map["Y"] = lasty + chunk_dim_map["X"] = lastx + chunk_dims.append( + dict( + chunks=tuple(chunk_dim_map[d] for d in dimension_order), + compressor=default_compressor, + ) + ) + else: + scaler = None + + # try to construct per-image metadata + ome_json = OmeZarrWriter.build_ome( + image_data.shape[zdimindex] if zdimindex > -1 else 1, + image_name, + channel_names=channel_names, # type: ignore + channel_colors=channel_colors, # type: ignore + # This can be slow if computed here. + # TODO: Rely on user to supply the per-channel min/max. + channel_minmax=[ + (0.0, 1.0) + for i in range(image_data.shape[cdimindex] if cdimindex > -1 else 1) + ], + ) + # TODO user supplies units? + dim_to_axis = { + "T": {"name": "t", "type": "time", "unit": "millisecond"}, + "C": {"name": "c", "type": "channel"}, + "Z": {"name": "z", "type": "space", "unit": "micrometer"}, + "Y": {"name": "y", "type": "space", "unit": "micrometer"}, + "X": {"name": "x", "type": "space", "unit": "micrometer"}, + } + + axes = [dim_to_axis[d] for d in dimension_order] + + # TODO image name must be unique within this root group + group = self.root_group # .create_group(image_name, overwrite=True) + group.attrs["omero"] = ome_json + + write_image( + image=image_data, + group=group, + scaler=scaler, + axes=axes, + # For each resolution, we have a List of transformation Dicts (not + # validated). Each list of dicts are added to each datasets in order. + coordinate_transformations=transforms, + # Options to be passed on to the storage backend. A list would need to + # match the number of datasets in a multiresolution pyramid. One can + # provide different chunk size for each level of a pyramid using this + # option. 
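+            # Here, chunk_dims holds one dict per level with the chunk shape
+            # and the default compressor computed above.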
+            storage_options=chunk_dims,
+        )
diff --git a/bioio/writers/timeseries_writer.py b/bioio/writers/timeseries_writer.py
new file mode 100644
index 0000000..2b85656
--- /dev/null
+++ b/bioio/writers/timeseries_writer.py
@@ -0,0 +1,220 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from typing import Any, Dict, Optional
+
+import bioio_base as biob
+import dask.array as da
+import numpy as np
+from fsspec.implementations.local import LocalFileSystem
+from imageio import get_writer
+
+from .two_d_writer import TwoDWriter
+from .writer import Writer
+
+###############################################################################
+
+
+class TimeseriesWriter(Writer):
+    """
+    A writer for timeseries greyscale, RGB, or RGBA image data.
+    Primarily directed at formats: "gif", "mp4", "mkv", etc.
+
+    Notes
+    -----
+    This writer requires `imageio[ffmpeg]`, a core dependency of `bioio`.
+    """
+
+    _TIMEPOINT_DIMENSIONS = [
+        biob.dimensions.DimensionNames.Time,
+        biob.dimensions.DimensionNames.SpatialY,
+        biob.dimensions.DimensionNames.SpatialX,
+    ]
+    _TIMEPOINT_WITH_SAMPLES_DIMENSIONS = _TIMEPOINT_DIMENSIONS + [
+        biob.dimensions.DimensionNames.Samples
+    ]
+
+    DIM_ORDERS = {
+        3: "".join(_TIMEPOINT_DIMENSIONS),  # Greyscale
+        4: "".join(_TIMEPOINT_WITH_SAMPLES_DIMENSIONS),  # RGB / RGBA
+    }
+
+    @staticmethod
+    def _write_chunks(
+        f: str,
+        extension: str,
+        imageio_mode: str,
+        fps: int,
+        data: da.Array,
+        dim_order: str,
+    ) -> None:
+        with get_writer(
+            f,
+            format=extension,
+            mode=imageio_mode,
+            fps=fps,
+        ) as writer:
+            # Make each chunk of the dask array be a frame
+            chunks = tuple(
+                1 if dim == biob.dimensions.DimensionNames.Time else -1
+                for dim in dim_order
+            )
+            data = data.rechunk(chunks)
+
+            # Save each frame
+            for block in data.blocks:
+                # Need to squeeze to remove the singleton T dimension
+                writer.append_data(da.squeeze(block).compute())
+
+    @staticmethod
+    def save(
+        data: biob.types.ArrayLike,
+        uri: biob.types.PathLike,
+        dim_order: Optional[str] = None,
+        fps: int = 24,
+        fs_kwargs: Dict[str, Any] = {},
+        **kwargs: Any,
+    ) -> None:
+        """
+        Write a data array to a file.
+
+        Parameters
+        ----------
+        data: biob.types.ArrayLike
+            The array of data to store. Data must have either three or four dimensions.
+        uri: biob.types.PathLike
+            The URI or local path for where to save the data.
+        dim_order: Optional[str]
+            The dimension order of the provided data.
+            Default: None. Based on the number of dimensions, will assume
+            the dimensions -- three dimensions: TYX and four dimensions: TYXS.
+        fps: int
+            Frames per second to attach as metadata.
+            Default: 24
+        fs_kwargs: Dict[str, Any]
+            Any specific keyword arguments to pass down to the fsspec created
+            filesystem.
+            Default: {}
+
+        Examples
+        --------
+        Data is the correct shape and dimension order
+
+        >>> image = dask.array.random.random((50, 100, 100))
+        ... TimeseriesWriter.save(image, "file.gif")
+
+        Data provided with a non-default dimension order
+
+        >>> image = numpy.random.rand(100, 3, 1024, 2048)
+        ... TimeseriesWriter.save(image, "file.mkv", "TSYX")
+
+        Save to remote
+
+        >>> image = numpy.random.rand(300, 100, 100, 3)
+        ... TimeseriesWriter.save(image, "s3://my-bucket/file.gif")
+
+        Raises
+        ------
+        IOError
+            Cannot write FFMPEG formats to remote storage.
+
+        Notes
+        -----
+        This writer can also be used to create a timeseries image from a
+        non-time dimension. For example, creating a timeseries image where each
+        frame is a Z-plane from a source volumetric image, as seen below.
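+        The overloaded dimension is simply relabeled as time; the pixel data
+        itself is unchanged.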
+
+        >>> image = BioImage("some_z_stack.ome.tiff")
+        ... TimeseriesWriter.save(
+        ...     data=image.get_image_data("ZYX", T=0, C=0),
+        ...     uri="some_z_stack.mp4",
+        ...     # Overloading the Z dimension as the Time dimension
+        ...     # Technically not needed, as TYX is assumed for three dimensions
+        ...     dim_order="TYX",
+        ... )
+
+        """
+        # Unpack uri and get extension
+        fs, path = biob.io.pathlike_to_fs(uri, fs_kwargs=fs_kwargs)
+        (
+            extension,
+            imageio_mode,
+        ) = TwoDWriter.get_extension_and_mode(path)
+
+        # Convert to dask array to make downstream usage of data have a consistent API
+        if isinstance(data, np.ndarray):
+            data = da.from_array(data)
+
+        # Shorthand num dimensions
+        n_dims = len(data.shape)
+
+        # Check num dimensions
+        if n_dims not in TimeseriesWriter.DIM_ORDERS:
+            raise biob.exceptions.UnexpectedShapeError(
+                f"TimeseriesWriter requires that data have either 3 or 4 "
+                f"dimensions. Provided data with {n_dims} dimensions. ({data.shape})"
+            )
+
+        # Assume dim order if not provided
+        if dim_order is None:
+            dim_order = TimeseriesWriter.DIM_ORDERS[n_dims]
+
+        # Uppercase dim order
+        dim_order = dim_order.upper()
+
+        # Check dimensions provided in the dim order string are all T, Y, X, or S
+        if any(
+            [
+                dim not in TimeseriesWriter._TIMEPOINT_WITH_SAMPLES_DIMENSIONS
+                for dim in dim_order
+            ]
+        ):
+            raise biob.exceptions.InvalidDimensionOrderingError(
+                f"The dim_order parameter only accepts dimensions: "
+                f"{TimeseriesWriter._TIMEPOINT_WITH_SAMPLES_DIMENSIONS}. "
+                f"Provided dim_order string: '{dim_order}'."
+            )
+
+        # Transpose dimensions if dim_order not ready for imageio
+        if dim_order != TimeseriesWriter.DIM_ORDERS[n_dims]:
+            # Actual reshape of the data
+            data = biob.transforms.reshape_data(
+                data,
+                given_dims=dim_order,
+                return_dims=TimeseriesWriter.DIM_ORDERS[n_dims],
+            )
+
+            # Set dim order to updated order
+            dim_order = TimeseriesWriter.DIM_ORDERS[n_dims]
+
+        # Handle FFMPEG formats
+        if extension in TwoDWriter.FFMPEG_FORMATS:
+            # FFMPEG can only handle local files
+            # https://github.com/imageio/imageio-ffmpeg/issues/28#issuecomment-566012783
+            if not isinstance(fs, LocalFileSystem):
+                raise IOError(
+                    f"Can only write to local files for formats: "
+                    f"{TwoDWriter.FFMPEG_FORMATS}."
+                )
+
+            # Else, write with local
+            TimeseriesWriter._write_chunks(
+                f=path,
+                extension=extension,
+                imageio_mode=imageio_mode,
+                fps=fps,
+                data=data,
+                dim_order=dim_order,
+            )
+
+        # Handle all non-ffmpeg formats
+        else:
+            with fs.open(path, "wb") as open_resource:
+                TimeseriesWriter._write_chunks(
+                    f=open_resource,
+                    extension=extension,
+                    imageio_mode=imageio_mode,
+                    fps=fps,
+                    data=data,
+                    dim_order=dim_order,
+                )
diff --git a/bioio/writers/two_d_writer.py b/bioio/writers/two_d_writer.py
new file mode 100644
index 0000000..eca56b0
--- /dev/null
+++ b/bioio/writers/two_d_writer.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from typing import Any, Dict, Optional, Tuple
+
+import bioio_base as biob
+import dask.array as da
+from imageio import get_writer
+
+from .writer import Writer
+
+###############################################################################
+
+
+class TwoDWriter(Writer):
+    """
+    A writer for two-dimensional image data, with optional samples (RGB / RGBA).
+    Primarily directed at formats: "png", "jpg", etc.
+
+    This is primarily a passthrough to imageio.
+
+    Notes
+    -----
+    This writer requires `imageio`, a core dependency of `bioio`.
+ """ + + _PLANE_DIMENSIONS = [ + biob.dimensions.DimensionNames.SpatialY, + biob.dimensions.DimensionNames.SpatialX, + ] + _PLANE_WITH_SAMPLES_DIMENSIONS = _PLANE_DIMENSIONS + [ + biob.dimensions.DimensionNames.Samples + ] + + DIM_ORDERS = { + 2: "".join(_PLANE_DIMENSIONS), # Greyscale + 3: "".join(_PLANE_WITH_SAMPLES_DIMENSIONS), # RGB / RGBA + } + FFMPEG_FORMATS = ["mov", "avi", "mpg", "mpeg", "mp4", "mkv", "wmv", "ogg"] + + @staticmethod + def get_extension_and_mode(path: str) -> Tuple[str, str]: + """ + Provided a path to a file, provided back the extension (format) of the file + and the imageio read mode. + + Parameters + ---------- + path: str + The file to provide extension and mode info for. + + Returns + ------- + extension: str + The extension (a naive guess at the format) of the file. + mode: str + The imageio read mode to use for image reading. + """ + # Select extension to handle special formats + extension = path.split(".")[-1] + + # Set mode to many-image reading if FFMPEG format was provided + # https://imageio.readthedocs.io/en/stable/userapi.html#imageio.get_reader + if extension in TwoDWriter.FFMPEG_FORMATS: + mode = "I" + # Otherwise, have imageio infer the mode + else: + mode = "?" + + return extension, mode + + @staticmethod + def save( + data: biob.types.ArrayLike, + uri: biob.types.PathLike, + dim_order: Optional[str] = None, + fs_kwargs: Dict[str, Any] = {}, + **kwargs: Any, + ) -> None: + """ + Write a data array to a file. + + Parameters + ---------- + data: types.ArrayLike + The array of data to store. Data must have either two or three dimensions. + uri: types.PathLike + The URI or local path for where to save the data. + dim_order: str + The dimension order of the provided data. + Default: None. Based off the number of dimensions, will assume + the dimensions similar to how + aicsimageio.readers.default_reader.DefaultReader reads in + data. That is, two dimensions: YX and three dimensions: YXS. + fs_kwargs: Dict[str, Any] + Any specific keyword arguments to pass down to the fsspec created + filesystem. + Default: {} + + Examples + -------- + Data is the correct shape and dimension order + + >>> image = dask.array.random.random((100, 100, 4)) + ... TwoDWriter.save(image, "file.png") + + Data provided with current dimension order + + >>> image = numpy.random.rand(3, 1024, 2048) + ... TwoDWriter.save(image, "file.png", "SYX") + + Save to remote + + >>> image = numpy.random.rand(100, 100, 3) + ... TwoDWriter.save(image, "s3://my-bucket/file.png") + """ + # Check unpack uri and extension + fs, path = biob.io.pathlike_to_fs(uri, fs_kwargs=fs_kwargs) + ( + extension, + imageio_mode, + ) = TwoDWriter.get_extension_and_mode(path) + + # Assumption: if provided a dask array to save, it can fit into memory + if isinstance(data, da.core.Array): + data = data.compute() + + # Shorthand num dimensions + n_dims = len(data.shape) + + # Check num dimensions + if n_dims not in TwoDWriter.DIM_ORDERS: + raise biob.exceptions.UnexpectedShapeError( + f"TwoDWriter requires that data must have either 2 or 3 dimensions. " + f"Provided data with {n_dims} dimensions. 
({data.shape})" + ) + + # Assume dim order if not provided + if dim_order is None: + dim_order = TwoDWriter.DIM_ORDERS[n_dims] + + # Uppercase dim order + dim_order = dim_order.upper() + + # Check dimensions provided in the dim order string are all Y, X, or S + if any( + [dim not in TwoDWriter._PLANE_WITH_SAMPLES_DIMENSIONS for dim in dim_order] + ): + raise biob.exceptions.InvalidDimensionOrderingError( + f"The dim_order parameter only accepts dimensions: " + f"{TwoDWriter._PLANE_WITH_SAMPLES_DIMENSIONS}. " + f"Provided dim_order string: '{dim_order}'." + ) + + # Transpose dimensions if dim_order not ready for imageio + if dim_order != TwoDWriter.DIM_ORDERS[n_dims]: + data = biob.transforms.reshape_data( + data, given_dims=dim_order, return_dims=TwoDWriter.DIM_ORDERS[n_dims] + ) + + # Save image + with fs.open(path, "wb") as open_resource: + with get_writer( + open_resource, + format=extension, + mode=imageio_mode, + ) as writer: + writer.append_data(data) diff --git a/bioio/writers/writer.py b/bioio/writers/writer.py new file mode 100644 index 0000000..b759b52 --- /dev/null +++ b/bioio/writers/writer.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import abc +import typing + +import bioio_base as biob + +############################################################################### + + +class Writer(abc.ABC): + """ + A small class to build standardized image writer functions. + """ + + @staticmethod + @abc.abstractmethod + def save( + data: biob.types.ArrayLike, + uri: biob.types.PathLike, + dim_order: str = biob.dimensions.DEFAULT_DIMENSION_ORDER, + **kwargs: typing.Any, + ) -> None: + """ + Write a data array to a file. + + Parameters + ---------- + data: types.ArrayLike + The array of data to store. + uri: types.PathLike + The URI or local path for where to save the data. + dim_order: str + The dimension order of the data. + + Examples + -------- + >>> image = numpy.ndarray([1, 10, 3, 1024, 2048]) + ... DerivedWriter.save(image, "file.ome.tif", "TCZYX") + + >>> image = dask.array.ones((4, 100, 100)) + ... DerivedWriter.save(image, "file.png", "CYX") + """ + # There are no requirements for n-dimensions of data. + # The data provided can be 2D - ND. diff --git a/pyproject.toml b/pyproject.toml index 152fab3..256544d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,15 @@ classifiers = [ dynamic = ["version"] dependencies = [ "bioio-base>=0.1.1", + "dask[array]>=2021.4.1", + "fsspec>=2022.8.0", + "imageio[ffmpeg]>=2.11.0,<2.28.0", + "numpy>=1.16.0,<2.0.0", + "ome-types[lxml]>=0.4.0", + "ome-zarr>=0.6.1", "semver>=3.0.1", + "tifffile>=2021.8.30", + "zarr>=2.6.0,<3.0.0", ] [project.urls] diff --git a/scripts/TEST_RESOURCES_HASH.txt b/scripts/TEST_RESOURCES_HASH.txt deleted file mode 100644 index c9596f9..0000000 --- a/scripts/TEST_RESOURCES_HASH.txt +++ /dev/null @@ -1 +0,0 @@ -4e3100e86a7fb0a6d1d6f38d1bd04b98165dbcea2323a8130bf16525a532d65d diff --git a/scripts/download_test_resources.py b/scripts/download_test_resources.py deleted file mode 100644 index 89e8eac..0000000 --- a/scripts/download_test_resources.py +++ /dev/null @@ -1,118 +0,0 @@ -# TODO: The way test files are downloaded needs to be looked into. 
At the very least -# test files for this repository need to be given their own S3 bucket or folder - -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import argparse -import logging -import sys -import traceback -from pathlib import Path - -from quilt3 import Package - -############################################################################### - -logging.basicConfig( - level=logging.INFO, - format="[%(levelname)4s: %(module)s:%(lineno)4s %(asctime)s] %(message)s", -) -log = logging.getLogger(__name__) - -############################################################################### -# Args - -EXTREMELY_LAZY_TEMPORARY_TEST_FILE_LIST = [ - "example.txt" -] - - -class Args(argparse.Namespace): - def __init__(self): - self.__parse() - - def __parse(self): - # Setup parser - p = argparse.ArgumentParser( - prog="download_test_resources", - description=( - "Download files used for testing this project. This will download " - "all the required test resources and place them in the " - "`tests/resources` directory." - ), - ) - - # Arguments - p.add_argument( - "--top-hash", - # Generated package hash from upload_test_resources - default=None, - help=( - "A specific version of the package to retrieve. " - "If none, will read from the TEST_RESOURCES_HASH.txt file." - ), - ) - p.add_argument( - "--debug", - action="store_true", - help="Show traceback if the script were to fail.", - ) - - # Parse - p.parse_args(namespace=self) - - -############################################################################### -# Build package - - -def download_test_resources(args: Args): - # Try running the download pipeline - try: - # Get test resources dir - resources_dir = ( - Path(__file__).parent.parent / "bioio" / "tests" / "resources" - ).resolve() - resources_dir.mkdir(exist_ok=True) - - # Use or read top hash - if args.top_hash is None: - with open(Path(__file__).parent / "TEST_RESOURCES_HASH.txt", "r") as f: - top_hash = f.readline().rstrip() - else: - top_hash = args.top_hash - - log.info(f"Downloading test resources using top hash: {top_hash}") - - # Get quilt package - package = Package.browse( - "aicsimageio/test_resources", - "s3://aics-modeling-packages-test-resources", - top_hash=top_hash, - ) - - # Download - log.info(f"Downloading {len(EXTREMELY_LAZY_TEMPORARY_TEST_FILE_LIST)} files") - for file_name in EXTREMELY_LAZY_TEMPORARY_TEST_FILE_LIST: - package["resources"][file_name].fetch(resources_dir / file_name) - - log.info(f"Completed package download.") - - # Catch any exception - except Exception as e: - log.error("=============================================") - if args.debug: - log.error("\n\n" + traceback.format_exc()) - log.error("=============================================") - log.error("\n\n" + str(e) + "\n") - log.error("=============================================") - sys.exit(1) - - -############################################################################### -# Allow caller to directly run this module (usually in development scenarios) - -if __name__ == "__main__": - args = Args() - download_test_resources(args)