Skip to content

Commit

Permalink
ENH: Use pooch for test data retrieval
Browse files Browse the repository at this point in the history
Currently, fetching from the IPFS gateway is not reliable unless it is
local, and it is difficult to have a local gateway unless you are on
Linux (including the CI use case).

Possibly revisit following: thewtex/pytest-web3-data#5
  • Loading branch information
thewtex committed Sep 18, 2022
1 parent 871ad60 commit 07ca203
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 64 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
dist/
examples/.ipynb_checkpoints
test/data.tar.gz
test/data
examples/monkey_brain.nrrd
examples/cthead1.zarr/
Expand Down
27 changes: 10 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ To run the test suite:
git clone https://github.com/spatial-image/multiscale-spatial-image
cd multiscale-spatial-image
pip install -e ".[test]"
# We recommend running IPFS, e.g. via https://docs.ipfs.io/install/ipfs-desktop/
pytest
# Notebook tests
pytest --nbmake --nbmake-timeout=3000 examples/*ipynb
Expand All @@ -124,17 +123,9 @@ dataset_name = "cthead1"
image = input_images[dataset_name]
baseline_name = "2_4/XARRAY_COARSEN"
multiscale = to_multiscale(image, [2, 4], method=Methods.XARRAY_COARSEN)
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
```

First copy the current testing data to a staging directory, e.g.

```console
cp -a ./test/data/bafy* ./test/data/staging
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)
```

And update the `web3_data_dir` path in the *pytest.ini* file from `test/data/bafy*` to `test/data/staging`.

Add a `store_new_image` call in your test block:

```py
Expand All @@ -150,16 +141,18 @@ verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)

Run the tests to generate the output. Remove the `store_new_image` call.

Once the new test data is present locally, upload the result to IPFS:
Then, create a tarball of the current testing data:

```sh
npm install -g @web3-storage/w3
# Get an upload token from https://web3.storage
w3 token
w3 put ./test/data/staging --no-wrap --name multiscale-spatial-image-topic-name --hidden
```console
cd test/data
tar cvf ../data.tar *
gzip -9 ../data.tar
python3 -c 'import pooch; print(pooch.file_hash("../data.tar.gz"))'
```

Then update the resulting root [Content Identifier (CID)](https://proto.school/anatomy-of-a-cid/01) in the *pytest.ini* `web3_data_dir` path.
Update the `test_data_sha256` variable in the *test/test_multiscale_spatial_image.py* file.
Upload the data to [web3.storage](https://web3.storage).
And update the `test_data_ipfs_cid` variable with the new [Content Identifier (CID)](https://proto.school/anatomy-of-a-cid/01), which is available in the web3.storage web page interface.


[spatial-image]: https://github.com/spatial-image/spatial-image
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ test = [
"itk-filtering>=5.3rc4",
"dask_image",
"jsonschema",
"pooch",
"pytest",
"pytest-mypy",
"pytest-web3-data",
"fsspec",
"ipfsspec",
"urllib3",
Expand Down
2 changes: 0 additions & 2 deletions pytest.ini

This file was deleted.

108 changes: 64 additions & 44 deletions test/test_multiscale_spatial_image.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,52 @@
from platform import processor
import pytest

from pathlib import Path

import xarray as xr
from zarr.storage import DirectoryStore
from datatree import open_datatree
from pathlib import Path
import pooch

# IPFS Content Identifier (CID) of the uploaded test-data archive.
test_data_ipfs_cid = 'bafybeidr5be65a67njdaiw4cm27gjqpcmxlnhor7wak5hgm3jbhcnikt4y'
# SHA-256 digest of data.tar.gz, used as the registry hash for the download.
test_data_sha256 = '95c5836b49c0f2a29b48a3865b3e5e23858d555c8dceebcd43f129052ee4525d'


# Directory containing this test module; used as pooch's local storage path.
test_dir = Path(__file__).resolve().parent
# Name of the sub-directory that holds the extracted test data.
extract_dir = "data"
test_data_dir = test_dir / extract_dir
# pooch fetcher for the test-data archive.
test_data = pooch.create(path=test_dir,
# The CID appears both as the gateway subdomain and in the /ipfs/ path.
base_url=f"https://{test_data_ipfs_cid}.ipfs.w3s.link/ipfs/{test_data_ipfs_cid}/",
registry= {
"data.tar.gz": f"sha256:{test_data_sha256}",
},
# Retry a failed download up to 5 times.
retry_if_failed=5
)

from multiscale_spatial_image import Methods, to_multiscale, itk_image_to_multiscale

@pytest.fixture
def input_images(web3_data):
def input_images():
untar = pooch.Untar(extract_dir=extract_dir)
test_data.fetch("data.tar.gz", processor=untar)
result = {}

store = DirectoryStore(
web3_data / "input" / "cthead1.zarr", dimension_separator="/"
test_data_dir / "input" / "cthead1.zarr", dimension_separator="/"
)
image_ds = xr.open_zarr(store)
image_da = image_ds.cthead1
result["cthead1"] = image_da

store = DirectoryStore(
web3_data / "input" / "small_head.zarr", dimension_separator="/"
test_data_dir / "input" / "small_head.zarr", dimension_separator="/"
)
image_ds = xr.open_zarr(store)
image_da = image_ds.small_head
result["small_head"] = image_da

store = DirectoryStore(
web3_data / "input" / "2th_cthead1.zarr",
test_data_dir / "input" / "2th_cthead1.zarr",
)
image_ds = xr.open_zarr(store)
image_da = image_ds['2th_cthead1']
Expand All @@ -35,9 +55,9 @@ def input_images(web3_data):
return result


def verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale):
def verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale):
store = DirectoryStore(
web3_data / f"baseline/{dataset_name}/{baseline_name}", dimension_separator="/"
test_data_dir / f"baseline/{dataset_name}/{baseline_name}", dimension_separator="/"
)
dt = open_datatree(store, engine="zarr", mode="r")
xr.testing.assert_equal(dt.ds, multiscale.ds)
Expand All @@ -55,115 +75,115 @@ def test_base_scale(input_images):
multiscale = to_multiscale(image, [])
xr.testing.assert_equal(image, multiscale['scale0'].ds["small_head"])

def store_new_image(web3_data, multiscale_image, dataset_name, baseline_name):
def store_new_image(test_data_dir, multiscale_image, dataset_name, baseline_name):
'''Helper method for writing output results to disk
for later upload as test baseline'''
store = DirectoryStore(
web3_data / f"baseline/{dataset_name}/{baseline_name}", dimension_separator="/",
test_data_dir / f"baseline/{dataset_name}/{baseline_name}", dimension_separator="/",
)
multiscale_image.to_zarr(store)

def test_isotropic_scale_factors(web3_data, input_images):
def test_isotropic_scale_factors(input_images):

dataset_name = "cthead1"
image = input_images[dataset_name]
baseline_name = "2_4/XARRAY_COARSEN"
multiscale = to_multiscale(image, [2, 4], method=Methods.XARRAY_COARSEN)
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "cthead1"
image = input_images[dataset_name]
baseline_name = "2_3/XARRAY_COARSEN"
multiscale = to_multiscale(image, [2, 3], method=Methods.XARRAY_COARSEN)
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "small_head"
image = input_images[dataset_name]
baseline_name = "2_3_4/XARRAY_COARSEN"
multiscale = to_multiscale(image, [2, 3, 4], method=Methods.XARRAY_COARSEN)
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "cthead1"
image = input_images[dataset_name]
multiscale = to_multiscale(image, [2, 4], method=Methods.ITK_BIN_SHRINK)
baseline_name = "2_4/ITK_BIN_SHRINK"
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "cthead1"
image = input_images[dataset_name]
multiscale = to_multiscale(image, [2, 3], method=Methods.ITK_BIN_SHRINK)
baseline_name = "2_3/ITK_BIN_SHRINK"
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "small_head"
image = input_images[dataset_name]
multiscale = to_multiscale(image, [2, 3, 4], method=Methods.ITK_BIN_SHRINK)
baseline_name = "2_3_4/ITK_BIN_SHRINK"
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)


def test_gaussian_isotropic_scale_factors(web3_data, input_images):
def test_gaussian_isotropic_scale_factors(input_images):
dataset_name = "cthead1"
image = input_images[dataset_name]
baseline_name = "2_4/ITK_GAUSSIAN"
multiscale = to_multiscale(image, [2, 4], method=Methods.ITK_GAUSSIAN)
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "cthead1"
image = input_images[dataset_name]
baseline_name = "2_3/ITK_GAUSSIAN"
multiscale = to_multiscale(image, [2, 3], method=Methods.ITK_GAUSSIAN)
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "small_head"
image = input_images[dataset_name]
baseline_name = "2_3_4/ITK_GAUSSIAN"
multiscale = to_multiscale(image, [2, 3, 4], method=Methods.ITK_GAUSSIAN)
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "cthead1"
image = input_images[dataset_name]
baseline_name = "2_4/DASK_IMAGE_GAUSSIAN"
multiscale = to_multiscale(image, [2, 4], method=Methods.DASK_IMAGE_GAUSSIAN)
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "cthead1"
image = input_images[dataset_name]
baseline_name = "2_3/DASK_IMAGE_GAUSSIAN"
multiscale = to_multiscale(image, [2, 3], method=Methods.DASK_IMAGE_GAUSSIAN)
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "small_head"
image = input_images[dataset_name]
baseline_name = "2_3_4/DASK_IMAGE_GAUSSIAN"
multiscale = to_multiscale(image, [2, 3, 4], method=Methods.DASK_IMAGE_GAUSSIAN)
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)


def test_label_gaussian_isotropic_scale_factors(web3_data, input_images):
def test_label_gaussian_isotropic_scale_factors(input_images):
dataset_name = "2th_cthead1"
image = input_images[dataset_name]
baseline_name = "2_4/ITK_LABEL_GAUSSIAN"
multiscale = to_multiscale(image, [2, 4], method=Methods.ITK_LABEL_GAUSSIAN)
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "2th_cthead1"
image = input_images[dataset_name]
baseline_name = "2_3/ITK_LABEL_GAUSSIAN"
multiscale = to_multiscale(image, [2, 3], method=Methods.ITK_LABEL_GAUSSIAN)
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)


def test_anisotropic_scale_factors(web3_data, input_images):
def test_anisotropic_scale_factors(input_images):
dataset_name = "cthead1"
image = input_images[dataset_name]
scale_factors = [{"x": 2, "y": 4}, {"x": 1, "y": 2}]
multiscale = to_multiscale(image, scale_factors, method=Methods.XARRAY_COARSEN)
baseline_name = "x2y4_x1y2/XARRAY_COARSEN"
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)
# Test default method: Methods.XARRAY_COARSEN
multiscale = to_multiscale(image, scale_factors)
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "small_head"
image = input_images[dataset_name]
Expand All @@ -174,14 +194,14 @@ def test_anisotropic_scale_factors(web3_data, input_images):
]
multiscale = to_multiscale(image, scale_factors, method=Methods.XARRAY_COARSEN)
baseline_name = "x3y2z4_x2y2z2_x1y2z1/XARRAY_COARSEN"
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "cthead1"
image = input_images[dataset_name]
scale_factors = [{"x": 2, "y": 4}, {"x": 1, "y": 2}]
multiscale = to_multiscale(image, scale_factors, method=Methods.ITK_BIN_SHRINK)
baseline_name = ("x2y4_x1y2/ITK_BIN_SHRINK",)
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "small_head"
image = input_images[dataset_name]
Expand All @@ -192,16 +212,16 @@ def test_anisotropic_scale_factors(web3_data, input_images):
]
multiscale = to_multiscale(image, scale_factors, method=Methods.ITK_BIN_SHRINK)
baseline_name = "x3y2z4_x2y2z2_x1y2z1/ITK_BIN_SHRINK"
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)


def test_gaussian_anisotropic_scale_factors(web3_data, input_images):
def test_gaussian_anisotropic_scale_factors(input_images):
dataset_name = "cthead1"
image = input_images[dataset_name]
scale_factors = [{"x": 2, "y": 4}, {"x": 1, "y": 2}]
multiscale = to_multiscale(image, scale_factors, method=Methods.ITK_GAUSSIAN)
baseline_name = "x2y4_x1y2/ITK_GAUSSIAN"
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "small_head"
image = input_images[dataset_name]
Expand All @@ -212,14 +232,14 @@ def test_gaussian_anisotropic_scale_factors(web3_data, input_images):
]
multiscale = to_multiscale(image, scale_factors, method=Methods.ITK_GAUSSIAN)
baseline_name = "x3y2z4_x2y2z2_x1y2z1/ITK_GAUSSIAN"
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "cthead1"
image = input_images[dataset_name]
scale_factors = [{"x": 2, "y": 4}, {"x": 1, "y": 2}]
multiscale = to_multiscale(image, scale_factors, method=Methods.DASK_IMAGE_GAUSSIAN)
baseline_name = "x2y4_x1y2/DASK_IMAGE_GAUSSIAN"
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

dataset_name = "small_head"
image = input_images[dataset_name]
Expand All @@ -230,19 +250,19 @@ def test_gaussian_anisotropic_scale_factors(web3_data, input_images):
]
multiscale = to_multiscale(image, scale_factors, method=Methods.DASK_IMAGE_GAUSSIAN)
baseline_name = "x3y2z4_x2y2z2_x1y2z1/DASK_IMAGE_GAUSSIAN"
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)


def test_label_gaussian_anisotropic_scale_factors(web3_data, input_images):
def test_label_gaussian_anisotropic_scale_factors(input_images):
dataset_name = "2th_cthead1"
image = input_images[dataset_name]
scale_factors = [{"x": 2, "y": 4}, {"x": 1, "y": 2}]
multiscale = to_multiscale(image, scale_factors, method=Methods.ITK_LABEL_GAUSSIAN)
baseline_name = "x2y4_x1y2/ITK_LABEL_GAUSSIAN"
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)


def test_from_itk(web3_data, input_images):
def test_from_itk(input_images):
import itk
import numpy as np

Expand All @@ -252,7 +272,7 @@ def test_from_itk(web3_data, input_images):
scale_factors=[4,2]
multiscale = itk_image_to_multiscale(image, scale_factors)
baseline_name = "4_2/from_itk"
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

# Test 2D with nonunit metadata
dataset_name = "cthead1"
Expand All @@ -267,7 +287,7 @@ def test_from_itk(web3_data, input_images):
scale_factors=[4,2]
multiscale = itk_image_to_multiscale(image, scale_factors=scale_factors, anatomical_axes=False, axis_units=axis_units, name=name)
baseline_name = "4_2/from_itk_nonunit_metadata"
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

# Expect error for 2D image with anatomical axes
try:
Expand All @@ -282,7 +302,7 @@ def test_from_itk(web3_data, input_images):
scale_factors=[4,2]
multiscale = itk_image_to_multiscale(image, scale_factors)
baseline_name = "4_2/from_itk"
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

# Test 3D with additional metadata
dataset_name = "small_head"
Expand All @@ -295,5 +315,5 @@ def test_from_itk(web3_data, input_images):
scale_factors=[4,2]
multiscale = itk_image_to_multiscale(image, scale_factors=scale_factors, anatomical_axes=True, axis_units=axis_units, name=name)
baseline_name = "4_2/from_itk_anatomical"
verify_against_baseline(web3_data, dataset_name, baseline_name, multiscale)
verify_against_baseline(test_data_dir, dataset_name, baseline_name, multiscale)

0 comments on commit 07ca203

Please sign in to comment.