Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add real world test suite #27

Merged
merged 13 commits into from
Oct 24, 2022
4 changes: 2 additions & 2 deletions activestorage/active.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from zarr.indexing import (
OrthogonalIndexer,
)
from activestorage.storage import decode_chunk
from activestorage.storage import reduce_chunk
from activestorage import netcdf_to_zarr as nz


Expand Down Expand Up @@ -249,7 +249,7 @@ def _process_chunk(self, fsref, chunk_coords, chunk_selection, out,
coord = '.'.join([str(c) for c in chunk_coords])
key = f"{self.ncvar}/{coord}"
rfile, offset, size = tuple(fsref[key])
tmp = decode_chunk(rfile, offset, size, compressor, filters, missing,
tmp = reduce_chunk(rfile, offset, size, compressor, filters, missing,
self.zds._dtype, self.zds._chunks, self.zds._order,
chunk_selection, method=self.method)

Expand Down
136 changes: 120 additions & 16 deletions activestorage/dummy_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from os import lseek
from netCDF4 import Dataset
import numpy as np
import zarr
Expand All @@ -18,44 +19,147 @@ def _make_data(n=10):
return dd, data


def make_missing_ncdata(filename='test_missing.nc', chunksize=(3, 3, 1), n=10):
    """
    Makes a test dataset based on the default vanilla dataset, but which
    includes some missing values flagged via the (deprecated)
    missing_value attribute.

    Returns the index tuples modified by make_ncdata.
    """
    return make_ncdata(filename, chunksize, n, compression=None, missing=-999.)

def make_fillvalue_ncdata(filename='test_fillvalue.nc', chunksize=(3,3,1), n=10):
    """
    Build the standard vanilla test file, but punch holes into the data
    that are marked with a non-default _FillValue of -999.
    """
    return make_ncdata(filename, chunksize, n,
                       compression=None, fillvalue=-999.)

def make_validmin_ncdata(filename='test_validmin.nc', chunksize=(3,3,1), n=10):
    """
    Build the standard vanilla test file, adding a valid_min attribute of
    -1.0 together with a few data points that fall below it.
    """
    vmin = -1.
    return make_ncdata(filename, chunksize, n, compression=None,
                       valid_min=vmin)

def make_validmax_ncdata(filename='test_validmax.nc', chunksize=(3,3,1), n=10):
    """
    Build the standard vanilla test file, adding a valid_max attribute of
    1.2 * n**3 together with a few data points that exceed it.
    """
    vmax = 1.2 * n ** 3
    return make_ncdata(filename, chunksize, n, compression=None,
                       valid_max=vmax)


def make_validrange_ncdata(filename='test_validrange.nc', chunksize=(3,3,1), n=10):
    """
    Build the standard vanilla test file, adding a valid_range attribute
    of [-1.0, 1.2 * n**3] together with a few data points lying outside
    that range on both sides.
    """
    bounds = [-1.0, 1.2 * n ** 3]
    return make_ncdata(filename, chunksize, n, compression=None,
                       valid_range=bounds)

def make_vanilla_ncdata(filename='test_vanilla.nc', chunksize=(3,3,1), n=10):
    """
    Make a vanilla test dataset which is three dimensional with indices
    and values that aid in testing data extraction. No compression and no
    missing/invalid data options are applied.

    Returns the index tuples from make_ncdata (all None for vanilla data)
    for consistency with the other make_*_ncdata helpers.
    """
    # Use keyword form (not positional None/False) so the call reads the
    # same way as the sibling wrappers above.
    return make_ncdata(filename, chunksize, n, compression=None)

def make_ncdata(filename, chunksize, n, compression=None,
                missing=None,
                fillvalue=None,
                valid_range=None,
                valid_min=None,
                valid_max=None):
    """
    Create a three-dimensional (n, n, n) netCDF4 test file with index
    coordinate variables and a chunked "data" variable.

    If compression is required, it can be passed in via keyword
    and is applied to all variables.

    Note that if any of the valid-data options (missing etc.) are
    selected, then four values (for each option) are modified and made
    invalid, and the corresponding attribute is set on the variable.

    For the purposes of test data, bounds (valid_min, valid_range etc.)
    need to be non-zero, although that won't hold in real life.

    Returns (mindices, findices, vrindices, vm1indices, vm2indices):
    the index triples punched out for each option (None for options
    that were not requested).
    """

    def make_holes(var, indices, attribute, value, dummy):
        # Punch holes into var in place: optionally set the masking
        # attribute, then overwrite the selected cells with dummy.
        # (No return value: var is mutated, never rebound by callers.)
        if value is not None:
            assert type(value) == float
            setattr(var, attribute, value)
        for i, j, k in indices:
            var[i, j, k] = dummy

    # The hole indices below need at least 5 points per dimension.
    assert n > 4

    # valid_range is mutually exclusive with valid_min/valid_max
    # (checked up front, before any file is created).
    if valid_range and (valid_min or valid_max):
        raise ValueError("Can't mix and match validity options")

    ds = Dataset(filename, 'w', format="NETCDF4")
    dd, data = _make_data(n)

    xdim = ds.createDimension("xdim", n)
    ydim = ds.createDimension("ydim", n)
    zdim = ds.createDimension("zdim", n)

    # fill_value must be set at creation time; it cannot be attached later.
    x = ds.createVariable("x", "i4", ("xdim",), fill_value=fillvalue, compression=compression)
    y = ds.createVariable("y", "i4", ("ydim",), fill_value=fillvalue, compression=compression)
    z = ds.createVariable("z", "i4", ("zdim",), fill_value=fillvalue, compression=compression)

    for a, s in zip([x, y, z], [1, n, n*n]):
        a[:] = dd*s

    dvar = ds.createVariable("data", "f8", ("xdim", "ydim", "zdim"),
                             chunksizes=chunksize, compression=compression)
    dvar[:] = data

    nm1, nm2 = n-1, n-2
    # Integer midpoint: array indices must be ints (n/2 is a float in
    # Python 3 and raises when used as an index).
    nh = n//2
    # we use a different set of indices for each set of values to be masked
    mindices, findices, vrindices, vm1indices, vm2indices = None, None, None, None, None

    if missing:
        # we use the deprecated missing_value option
        mindices = [(1, 1, 1), (nh, 1, 1), (1, nm1, 1), (nm1, 1, nh)]
        make_holes(dvar, mindices, 'missing_value', missing, missing)

    if fillvalue:
        # note we use a different set of indices; the attribute value is
        # None because _FillValue was already fixed at createVariable time.
        findices = [(nm1, nm1, nm1), (nh, nh, 1), (1, 1, nh), (nm1, nm1, nh)]
        make_holes(dvar, findices, '_FillValue', None, fillvalue)

    if valid_min:
        if valid_min == 0.0:
            raise ValueError('Dummy data needs a non-zero valid min')
        vm1indices = [(2, 2, 2), (nh, 2, 2), (2, nm1, 2), (nm1, 2, nm1//2)]
        make_holes(dvar, vm1indices, 'valid_min', valid_min,
                   valid_min - abs(0.1*valid_min))

    if valid_max:
        # (was testing valid_min here — copy-paste bug, fixed)
        if valid_max == 0.0:
            raise ValueError('Dummy data needs a non-zero valid max')
        vm2indices = [(2, nm1, 2), (2, 2, nm1), (nm2, nm2, nm1), (nm1, nm2, nh)]
        make_holes(dvar, vm2indices, 'valid_max', valid_max, valid_max*10)

    if valid_range:
        assert len(valid_range) == 2 and type(valid_range[0]) == float
        if valid_range[0] == 0.0 or valid_range[1] == 0.0:
            raise ValueError('Dummy data needs non-zero range bounds')
        vrindices = [(2, nm1, nm2), (2, nm2, nm1), (nm1, nm2, nm1), (nh, nh+1, nh)]
        dvar.valid_range = valid_range
        # two holes below the range, two above it
        for i, j, k in vrindices[0:2]:
            dvar[i, j, k] = valid_range[0] - abs(0.1*valid_range[0])
        for i, j, k in vrindices[2:]:
            dvar[i, j, k] = valid_range[1]*10

    ds.close()

    # Reopen read-only just to report what was written, then close.
    ds = Dataset(filename, 'r')
    var = ds.variables['data']
    print(f'\nCreated file "{filename}" with a variable called "data" '
          f'with shape {var.shape} and chunking, compression '
          f'{var.chunking()},{compression}\n')
    ds.close()

    return mindices, findices, vrindices, vm1indices, vm2indices


def make_testzarr_variable_file(filename='test.zarr'):
    """
    Make a test variable and write to a zarr file.
    #FIXME: Not quite sure how to get the chunking right yet
    """
    dd, data = _make_data()
    zarr.save(filename, x=dd, y=dd*10, z=dd*100, data=data)


if __name__=="__main__":
make_test_ncdata()
make_vanilla_ncdata()
make_validmin_ncdata()
make_validmax_ncdata()
make_missing_ncdata()
make_fillvalue_ncdata()
make_validrange_ncdata()

2 changes: 1 addition & 1 deletion activestorage/storage.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from ast import Not
from numcodecs.compat import ensure_ndarray

def decode_chunk(rfile, offset, size, compression, filters, missing, dtype, shape, order, chunk_selection, method=None):
def reduce_chunk(rfile, offset, size, compression, filters, missing, dtype, shape, order, chunk_selection, method=None):
""" We do our own read of chunks and decoding etc

rfile - the actual file with the data
Expand Down
4 changes: 2 additions & 2 deletions old_code/test_harness-learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import unittest
import os
from activestorage.active_tools import make_an_array_instance_active
from activestorage.dummy_data import make_test_ncdata
from activestorage.dummy_data import make_vanilla_ncdata
from netCDF4 import Dataset
import numpy as np
from numcodecs.compat import ensure_ndarray
Expand Down Expand Up @@ -217,7 +217,7 @@ def setUp(self):
"""
self.testfile = 'test_bizarre.nc'
if not os.path.exists(self.testfile):
make_test_ncdata(filename=self.testfile)
make_vanilla_ncdata(filename=self.testfile)

def testRead0(self):
"""
Expand Down
Loading