
Commit 34a684a

Bryan Lawrence committed:
Removing the pseudo chunking stuff that snuck into the last commit
1 parent 59e8667 · commit 34a684a

File tree: 1 file changed, +4 −32 lines


pyfive/h5d.py (+4 −32)
@@ -23,17 +23,11 @@ class DatasetID:
     instance, it is completely independent of the parent file, and it can be used
     efficiently in distributed threads without thread contention to the b-tree etc.
     """
-    def __init__(self, dataobject, pseudo_chunking_size_MB=4):
+    def __init__(self, dataobject):
         """
         Instantiated with the pyfive dataset dataobject, we copy and cache everything
         we want so that the only file operations are now data accesses.
 
-        If pseudo_chunking_size_MB is set to a value greater than zero, and
-        if the storage is not local posix (and hence np.memmap is not available), then
-        when accessing contiguous variables we attempt to find a suitable
-        chunk shape to approximate that volume and read the contiguous variable
-        as if it were chunked. This is to facilitate lazy loading of partial data
-        from contiguous storage.
         """
 
         self._order = dataobject.order
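The visible API effect of this hunk is just the narrower constructor. A hypothetical call site goes from the old keyword form to the plain form:

# before this commit (keyword now removed):
# dsid = DatasetID(dataobject, pseudo_chunking_size_MB=4)
# after this commit:
dsid = DatasetID(dataobject)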
@@ -42,8 +36,7 @@ def __init__(self, dataobject, pseudo_chunking_size_MB=4):
         try:
             dataobject.fh.fileno()
             self._filename = dataobject.fh.name
-            self.avoid_mmap = False
-            self.pseudo_chunking_size = 0
+            self.avoid_mmap = False
         except (AttributeError, OSError):
             try:
                 # maybe this is an S3File instance?
# maybe this is an S3File instance?
@@ -53,7 +46,6 @@ def __init__(self, dataobject, pseudo_chunking_size_MB=4):
                 # failing that, maybe a memory file, return as None
                 self._filename = getattr(self._fh, 'full_name', 'None')
             self.avoid_mmap = True
-            self.pseudo_chunking_size = pseudo_chunking_size_MB*1024*1024
         self.filter_pipeline = dataobject.filter_pipeline
         self.shape = dataobject.shape
         self.rank = len(self.shape)
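The thread running through these two hunks is how DatasetID decides whether it can memory-map: if the underlying file handle has a real OS-level fileno(), lazy memmap access is possible; otherwise (an S3 or in-memory file) it must avoid mmap and read explicitly. A minimal standalone sketch of that detection pattern follows; the helper name is illustrative, not part of pyfive:

import io

def must_avoid_mmap(fh):
    """True if fh has no OS file descriptor, so numpy memmap cannot be used."""
    try:
        fh.fileno()   # real POSIX files expose a file descriptor
        return False  # safe to build a lazy np.memmap over the file
    except (AttributeError, OSError):
        return True   # e.g. an s3fs S3File or io.BytesIO: must read() explicitly

# A plain local file supports mmap; an in-memory file does not:
# must_avoid_mmap(open('/tmp/x.bin', 'rb')) -> False
# must_avoid_mmap(io.BytesIO(b'abc'))       -> True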
@@ -126,9 +118,6 @@ def read_direct_chunk(self, chunk_position, **kwargs):
     def get_data(self, args):
         """ Called by the dataset getitem method """
 
-
-
-
         match self.layout_class:
             case 0:  # compact storage
                 raise NotImplementedError("Compact Storage")
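For orientation, layout_class here follows the HDF5 data layout message classes: 0 is compact (raw data held inside the object header), 1 is contiguous (one flat block at a file offset), and 2 is chunked (b-tree indexed blocks). A schematic of the dispatch; the contiguous branch name comes from the hunk below, while the chunked handler name is assumed for illustration:

match self.layout_class:
    case 0:  # compact: data embedded in the object header
        raise NotImplementedError("Compact Storage")
    case 1:  # contiguous: one flat block starting at data_offset
        return self._get_contiguous_data(args)
    case 2:  # chunked: chunks located via the b-tree (assumed name)
        return self._get_chunked_data(args)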
@@ -262,31 +251,14 @@ def _get_contiguous_data(self, args):
 
     def _get_direct_from_contiguous(self, args=None):
         """
-        If pseudo_chunking_size is set, we attempt to read the contiguous data in chunks,
-        otherwise we have to read the entire array. This is a fallback situation if we
-        can't use a memory map which would otherwise be lazy. This will normally be when
-        we don't have a true Posix file. We should never end up here with compressed
-        data.
+        This is a fallback situation if we can't use a memory map which would otherwise be lazy.
+        At the moment it is very unlazy.
         """
-        def __getstride():
-            """ Determine an appropriate chunk and stride for a given pseudo chunk size """
-            stride = 1
-            chunk_shape = np.ones(self.rank, dtype=int)
-            for i in range(self.rank):
-                stride *= self.shape[i]
-                chunk_shape = box[:i] = self.shape[:i]
-                if stride*self.dtype.itemsize > self.pseudo_chunking_size:
-                    stride //= self.shape[i]
-                    chunk_shape = box[:i] = self.shape[:i-1]
-            return chunk_shape, stride
 
         itemsize = np.dtype(self.dtype).itemsize
         # need to impose type in case self.shape is () in which case numpy would return a float
         num_elements = np.prod(self.shape, dtype=int)
         num_bytes = num_elements*itemsize
-
-        if self.pseudo_chunking_size:
-            stride = __getstride()
 
         # we need it all, let's get it all (i.e. this really does read the lot)
         self._fh.seek(self.data_offset)
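Note that the removed helper could not have run as committed: box is never defined, and the call site stride = __getstride() discards half of the returned (chunk_shape, stride) tuple, so dropping it is straightforward cleanup. For the record, a working version of the idea the old docstring described (pick a pseudo chunk shape whose byte size approximates a budget, so a contiguous variable can be read piecewise) might look like the sketch below. This is a hypothetical reconstruction, not pyfive code; it grows whole trailing dimensions first so each chunk is contiguous in C order:

import numpy as np

def pseudo_chunk_shape(shape, itemsize, budget_bytes):
    """Choose a chunk shape of whole trailing dimensions whose byte size
    stays within budget_bytes (hypothetical reconstruction)."""
    chunk = np.ones(len(shape), dtype=int)
    for i in range(len(shape) - 1, -1, -1):   # grow innermost dims first
        trial = chunk.copy()
        trial[i] = shape[i]
        if np.prod(trial, dtype=int) * itemsize > budget_bytes:
            break
        chunk = trial
    return tuple(int(n) for n in chunk)

# With a 4 MiB budget and float64 data, a (1000, 500, 500) variable would be
# read in (1, 500, 500) slabs of 2,000,000 bytes each:
print(pseudo_chunk_shape((1000, 500, 500), 8, 4 * 1024 * 1024))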

0 commit comments
