"""An imaging extractor for .mat files based on HDF5.
Classes
-------
LbmHdf5ImagingExtractor
An imaging extractor for HDF5.
"""
import re
from pathlib import Path
from typing import Optional, Tuple
from warnings import warn

import numpy as np

from roiextractors.extraction_tools import PathType, FloatType, ArrayType
from roiextractors.extraction_tools import (
    get_video_shape,
    write_to_h5_dataset_format,
)
from roiextractors.imagingextractor import ImagingExtractor
from roiextractors.volumetricimagingextractor import VolumetricImagingExtractor

# from lazy_ops import DatasetView
from neuroconv.datainterfaces.ophys.baseimagingextractorinterface import BaseImagingExtractorInterface
from neuroconv.utils import FolderPathType

try:
    import h5py

    HAVE_H5 = True
except ImportError:
    HAVE_H5 = False
class LbmHdf5ImagingExtractor(ImagingExtractor):
"""An imaging extractor for LBM-style HDF5-based mat files."""
extractor_name = "LbmHdf5Imaging"
installed = HAVE_H5 # check at class level if installed or not
is_writable = True
mode = "file"
installation_mesg = "To use the Hdf5 Extractor run:\n\n pip install h5py\n\n" # error message when not installed
def __init__(
self,
file_path: PathType,
mov_field: str = "Y",
sampling_frequency: FloatType = None,
start_time: FloatType = None,
metadata: dict = None,
channel_names: ArrayType = None,
):
"""Create an ImagingExtractor from an HDF5 file.
Parameters
----------
file_path : str or Path
Path to the HDF5 file.
mov_field : str, optional
            Name of the dataset in the HDF5 file that contains the imaging data. The default is "Y".
sampling_frequency : float, optional
Sampling frequency of the video. The default is None.
start_time : float, optional
Start time of the video. The default is None.
metadata : dict, optional
Metadata dictionary. The default is None.
channel_names : array-like, optional
List of channel names. The default is None.
"""
ImagingExtractor.__init__(self)
self.filepath = Path(file_path)
self._sampling_frequency = sampling_frequency
self._mov_field = mov_field
        if self.filepath.suffix not in [".h5", ".hdf5", ".mat"]:
            warn("'file_path' is not an .h5, .hdf5, or .mat file")
self._channel_names = channel_names
self._file = h5py.File(file_path, "r")
        if mov_field in self._file.keys():
            self._video = self._file[self._mov_field]
            if sampling_frequency is None:
                assert "volumeRate" in self._file.keys(), (
                    "Sampling frequency is unavailable as a 'volumeRate' dataset in the file! "
                    "Please set the keyword argument 'sampling_frequency'."
                )
                self._sampling_frequency = self._file["volumeRate"][0, 0]
            else:
                self._sampling_frequency = sampling_frequency
        else:
            raise Exception(f"{file_path} does not contain the '{mov_field}' dataset")
        # Fall back to the dataset attribute only when no start_time was passed in.
        self._start_time = start_time
        if start_time is None and "start_time" in self._video.attrs.keys():
            self._start_time = self._video.attrs["start_time"]
        # Fall back to the pixel resolution stored in the file only when no metadata was passed in.
        self.metadata = metadata
        if metadata is None and "pixelResolution" in self._file.keys():
            self.metadata = {"pixel_size_um": self._file["pixelResolution"][0, 0]}
self._num_channels = 1
self._num_frames, self._num_cols, self._num_rows = self._video.shape # returns (time, col, row)
self._dtype = self._video[0].dtype
# self._video = self._video.lazy_transpose([2, 0, 1]) # should be: (samples, rows, columns)
        if self._channel_names is not None:
            assert len(self._channel_names) == self._num_channels, (
                "'channel_names' length is different than number of channels"
            )
else:
self._channel_names = [f"channel_{ch}" for ch in range(self._num_channels)]
self._kwargs = {
"file_path": str(Path(file_path).absolute()),
"mov_field": mov_field,
"sampling_frequency": sampling_frequency,
"channel_names": channel_names,
}
def __del__(self):
"""Close the HDF5 file."""
self._file.close()
    def get_frames(self, frame_idxs: ArrayType, channel: Optional[int] = 0):
        # Fancy indexing is not performant in h5py for long frame lists, so read a
        # contiguous slice covering the requested frames and select from it in memory.
        if frame_idxs is not None:
            slice_start = np.min(frame_idxs)
            slice_stop = min(np.max(frame_idxs) + 1, self.get_num_frames())
        else:
            slice_start = 0
            slice_stop = self.get_num_frames()
        # Index order in self._video: [t, c, r].
        # Index order expected by DANDI in the final file: [t, r, c].
        # However, ImagingExtractorDataChunkIterator applies transpose_axes = (0, 2, 1)
        # (or (0, 2, 1, 3) for 4D data) downstream, so the column/row swap is handled
        # there and does not need to happen here.
        frames = self._video[slice_start:slice_stop, :, :]
        if frame_idxs is not None:
            # Keep only the requested frames from the contiguous slice.
            frames = frames[np.asarray(frame_idxs) - slice_start]
        if isinstance(frame_idxs, int):
            frames = frames.squeeze()
        return frames
def get_video(self, start_frame=None, end_frame=None, channel: Optional[int] = 0) -> np.ndarray:
return self._video[start_frame:end_frame, :, :]
def get_image_size(self) -> Tuple[int, int]:
return (self._num_rows, self._num_cols)
def get_num_frames(self):
return self._num_frames
def get_sampling_frequency(self):
return self._sampling_frequency
def get_channel_names(self):
return self._channel_names
def get_num_channels(self):
return self._num_channels
def get_dtype(self):
return self._dtype
@staticmethod
def write_imaging(
imaging: ImagingExtractor,
save_path,
overwrite: bool = False,
mov_field="mov",
**kwargs,
):
"""Write an imaging extractor to an HDF5 file.
Parameters
----------
imaging : ImagingExtractor
The imaging extractor object to be saved.
save_path : str or Path
Path to save the file.
overwrite : bool, optional
If True, overwrite the file if it already exists. The default is False.
mov_field : str, optional
Name of the dataset in the HDF5 file that contains the imaging data. The default is "mov".
**kwargs : dict
Keyword arguments to be passed to the HDF5 file writer.
Raises
------
AssertionError
If the file extension is not .h5 or .hdf5.
FileExistsError
If the file already exists and overwrite is False.
"""
save_path = Path(save_path)
assert save_path.suffix in [
".h5",
".hdf5",
], "'save_path' file is not an .hdf5 or .h5 file"
if save_path.is_file():
if not overwrite:
raise FileExistsError("The specified path exists! Use overwrite=True to overwrite it.")
else:
save_path.unlink()
with h5py.File(save_path, "w") as f:
write_to_h5_dataset_format(imaging=imaging, dataset_path=mov_field, file_handle=f, **kwargs)
dset = f[mov_field]
dset.attrs["fr"] = imaging.get_sampling_frequency()
class LbmHdf5MultiPlaneImagingExtractor(VolumetricImagingExtractor):
"""Specialized extractor for reading multi-plane (volumetric) .mat files produced by LBM pre-processing."""
extractor_name = "LbmHdf5MultiPlaneImaging"
is_writable = True
mode = "file"
    def __init__(
        self,
        folder_path: PathType,
        channel_name: Optional[str] = None,
    ) -> None:
        """Create a volumetric extractor from a folder containing one .mat file per plane,
        with file names ending in the plane index (e.g. '..._1.mat', '..._2.mat')."""
        self.folder_path = Path(folder_path)
        self.mat_file_paths = list(self.folder_path.glob("*.mat"))
        # Sort the files by the integer plane index at the end of the file name.
        sort_ixs = [int(re.match(r".*_(\d+)\.mat", fp.as_posix()).group(1)) for fp in self.mat_file_paths]
        sort_ixs_sorting = np.argsort(sort_ixs)
        self.mat_file_paths = [self.mat_file_paths[ix] for ix in sort_ixs_sorting]
self.metadata = None # extract_extra_metadata(file_path)
parsed_metadata = None # parse_metadata(self.metadata)
num_planes = len(self.mat_file_paths) # parsed_metadata["num_planes"]
channel_names = ['green'] # parsed_metadata["channel_names"]
if channel_name is None:
channel_name = channel_names[0]
imaging_extractors = []
for plane in range(num_planes):
imaging_extractor = LbmHdf5ImagingExtractor(
file_path=self.mat_file_paths[plane]
)
imaging_extractors.append(imaging_extractor)
super().__init__(imaging_extractors=imaging_extractors)
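

# A minimal usage sketch for the volumetric extractor, assuming a folder that holds one
# .mat file per plane with names ending in the plane index, e.g. 'plane_1.mat',
# 'plane_2.mat', ... (the folder path below is a placeholder):
#
#     volume = LbmHdf5MultiPlaneImagingExtractor(folder_path="path/to/session_folder")
#     print(volume.get_num_frames(), volume.get_image_size())
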
class LbmHdf5ImagingInterface(BaseImagingExtractorInterface):
    """Data interface for LBM-style multi-plane imaging data stored as HDF5-based .mat files."""

    ExtractorModuleName = "lbmhdf5imagingextractor"
    ExtractorName = "LbmHdf5MultiPlaneImagingExtractor"
    @classmethod
    def get_source_schema(cls) -> dict:
        source_schema = super().get_source_schema()
        source_schema["properties"]["folder_path"]["description"] = "Path to the folder of imported, pre-processed .mat files."
        return source_schema
def __init__(
self,
folder_path: FolderPathType,
#fallback_sampling_frequency: Optional[float] = None,
verbose: bool = True,
):
"""
DataInterface for reading .mat files that are generated by LBM preprocessing
Parameters
----------
file_path: str
Path to tiff file.
fallback_sampling_frequency: float, optional
The sampling frequency can usually be extracted from the scanimage metadata in
exif:ImageDescription:state.acq.frameRate. If not, use this.
"""
'''
self.image_metadata = extract_extra_metadata(file_path=file_path)
if "state.acq.frameRate" in self.image_metadata:
sampling_frequency = float(self.image_metadata["state.acq.frameRate"])
elif "SI.hRoiManager.scanFrameRate" in self.image_metadata:
sampling_frequency = float(self.image_metadata["SI.hRoiManager.scanFrameRate"])
else:
assert_msg = (
"sampling frequency not found in image metadata, "
"input the frequency using the argument `fallback_sampling_frequency`"
)
assert fallback_sampling_frequency is not None, assert_msg
'''
#sampling_frequency = fallback_sampling_frequency
super().__init__(folder_path=folder_path, verbose=verbose)
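

if __name__ == "__main__":
    # A minimal conversion sketch, assuming a folder of pre-processed LBM .mat files.
    # The paths below are placeholders, and run_conversion follows the generic
    # neuroconv data-interface API.
    interface = LbmHdf5ImagingInterface(folder_path="path/to/preprocessed_mat_folder", verbose=True)
    metadata = interface.get_metadata()
    interface.run_conversion(nwbfile_path="lbm_session.nwb", metadata=metadata, overwrite=True)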