diff --git a/elf/io/ngff.py b/elf/io/ngff.py index 6a07721..0496945 100644 --- a/elf/io/ngff.py +++ b/elf/io/ngff.py @@ -1,3 +1,4 @@ +import numpy as np import skimage.transform # we use zarr here because z5py does not support nested directory for the zarr format import zarr @@ -21,41 +22,65 @@ def _validate_axes_names(ndim, axes_names): assert len(axes_names) == ndim val_axes = tuple(axes_names) if ndim == 2: - assert val_axes == ('y', 'x') + assert val_axes == ('y', 'x'), str(val_axes) elif ndim == 3: - assert val_axes in [('z', 'y', 'x'), ('c', 'y', 'x'), ('t', 'y', 'x')] + assert val_axes in [('z', 'y', 'x'), ('c', 'y', 'x'), ('t', 'y', 'x')], str(val_axes) elif ndim == 4: - assert val_axes in [('t', 'z', 'y', 'x'), ('c', 'z' 'y', 'x'), ('t', 'c', 'y', 'x')] + assert val_axes in [('t', 'z', 'y', 'x'), ('c', 'z', 'y', 'x'), ('t', 'c', 'y', 'x')], str(val_axes) else: - assert val_axes == ('t', 'c', 'z', 'y', 'x') + assert val_axes == ('t', 'c', 'z', 'y', 'x'), str(val_axes) -# TODO downscale only in spatial dimensions -def _downscale(data, downscaler, kwargs): - data = downscaler(data, **kwargs).astype(data.dtype) +def _downscale(data, axes_names, downscaler, kwargs): + is_spatial = [ax in ('z', 'y', 'x') for ax in axes_names] + # downscaling is easy if we only have spatial axes + if all(is_spatial): + data = downscaler(data, **kwargs).astype(data.dtype) + else: + spatial_start = [i for i, spatial in enumerate(is_spatial) if spatial][0] + assert spatial_start in (1, 2), str(spatial_start) + if spatial_start == 1: + downscaled_data = [] + for d in data: + ds = downscaler(d, **kwargs).astype(data.dtype) + downscaled_data.append(ds[None]) + data = np.concatenate(downscaled_data, axis=0) + else: + downscaled_data = [] + for time_slice in data: + downscaled_channel = [] + for channel_slice in time_slice: + ds = downscaler(channel_slice, **kwargs).astype(data.dtype) + downscaled_channel.append(ds[None]) + downscaled_channel = np.concatenate(downscaled_channel, axis=0) + downscaled_data.append(downscaled_channel[None]) + data = np.concatenate(downscaled_data, axis=0) return data -# TODO expose dimension separator as param def write_ome_zarr(data, path, axes_names, name, n_scales, key=None, chunks=None, downscaler=skimage.transform.rescale, - kwargs={"scale": (0.5, 0.5, 0.5), "order": 0, "preserve_range": True}): + kwargs={"scale": (0.5, 0.5, 0.5), "order": 0, "preserve_range": True}, + dimension_separator="/"): """Write numpy data to ome.zarr format. """ - + assert dimension_separator in (".", "/") assert 2 <= data.ndim <= 5 _validate_axes_names(data.ndim, axes_names) chunks = _get_chunks(axes_names) if chunks is None else chunks - store = zarr.NestedDirectoryStore(path, dimension_separator="/") + if dimension_separator == "/": + store = zarr.NestedDirectoryStore(path, dimension_separator=dimension_separator) + else: + store = zarr.DirectoryStore(path, dimension_separator=dimension_separator) with zarr.open(store, mode='a') as f: g = f if key is None else f.require_group(key) - g.create_dataset('s0', data=data, chunks=chunks, dimension_separator="/") + g.create_dataset('s0', data=data, chunks=chunks, dimension_separator=dimension_separator) for ii in range(1, n_scales): - data = _downscale(data, downscaler, kwargs) - g.create_dataset(f's{ii}', data=data, chunks=chunks, dimension_separator="/") + data = _downscale(data, axes_names, downscaler, kwargs) + g.create_dataset(f's{ii}', data=data, chunks=chunks, dimension_separator=dimension_separator) function_name = f'{downscaler.__module__}.{downscaler.__name__}' create_ngff_metadata(g, name, axes_names, type_=function_name, metadata=kwargs) diff --git a/example/io/check_ngff_examples.py b/example/io/check_ngff_examples.py new file mode 100644 index 0000000..ab6fd97 --- /dev/null +++ b/example/io/check_ngff_examples.py @@ -0,0 +1,37 @@ +import argparse +import os +from glob import glob + +import napari +import zarr + + +def check_example(ff): + fname = os.path.split(ff)[1] + print("Checking", fname) + if fname.startswith('flat'): + store = zarr.storage.DirectoryStore(ff) + else: + store = zarr.storage.NestedDirectoryStore(ff) + + with zarr.open(store, mode='r') as f: + data = f['s0'][:] + print(data.shape) + + with napari.gui_qt(): + v = napari.Viewer() + v.title = fname + v.add_image(data) + + +def check_examples(folder): + files = glob(os.path.join(folder, '*.ome.zarr')) + for ff in files: + check_example(ff) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('folder') + args = parser.parse_args() + check_examples(args.folder) diff --git a/example/io/ngff_examples.py b/example/io/ngff_examples.py index d0de838..37b20fc 100644 --- a/example/io/ngff_examples.py +++ b/example/io/ngff_examples.py @@ -17,7 +17,7 @@ def _kwargs_3d(): def _load_data(path, key, bb): if key is None: - data = imageio.imread(path) + data = imageio.volread(path) data = data[bb] else: with open_file(path, 'r') as f: @@ -25,14 +25,21 @@ def _load_data(path, key, bb): return data -def _create_example(in_path, folder, axes, key=None, bb=np.s_[:]): +def _create_example(in_path, folder, axes, key=None, bb=np.s_[:], dimension_separator="/"): + ax_name = ''.join(axes) + if dimension_separator == "/": + out_path = os.path.join(folder, f"{ax_name}.ome.zarr") + else: + out_path = os.path.join(folder, f"flat_{ax_name}.ome.zarr") + if os.path.exists(out_path): + print("Example data at", out_path, "is already present") + return + data = _load_data(in_path, key, bb) assert data.ndim == len(axes) - ax_name = ''.join(axes) - out_path = os.path.join(folder, f"{ax_name}.ome.zr") kwargs = _kwargs_3d() if axes[-3:] == ('z', 'y', 'x') else _kwargs_2d() write_ome_zarr(data, out_path, axes, ax_name, - n_scales=3, kwargs=kwargs) + n_scales=3, kwargs=kwargs, dimension_separator=dimension_separator) # # create ngff ome.zarr example data @@ -42,30 +49,131 @@ def _create_example(in_path, folder, axes, key=None, bb=np.s_[:]): # axes: yx def create_2d_example(folder): + # yx: covid htm data with only nucleus channel in_path = os.path.join("/g/kreshuk/data/covid/covid-data-vibor/20200405_test_images", "WellC01_PointC01_0000_ChannelDAPI,WF_GFP,TRITC,WF_Cy5_Seq0216.tiff") _create_example(in_path, folder, axes=("y", "x"), bb=np.s_[0, :, :]) -# add linked labels for the zyx example +def _make_t_volume(path, timepoints, out_path, scale=2): + if os.path.exists(out_path): + return + data = [] + for tp in timepoints: + key = f'setup0/timepoint{tp}/s{scale}' + with open_file(path, 'r') as f: + d = f[key][:] + data.append(d[None]) + data = np.concatenate(data, axis=0) + with open_file(out_path, 'w') as f: + f.create_dataset('data', data=data, chunks=(1, 64, 64, 64)) + + +# TODO add linked labels for the zyx example # axes: zyx, cyx, tyx def create_3d_examples(folder): - pass - -# axes: tcyx, tzyx, czyx + # zyx: covid em data + labels + raw_path = os.path.join('/g/kreshuk/pape/Work/mobie/covid-em-datasets/data', + 'Covid19-S4-Area2/images/local/sbem-6dpf-1-whole-raw.n5') + raw_key = 'setup0/timepoint0/s3' + _create_example(raw_path, folder, axes=("z", "y", "x"), key=raw_key) + # for linked labels + # seg_path = os.path.join('/g/kreshuk/pape/Work/mobie/covid-em-datasets/data', + # 'Covid19-S4-Area2/images/local/s4_area2_segmentation.n5') + # seg_key = 'setup0/timepoint0/s3' + + # cyx: covid htm data with more channels + in_path = os.path.join("/g/kreshuk/data/covid/covid-data-vibor/20200405_test_images", + "WellC01_PointC01_0000_ChannelDAPI,WF_GFP,TRITC,WF_Cy5_Seq0216.tiff") + _create_example(in_path, folder, axes=("c", "y", "x"), bb=np.s_[:3, :, :]) + + # tyx: middle slice from arabidopsis dataset + timepoints = [32, 33, 34] + scale = 2 + raw_path = os.path.join('/g/kreshuk/pape/Work/mobie/arabidopsis-root-lm-datasets/data', + 'arabidopsis-root/images/local/lm-membranes.n5') + tmp_path = './tp_data.h5' + _make_t_volume(raw_path, timepoints, tmp_path, scale=scale) + _create_example(tmp_path, folder, axes=("t", "y", "x"), bb=np.s_[:, 200, :, :], + key='data') + + +def _make_tc_volume(path1, path2, timepoints, out_path, scale=2): + if os.path.exists(out_path): + return + data = [] + for tp in timepoints: + key = f'setup0/timepoint{tp}/s{scale}' + with open_file(path1, 'r') as f: + d1 = f[key][:] + with open_file(path2, 'r') as f: + d2 = f[key][:] + d = np.concatenate([d1[None], d2[None]], axis=0) + data.append(d[None]) + data = np.concatenate(data, axis=0) + with open_file(out_path, 'w') as f: + f.create_dataset('data', data=data, chunks=(1, 1, 64, 64, 64)) + + +# axes: tzyx, tcyx, czyx def create_4d_examples(folder): - pass + + # tzyx: arabidopsis dataset (boundaries) + tmp_path = './tp_data.h5' + _create_example(tmp_path, folder, key="data", axes=("t", "z", "y", "x"), bb=np.s_[:]) + + # tcyx: arabidopsis boundaries and nuclei, middle slice + timepoints = [32, 33, 34] + scale = 2 + path1 = '/g/kreshuk/pape/Work/mobie/arabidopsis-root-lm-datasets/data/arabidopsis-root/images/local/lm-membranes.n5' + path2 = '/g/kreshuk/pape/Work/mobie/arabidopsis-root-lm-datasets/data/arabidopsis-root/images/local/lm-nuclei.n5' + tmp_path = './tp_channel_data.h5' + _make_tc_volume(path1, path2, timepoints, tmp_path, scale=scale) + _create_example(tmp_path, folder, key="data", axes=("t", "c", "y", "x"), bb=np.s_[:, :, 200, :, :]) + + # czyx: arabidopsis dataset (boundaries + nuclei), single timepoint + _create_example(tmp_path, folder, key="data", axes=("c", "z", "y", "x"), bb=np.s_[0, :, :, :, :]) # axes: tczyx def create_5d_example(folder): - pass + # tczyx: full arabidopsis dataset + tmp_path = './tp_channel_data.h5' + _create_example(tmp_path, folder, key="data", axes=("t", "c", "z", "y", "x"), bb=np.s_[:, :, :, :, :]) # using '.' dimension separator def create_flat_example(folder): - pass + # yx: covid htm data with only nucleus channel + in_path = os.path.join("/g/kreshuk/data/covid/covid-data-vibor/20200405_test_images", + "WellC01_PointC01_0000_ChannelDAPI,WF_GFP,TRITC,WF_Cy5_Seq0216.tiff") + _create_example(in_path, folder, axes=("y", "x"), bb=np.s_[0, :, :], dimension_separator=".") + + +def copy_readme(output_folder, version): + readme = f""" +# Example data for OME-ZARR NGFF v{version} + +This folder contains the following example ome.zarr files +- yx: 2d image, data is the nucleus channel of an image from [1] +- zyx: 3d volume, data is an EM volume from [2] +- cyx: 2d image with channels, image with 3 channels from [1] +- tyx: timeseries of 2d images, timeseries of central slice of membrane channel from [3] +- tzyx: timeseries of 3d images, timeseries of membrane channel volume from [3] +- tcyx: timeseries of images with channel, timeseries of central slice of membrane + nucleus channel from [3] +- czyx: 3d volume with channel, single timepoint of membrane and nucleus channel from [3] +- tczyx: timeseries of 3d volumes with channel, full data from [3] +- flat_yx: same as yx, but using flat chunk storage (dimension_separator=".") instead of nested storage + +Publications: +[1] https://onlinelibrary.wiley.com/doi/full/10.1002/bies.202000257 +[2] https://www.sciencedirect.com/science/article/pii/S193131282030620X +[3] https://elifesciences.org/articles/57613 + """ + out_path = os.path.join(output_folder, 'Readme.md') + with open(out_path, 'w') as f: + f.write(readme) if __name__ == '__main__': @@ -82,4 +190,4 @@ def create_flat_example(folder): create_4d_examples(output_folder) create_5d_example(output_folder) create_flat_example(output_folder) - # TODO copy README + copy_readme(output_folder, args.version)