Skip to content

Commit 60c3c80

Browse files
authored
Merge pull request #51 from tlnagy/tn/add-mmapping
Add support for memory mapping Tiff files
2 parents cd0bb33 + ff4e265 commit 60c3c80

File tree

9 files changed

+164
-24
lines changed

9 files changed

+164
-24
lines changed

Project.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "OMETIFF"
22
uuid = "2d0ec36b-e807-5756-994b-45af29551fcf"
33
authors = ["Tamas Nagy <[email protected]>"]
4-
version = "0.3.2"
4+
version = "0.3.3"
55

66
[deps]
77
AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9"

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ labeled axes provided by [AxisArrays.jl](https://github.com/JuliaImages/AxisArra
1111
## Features
1212

1313
- Can open a wide range of OMETIFF files with a special focus on [correctness](https://github.com/tlnagy/OMETIFF.jl/blob/master/test/runtests.jl)
14+
- Supports memory-mapping to open large TIFF files quickly even on
15+
memory-constrained machines
1416
- Spatial and temporal axes are annotated with units if available (like μm, s, etc)
1517
- Channel and position axes use their original names
1618
- Elapsed times are extracted and returned using the same labeled axes

docs/src/index.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@ labeled axes provided by [AxisArrays.jl](https://github.com/JuliaImages/AxisArra
66

77
## Features
88

9-
- Can open a wide-range of OMETIFF files with a special focus on [correctness](https://github.com/tlnagy/OMETIFF.jl/blob/master/test/runtests.jl)
9+
- Can open a wide range of OMETIFF files with a special focus on
10+
[correctness](https://github.com/tlnagy/OMETIFF.jl/blob/master/test/runtests.jl)
11+
- Supports memory-mapping to open large TIFF files quickly even on
12+
memory-constrained machines
1013
- Spatial and temporal axes are annotated with units if available (like μm, s, etc)
1114
- Channel and position axes use their original names
1215
- Elapsed times are extracted and returned using the same labeled axes

docs/src/lib/internals.md

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ OMETIFF.dump_omexml
1414
```@docs
1515
OMETIFF.IFD
1616
OMETIFF.TiffFile
17+
OMETIFF.ReadonlyTiffDiskArray
1718
```
1819

1920
## Logic

src/OMETIFF.jl

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ using DocStringExtensions
1717
include("utils.jl")
1818
include("files.jl")
1919
include("parsing.jl")
20+
include("mmap.jl")
2021
include("loader.jl")
2122

2223
end # module

src/files.jl

+28-2
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@ mutable struct TiffFile
1313
filepath::String
1414

1515
"""The file stream"""
16-
io::Union{Stream, IOStream}
16+
io::Stream
1717

1818
"""Location of the first IFD in the file stream"""
1919
first_offset::Int
2020

2121
"""Whether this file has a different endianness than the host computer"""
2222
need_bswap::Bool
2323

24-
function TiffFile(io::Union{Stream, IOStream})
24+
function TiffFile(io::Stream)
2525
file = new()
2626
file.io = io
2727
seekstart(io)
@@ -47,6 +47,8 @@ function TiffFile(uuid::String, filepath::String)
4747
end
4848
end
4949

50+
# Convenience constructor: wrap a bare `IOStream` in an OMETIFF-tagged `Stream`
# (named via `extract_filename`) and hand it to the `Stream`-based constructor.
function TiffFile(io::IOStream)
    stream = Stream(format"OMETIFF", io, extract_filename(io))
    return TiffFile(stream)
end
51+
5052
"""
5153
IFD(file, strip_offsets) -> IFD
5254
@@ -297,6 +299,30 @@ function load_comments(file)
297299
metadata["Summary"]
298300
end
299301

302+
"""
    _read_ifd_data!(ifd, target, buffer)

Read the pixel data of the IFD `ifd` from its file stream, using `buffer` as
scratch space for every read. `do_bswap` is applied to `buffer` after each read.

The dimensionality of `buffer` selects the strategy:

- **1-dimensional `buffer`** (stripped data): every strip listed in
  `ifd.strip_offsets` is read into `buffer` and then copied into row `j` of
  `target`, where `j` is the position of the strip in `ifd.strip_offsets`.
  `buffer` must hold exactly one strip and its length must match a row of
  `target`.
- **2-dimensional `buffer`**: the whole image is read into `buffer` with a
  single read that starts at the first strip offset. `target` is **not**
  written in this case; the caller is responsible for moving the data from
  `buffer` into its final layout (e.g. transposing it).
"""
function _read_ifd_data!(ifd::IFD, target::AbstractArray{T, 2}, buffer::AbstractArray{T, 1}) where {T}
    io = ifd.file.io

    for (j, offset) in enumerate(ifd.strip_offsets)
        seek(io, offset)
        read!(io, buffer)
        do_bswap(ifd.file, buffer)
        # strip `j` becomes row `j` of the target slice
        view(target, j, :) .= buffer
    end
end

function _read_ifd_data!(ifd::IFD, target::AbstractArray{T, 2}, buffer::AbstractArray{T, 2}) where {T}
    io = ifd.file.io

    # one contiguous read; `target` is intentionally left untouched here
    seek(io, first(ifd.strip_offsets))
    read!(io, buffer)
    do_bswap(ifd.file, buffer)
end
325+
300326
"""
301327
do_bswap(file, values) -> Array
302328

src/loader.jl

+33-20
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,31 @@
1-
function load(f::File{format"OMETIFF"}; dropunused=true)
1+
function load(f::File{format"OMETIFF"}; dropunused=true, inmemory=true)
22
open(f) do s
3-
ret = load(s; dropunused=dropunused)
3+
ret = load(s; dropunused=dropunused, inmemory=inmemory)
44
end
55
end
66

77
"""
8-
load(io; dropunused) -> ImageMetadata.ImageMeta
8+
load(io; dropunused, inmemory) -> ImageMetadata.ImageMeta
99
10-
Load an OMETIFF file using the stream `io`. `dropunused` controls whether
11-
dimensions of length 1 are dropped automatically (default) or not.
10+
Load an OMETIFF file using the stream `io`.
11+
12+
**Arguments**
13+
- `dropunused::Bool`: controls whether dimensions of length 1 are dropped
14+
automatically (default) or not.
15+
- `inmemory::Bool`: controls whether arrays are fully loaded into memory
16+
(default) or left on disk and specific parts only loaded when accessed.
17+
18+
!!! tip
19+
The `inmemory=false` flag currently returns a read-only view of the data on
20+
the disk for data integrity reasons. In order to modify the contents, you
21+
must copy the data into an in-memory container--at least until
22+
[#52](https://github.com/tlnagy/OMETIFF.jl/issues/52) is fixed--like so:
23+
24+
```
25+
copy(arr)
26+
```
1227
"""
13-
function load(io::Stream{format"OMETIFF"}; dropunused=true)
28+
function load(io::Stream{format"OMETIFF"}; dropunused=true, inmemory=true)
1429
if io.filename != nothing && !occursin(".ome.tif", io.filename)
1530
throw(FileIO.LoaderError("OMETIFF", "Not an OME TIFF file!"))
1631
end
@@ -83,7 +98,11 @@ function load(io::Stream{format"OMETIFF"}; dropunused=true)
8398

8499
elapsed_times = get_elapsed_times(containers, master_dims, masteraxis)
85100

86-
img = inmemoryarray(ifds, master_dims, master_rawtype, mappedtype)
101+
if inmemory
102+
img = inmemoryarray(ifds, master_dims, master_rawtype, mappedtype)
103+
else
104+
img = ReadonlyTiffDiskArray(Gray{mappedtype}, master_rawtype, ifds, values(master_dims));
105+
end
87106

88107
# find dimensions of length 1 and remove them
89108
if dropunused
@@ -121,29 +140,23 @@ function inmemoryarray(ifds::OrderedDict{NTuple{4, Int}, IFD},
121140

122141
# iterate over each IFD
123142
for (indices, ifd) in ifds
124-
125143
n_strips = length(ifd.strip_offsets)
126144
strip_len = floor(Int, (width * height) / n_strips)
127145

128146
# if the data is stripped and we haven't fix tmp's layout then lets make
129-
# tmp equal to one strip.
147+
# tmp equal to one strip. This'll be fixed in Julia 1.4
130148
if n_strips > 1 && size(tmp) != (strip_len, )
131149
tmp = Array{rawtype}(undef, strip_len)
132150
end
133151

134-
for j in 1:n_strips
135-
seek(ifd.file.io, ifd.strip_offsets[j])
136-
read!(ifd.file.io, tmp)
137-
do_bswap(ifd.file, tmp)
138-
if n_strips > 1
139-
data[j, :, indices...] = tmp
140-
else
141-
data[:, :, indices...] = tmp'
142-
end
152+
target = view(data, :, :, indices...)
153+
_read_ifd_data!(ifd, target, tmp)
154+
155+
# transposition must happen here since the on-disk variant does this on access
156+
if ndims(tmp) == 2
157+
target .= tmp'
143158
end
144159
end
145160

146161
reinterpret(Gray{mappedtype}, data)
147162
end
148-
149-

src/mmap.jl

+83
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
"""
    ReadonlyTiffDiskArray(mappedtype, rawtype, ifds, dims) -> ReadonlyTiffDiskArray

A lazy representation of an OMETIFF file. This custom type is needed since TIFF
files are laid out noncontiguously and nonregularly. It uses an internal index
to determine the mapping from indices to the locations of data slices on disk.
These slices are generally XY slices and are usually accessed all at once, so
the most recently read slice is kept in an internal cache to speed up repeated
access. Externally, this type should behave very similarly to an in-memory
array, albeit with a higher cost of accessing an element.

The constructor throws an error unless the IFDs are indexed on exactly
`length(dims) - 2` dimensions (i.e. every dimension except XY).

$(FIELDS)
"""
mutable struct ReadonlyTiffDiskArray{T <: Gray, R, N1, N2} <: AbstractArray{T, N2}
    """
    A map of dimensions (sans XY) to the corresponding [`IFD`](@ref)
    """
    ifds::OrderedDict{NTuple{N1, Int}, IFD}

    """
    The full set of dimensions of the TIFF file, including XY
    """
    dims::NTuple{N2, Int}

    """
    An internal cache to fill when reading from disk
    """
    cache::Array{R, 2}

    """
    The dimension indices corresponding to the slice currently in the cache
    """
    cache_index::NTuple{N1, Int}

    function ReadonlyTiffDiskArray(::Type{T}, ::Type{R}, ifds::OrderedDict{NTuple{N1, Int}, IFD}, dims::NTuple{N2, Int}) where {T, R, N1, N2}
        if N2 - 2 != N1
            error("$N2 dimensions given, but the IFDs are indexed on $N1 dimensions instead of "*
                  "expected $(N2-2).")
        end

        # The cache starts out uninitialised, so `cache_index` is set to a
        # sentinel made of -1s. The sentinel is built with `ntuple` so that it
        # always has `N1` entries instead of assuming four non-XY dimensions.
        empty_index = ntuple(_ -> -1, N1)

        new{T, R, N1, N2}(ifds, dims, Array{R}(undef, dims[1], dims[2]), empty_index)
    end
end
43+
44+
# The array reports the dimensions stored in its `dims` field.
function Base.size(A::ReadonlyTiffDiskArray)
    return A.dims
end
45+
46+
# Scalar indexing: `i1, i2` address a pixel inside an XY slice and `i` selects
# which slice (IFD) to look at. The requested slice is read from disk into
# `A.cache` unless it is already there, and the pixel is then served from the
# cache. The cache always holds the slice in the same orientation as the array
# itself, i.e. `A.cache[i1, i2]` is the pixel at `A[i1, i2, i...]`.
function Base.getindex(A::ReadonlyTiffDiskArray{Gray{T}, R, N1, N2}, i1::Int, i2::Int, i::Vararg{Int, N1}) where {T, R, N1, N2}
    # only touch the disk if the cached slice is not the requested one
    if A.cache_index != i
        ifd = A.ifds[i]

        # if the file isn't open, lets open a handle and update it
        if !isopen(ifd.file.io)
            path = ifd.file.filepath
            ifd.file.io = Stream(format"OMETIFF", open(path), path)
        end

        # invalidate the cache before overwriting it so that a failed read
        # cannot leave garbage filed under the previous slice's index
        A.cache_index = ntuple(_ -> -1, N1)

        n_strips = length(ifd.strip_offsets)

        if n_strips > 1
            # stripped data: each strip is read into a 1-dimensional buffer and
            # copied into the matching row of the cache, which already is the
            # final orientation
            strip_len = length(A.cache) ÷ n_strips
            strip = Array{R}(undef, strip_len)
            _read_ifd_data!(ifd, A.cache, strip)
        else
            # contiguous data: the 2-dimensional method only fills the buffer,
            # which holds the image transposed relative to the array, so the
            # buffer has the swapped shape and is transposed into the cache
            raw = Array{R}(undef, size(A.cache, 2), size(A.cache, 1))
            _read_ifd_data!(ifd, A.cache, raw)
            A.cache .= transpose(raw)
        end

        A.cache_index = i
    end

    return Gray(reinterpret(T, A.cache[i1, i2]))
end
78+
79+
# Writing is not supported: every attempt to assign into the on-disk array is
# rejected with an `ErrorException` that points the user at `copy`.
function Base.setindex!(A::ReadonlyTiffDiskArray{Gray{T}, R, N1, N2}, X, I...) where {T, R, N1, N2}
    msg = string(
        "This array is on disk and is read only. Convert to a mutable in-memory version by running ",
        "`copy(arr)`. \n\n𝗡𝗼𝘁𝗲: For large files this can be quite expensive. A future PR will add ",
        "support for reading and writing to/from disk.",
    )
    throw(ErrorException(msg))
end

test/runtests.jl

+11
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,17 @@ end
155155
@test size(img) == (24, 18, 1, 1, 5, 1)
156156
end
157157
end
158+
@testset "Memory mapping" begin
159+
open(joinpath("testdata", "singles", "181003_multi_pos_time_course_1_MMStack.ome.tif")) do f
160+
s = Stream(format"OMETIFF", f, OMETIFF.extract_filename(f))
161+
img = OMETIFF.load(s, inmemory=false)
162+
img2 = OMETIFF.load(s)
163+
@test size(img) == (256, 256, 10, 2)
164+
@test all(img[1:10,1,1,1] .== img2[1:10,1,1,1])
165+
# file is read only and should throw an error if you try and modify it
166+
@test_throws ErrorException img[1:10,1,1,1] .= 1.0
167+
end
168+
end
158169
end
159170

160171
@testset "Error checks" begin

0 commit comments

Comments
 (0)