Skip to content

Commit f32d354

Browse files
authored
Lazy Imports (#7179)
* fix typing of BackendEntrypoint * make backends lazy * make matplotlib lazy and add tests for lazy modules * make flox lazy * fix generated docs on windows... * try fixing test * make pycompat lazy * make dask.array lazy * add import xarray without numpy or pandas benchmark * improve error reporting in test * fix import benchmark * add lazy import to whats-new * fix lazy import test * fix typos * fix windows stuff again
1 parent fc9026b commit f32d354

33 files changed

+445
-241
lines changed

asv_bench/benchmarks/import.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,17 @@ class Import:
22
"""Benchmark importing xarray"""
33

44
def timeraw_import_xarray(self):
5-
return """
6-
import xarray
7-
"""
5+
return "import xarray"
86

97
def timeraw_import_xarray_plot(self):
10-
return """
11-
import xarray.plot
12-
"""
8+
return "import xarray.plot"
139

1410
def timeraw_import_xarray_backends(self):
1511
return """
1612
from xarray.backends import list_engines
1713
list_engines()
1814
"""
15+
16+
def timeraw_import_xarray_only(self):
17+
# import numpy and pandas in the setup stage
18+
return "import xarray", "import numpy, pandas"

doc/whats-new.rst

+2
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ Internal Changes
8787
encoding times to preserve existing behavior and prevent future errors when it
8888
is eventually set to ``True`` by default in cftime (:pull:`7171`). By
8989
`Spencer Clark <https://github.com/spencerkclark>`_.
90+
- Improved import time by lazily importing backend modules, matplotlib, dask.array and flox. (:issue:`6726`, :pull:`7179`)
91+
By `Michael Niklas <https://github.com/headtr1ck>`_.
9092
- Emit a warning under the development version of pandas when we convert
9193
non-nanosecond precision datetime or timedelta values to nanosecond precision.
9294
This was required in the past, because pandas previously was not compatible

xarray/backends/cfgrib_.py

+11-16
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import numpy as np
77

88
from ..core import indexing
9-
from ..core.utils import Frozen, FrozenDict, close_on_error
9+
from ..core.utils import Frozen, FrozenDict, close_on_error, module_available
1010
from ..core.variable import Variable
1111
from .common import (
1212
BACKEND_ENTRYPOINTS,
@@ -18,20 +18,6 @@
1818
from .locks import SerializableLock, ensure_lock
1919
from .store import StoreBackendEntrypoint
2020

21-
try:
22-
import cfgrib
23-
24-
has_cfgrib = True
25-
except ModuleNotFoundError:
26-
has_cfgrib = False
27-
# cfgrib throws a RuntimeError if eccodes is not installed
28-
except (ImportError, RuntimeError):
29-
warnings.warn(
30-
"Failed to load cfgrib - most likely there is a problem accessing the ecCodes library. "
31-
"Try `import cfgrib` to get the full error message"
32-
)
33-
has_cfgrib = False
34-
3521
# FIXME: Add a dedicated lock, even if ecCodes is supposed to be thread-safe
3622
# in most circumstances. See:
3723
# https://confluence.ecmwf.int/display/ECC/Frequently+Asked+Questions
@@ -61,6 +47,15 @@ class CfGribDataStore(AbstractDataStore):
6147
"""
6248

6349
def __init__(self, filename, lock=None, **backend_kwargs):
50+
try:
51+
import cfgrib
52+
# cfgrib throws a RuntimeError if eccodes is not installed
53+
except (ImportError, RuntimeError) as err:
54+
warnings.warn(
55+
"Failed to load cfgrib - most likely there is a problem accessing the ecCodes library. "
56+
"Try `import cfgrib` to get the full error message"
57+
)
58+
raise err
6459

6560
if lock is None:
6661
lock = ECCODES_LOCK
@@ -96,7 +91,7 @@ def get_encoding(self):
9691

9792

9893
class CfgribfBackendEntrypoint(BackendEntrypoint):
99-
available = has_cfgrib
94+
available = module_available("cfgrib")
10095

10196
def guess_can_open(self, filename_or_obj):
10297
try:

xarray/backends/common.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -376,22 +376,25 @@ class BackendEntrypoint:
376376
Attributes
377377
----------
378378
379-
open_dataset_parameters : tuple, default None
379+
available : bool, default: True
380+
Indicate whether this backend is available given the installed packages.
381+
The setting of this attribute is not mandatory.
382+
open_dataset_parameters : tuple, default: None
380383
A list of ``open_dataset`` method parameters.
381384
The setting of this attribute is not mandatory.
382-
description : str
385+
description : str, default: ""
383386
A short string describing the engine.
384387
The setting of this attribute is not mandatory.
385-
url : str
388+
url : str, default: ""
386389
A string with the URL to the backend's documentation.
387390
The setting of this attribute is not mandatory.
388391
"""
389392

390393
available: ClassVar[bool] = True
391394

392-
open_dataset_parameters: tuple | None = None
393-
description: str = ""
394-
url: str = ""
395+
open_dataset_parameters: ClassVar[tuple | None] = None
396+
description: ClassVar[str] = ""
397+
url: ClassVar[str] = ""
395398

396399
def __repr__(self) -> str:
397400
txt = f"<{type(self).__name__}>"

xarray/backends/h5netcdf_.py

+8-11
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from ..core.utils import (
1212
FrozenDict,
1313
is_remote_uri,
14+
module_available,
1415
read_magic_number_from_file,
1516
try_read_magic_number_from_file_or_path,
1617
)
@@ -33,16 +34,6 @@
3334
)
3435
from .store import StoreBackendEntrypoint
3536

36-
try:
37-
import h5netcdf
38-
39-
has_h5netcdf = True
40-
except ImportError:
41-
# Except a base ImportError (not ModuleNotFoundError) to catch usecases
42-
# where errors have mismatched versions of c-dependencies. This can happen
43-
# when developers are making changes them.
44-
has_h5netcdf = False
45-
4637

4738
class H5NetCDFArrayWrapper(BaseNetCDF4Array):
4839
def get_array(self, needs_lock=True):
@@ -110,6 +101,7 @@ class H5NetCDFStore(WritableCFDataStore):
110101
)
111102

112103
def __init__(self, manager, group=None, mode=None, lock=HDF5_LOCK, autoclose=False):
104+
import h5netcdf
113105

114106
if isinstance(manager, (h5netcdf.File, h5netcdf.Group)):
115107
if group is None:
@@ -147,6 +139,7 @@ def open(
147139
phony_dims=None,
148140
decode_vlen_strings=True,
149141
):
142+
import h5netcdf
150143

151144
if isinstance(filename, bytes):
152145
raise ValueError(
@@ -237,12 +230,16 @@ def get_attrs(self):
237230
return FrozenDict(_read_attributes(self.ds))
238231

239232
def get_dimensions(self):
233+
import h5netcdf
234+
240235
if Version(h5netcdf.__version__) >= Version("0.14.0.dev0"):
241236
return FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items())
242237
else:
243238
return self.ds.dimensions
244239

245240
def get_encoding(self):
241+
import h5netcdf
242+
246243
if Version(h5netcdf.__version__) >= Version("0.14.0.dev0"):
247244
return {
248245
"unlimited_dims": {
@@ -373,7 +370,7 @@ class H5netcdfBackendEntrypoint(BackendEntrypoint):
373370
backends.ScipyBackendEntrypoint
374371
"""
375372

376-
available = has_h5netcdf
373+
available = module_available("h5netcdf")
377374
description = (
378375
"Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using h5netcdf in Xarray"
379376
)

xarray/backends/netCDF4_.py

+4-12
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
FrozenDict,
1515
close_on_error,
1616
is_remote_uri,
17+
module_available,
1718
try_read_magic_number_from_path,
1819
)
1920
from ..core.variable import Variable
@@ -31,17 +32,6 @@
3132
from .netcdf3 import encode_nc3_attr_value, encode_nc3_variable
3233
from .store import StoreBackendEntrypoint
3334

34-
try:
35-
import netCDF4
36-
37-
has_netcdf4 = True
38-
except ImportError:
39-
# Except a base ImportError (not ModuleNotFoundError) to catch usecases
40-
# where errors have mismatched versions of c-dependencies. This can happen
41-
# when developers are making changes them.
42-
has_netcdf4 = False
43-
44-
4535
# This lookup table maps from dtype.byteorder to a readable endian
4636
# string used by netCDF4.
4737
_endian_lookup = {"=": "native", ">": "big", "<": "little", "|": "native"}
@@ -313,6 +303,7 @@ class NetCDF4DataStore(WritableCFDataStore):
313303
def __init__(
314304
self, manager, group=None, mode=None, lock=NETCDF4_PYTHON_LOCK, autoclose=False
315305
):
306+
import netCDF4
316307

317308
if isinstance(manager, netCDF4.Dataset):
318309
if group is None:
@@ -349,6 +340,7 @@ def open(
349340
lock_maker=None,
350341
autoclose=False,
351342
):
343+
import netCDF4
352344

353345
if isinstance(filename, os.PathLike):
354346
filename = os.fspath(filename)
@@ -537,7 +529,7 @@ class NetCDF4BackendEntrypoint(BackendEntrypoint):
537529
backends.ScipyBackendEntrypoint
538530
"""
539531

540-
available = has_netcdf4
532+
available = module_available("netCDF4")
541533
description = (
542534
"Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using netCDF4 in Xarray"
543535
)

xarray/backends/pseudonetcdf_.py

+3-10
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import numpy as np
44

55
from ..core import indexing
6-
from ..core.utils import Frozen, FrozenDict, close_on_error
6+
from ..core.utils import Frozen, FrozenDict, close_on_error, module_available
77
from ..core.variable import Variable
88
from .common import (
99
BACKEND_ENTRYPOINTS,
@@ -16,14 +16,6 @@
1616
from .locks import HDF5_LOCK, NETCDFC_LOCK, combine_locks, ensure_lock
1717
from .store import StoreBackendEntrypoint
1818

19-
try:
20-
from PseudoNetCDF import pncopen
21-
22-
has_pseudonetcdf = True
23-
except ModuleNotFoundError:
24-
has_pseudonetcdf = False
25-
26-
2719
# PseudoNetCDF can invoke netCDF libraries internally
2820
PNETCDF_LOCK = combine_locks([HDF5_LOCK, NETCDFC_LOCK])
2921

@@ -56,6 +48,7 @@ class PseudoNetCDFDataStore(AbstractDataStore):
5648

5749
@classmethod
5850
def open(cls, filename, lock=None, mode=None, **format_kwargs):
51+
from PseudoNetCDF import pncopen
5952

6053
keywords = {"kwargs": format_kwargs}
6154
# only include mode if explicitly passed
@@ -128,7 +121,7 @@ class PseudoNetCDFBackendEntrypoint(BackendEntrypoint):
128121
backends.PseudoNetCDFDataStore
129122
"""
130123

131-
available = has_pseudonetcdf
124+
available = module_available("PseudoNetCDF")
132125
description = (
133126
"Open many atmospheric science data formats using PseudoNetCDF in Xarray"
134127
)

xarray/backends/pydap_.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,14 @@
55

66
from ..core import indexing
77
from ..core.pycompat import integer_types
8-
from ..core.utils import Frozen, FrozenDict, close_on_error, is_dict_like, is_remote_uri
8+
from ..core.utils import (
9+
Frozen,
10+
FrozenDict,
11+
close_on_error,
12+
is_dict_like,
13+
is_remote_uri,
14+
module_available,
15+
)
916
from ..core.variable import Variable
1017
from .common import (
1118
BACKEND_ENTRYPOINTS,
@@ -16,15 +23,6 @@
1623
)
1724
from .store import StoreBackendEntrypoint
1825

19-
try:
20-
import pydap.client
21-
import pydap.lib
22-
23-
pydap_version = pydap.lib.__version__
24-
has_pydap = True
25-
except ModuleNotFoundError:
26-
has_pydap = False
27-
2826

2927
class PydapArrayWrapper(BackendArray):
3028
def __init__(self, array):
@@ -101,6 +99,8 @@ def open(
10199
verify=None,
102100
user_charset=None,
103101
):
102+
import pydap.client
103+
import pydap.lib
104104

105105
if timeout is None:
106106
from pydap.lib import DEFAULT_TIMEOUT
@@ -114,7 +114,7 @@ def open(
114114
"output_grid": output_grid or True,
115115
"timeout": timeout,
116116
}
117-
if Version(pydap_version) >= Version("3.3.0"):
117+
if Version(pydap.lib.__version__) >= Version("3.3.0"):
118118
if verify is not None:
119119
kwargs.update({"verify": verify})
120120
if user_charset is not None:
@@ -154,7 +154,7 @@ class PydapBackendEntrypoint(BackendEntrypoint):
154154
backends.PydapDataStore
155155
"""
156156

157-
available = has_pydap
157+
available = module_available("pydap")
158158
description = "Open remote datasets via OPeNDAP using pydap in Xarray"
159159
url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.PydapBackendEntrypoint.html"
160160

xarray/backends/pynio_.py

+3-10
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import numpy as np
44

55
from ..core import indexing
6-
from ..core.utils import Frozen, FrozenDict, close_on_error
6+
from ..core.utils import Frozen, FrozenDict, close_on_error, module_available
77
from ..core.variable import Variable
88
from .common import (
99
BACKEND_ENTRYPOINTS,
@@ -16,14 +16,6 @@
1616
from .locks import HDF5_LOCK, NETCDFC_LOCK, SerializableLock, combine_locks, ensure_lock
1717
from .store import StoreBackendEntrypoint
1818

19-
try:
20-
import Nio
21-
22-
has_pynio = True
23-
except ModuleNotFoundError:
24-
has_pynio = False
25-
26-
2719
# PyNIO can invoke netCDF libraries internally
2820
# Add a dedicated lock just in case NCL as well isn't thread-safe.
2921
NCL_LOCK = SerializableLock()
@@ -61,6 +53,7 @@ class NioDataStore(AbstractDataStore):
6153
"""Store for accessing datasets via PyNIO"""
6254

6355
def __init__(self, filename, mode="r", lock=None, **kwargs):
56+
import Nio
6457

6558
if lock is None:
6659
lock = PYNIO_LOCK
@@ -101,7 +94,7 @@ def close(self):
10194

10295

10396
class PynioBackendEntrypoint(BackendEntrypoint):
104-
available = has_pynio
97+
available = module_available("Nio")
10598

10699
def open_dataset(
107100
self,

0 commit comments

Comments
 (0)