Skip to content

Commit 6f585cb

Browse files
authored
Merge pull request #53 from kthyng/add_fsspec_cache
can now cache datasets using fsspec
2 parents 8614789 + bc05e2e commit 6f585cb

File tree

4 files changed

+32
-4
lines changed

4 files changed

+32
-4
lines changed

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ channels:
33
- conda-forge
44
dependencies:
55
- python
6+
- fsspec
67
- numpy
78
- dask
89
- pandas

intake_erddap/erddap.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from typing import List, Optional, Tuple, Type, Union
66

77
import cf_pandas # noqa: F401
8+
import fsspec
89
import numpy as np
910
import pandas as pd
1011
import requests
@@ -139,6 +140,11 @@ class TableDAPSource(ERDDAPSource):
139140
dropna : bool, False.
140141
WARNING ALPHA FEATURE. If True, rows with data columns of nans will be
141142
dropped from data frame. Has not been thoroughly tested.
143+
cache_kwargs : dict, optional
144+
WARNING ALPHA FEATURE. If you want to have the data you access stored
145+
locally in a cache, use this keyword to pass a dictionary of keyword arguments to ``fsspec.open``.
146+
The cache is set up using ``fsspec``'s simple cache. Example configuration
147+
is ``cache_kwargs=dict(cache_storage="/tmp/fnames/", same_names=True)``.
142148
143149
Examples
144150
--------
@@ -178,6 +184,7 @@ def __init__(
178184
server: str,
179185
mask_failed_qartod: bool = False,
180186
dropna: bool = False,
187+
cache_kwargs: Optional[dict] = None,
181188
*args,
182189
**kwargs,
183190
):
@@ -186,6 +193,7 @@ def __init__(
186193
self._dataset_metadata: Optional[dict] = None
187194
self._mask_failed_qartod = mask_failed_qartod
188195
self._dropna = dropna
196+
self._cache_kwargs = cache_kwargs
189197
kwargs.pop("protocol", None)
190198
# https://github.com/python/mypy/issues/6799
191199
super().__init__(*args, protocol="tabledap", **kwargs) # type: ignore
@@ -220,9 +228,20 @@ def _close(self):
220228

221229
def _load(self):
222230
e = self.get_client()
223-
self._dataframe: pd.DataFrame = e.to_pandas(
224-
requests_kwargs={"timeout": 60}, **self.open_kwargs
225-
)
231+
if self._cache_kwargs is not None:
232+
if "response" in self.open_kwargs:
233+
response = self.open_kwargs["response"]
234+
self.open_kwargs.pop("response")
235+
url = e.get_download_url(response=response)
236+
else:
237+
url = e.get_download_url(response=response)
238+
239+
with fsspec.open(f"simplecache::{url}", **self._cache_kwargs) as f:
240+
self._dataframe: pd.DataFrame = pd.read_csv(f, **self.open_kwargs)
241+
else:
242+
self._dataframe: pd.DataFrame = e.to_pandas(
243+
requests_kwargs={"timeout": 60}, **self.open_kwargs
244+
)
226245
if self._mask_failed_qartod:
227246
self.run_mask_failed_qartod()
228247
if self._dropna:

intake_erddap/erddap_cat.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ class ERDDAPCatalog(Catalog):
109109
dropna : bool, False.
110110
WARNING ALPHA FEATURE. If True, rows with data columns of nans will be
111111
dropped from data frame. Has not been thoroughly tested.
112+
cache_kwargs : dict, optional
113+
WARNING ALPHA FEATURE. If you want to have the data you access stored
114+
locally in a cache, use this keyword to pass a dictionary of cache keyword arguments.
115+
The cache is set up using ``fsspec``'s simple cache. Example configuration
116+
is ``cache_kwargs=dict(cache_storage="/tmp/fnames/", same_names=True)``.
112117
113118
Attributes
114119
----------
@@ -143,6 +148,7 @@ def __init__(
143148
open_kwargs: dict = None,
144149
mask_failed_qartod: bool = False,
145150
dropna: bool = False,
151+
cache_kwargs: Optional[dict] = None,
146152
**kwargs,
147153
):
148154
if server.endswith("/"):
@@ -159,6 +165,7 @@ def __init__(
159165
self.open_kwargs = open_kwargs or {}
160166
self._mask_failed_qartod = mask_failed_qartod
161167
self._dropna = dropna
168+
self._cache_kwargs = cache_kwargs
162169

163170
if kwargs_search is not None:
164171
checks = [
@@ -429,6 +436,7 @@ def _load(self):
429436
{
430437
"mask_failed_qartod": self._mask_failed_qartod,
431438
"dropna": self._dropna,
439+
"cache_kwargs": self._cache_kwargs,
432440
}
433441
)
434442
args["constraints"].update(self._get_tabledap_constraints())

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ quiet = false
1919
color = true
2020

2121
[tool.isort]
22-
known_third_party = ["appdirs", "cf_pandas", "dask", "erddapy", "intake", "numpy", "pandas", "pkg_resources", "pytest", "requests", "setuptools", "xarray"]
22+
known_third_party = ["appdirs", "cf_pandas", "dask", "erddapy", "fsspec", "intake", "numpy", "pandas", "pkg_resources", "pytest", "requests", "setuptools", "xarray"]
2323
skip_glob = ["docs/*", "docs/**/*.py"]
2424

2525
[tool.pytest.ini_options]

0 commit comments

Comments
 (0)