
Commit ee2d35f

raise exception when datasets would like to cache pipeline result

1 parent 6133f89

File tree

1 file changed (+5, -0 lines)

src/pytorch_ie/pipeline.py

@@ -11,6 +11,7 @@
 from torch import Tensor
 from torch.utils.data import DataLoader
 
+from datasets import is_caching_enabled
 from pytorch_ie.core.document import Document
 from pytorch_ie.core.model import PyTorchIEModel
 from pytorch_ie.core.taskmodule import (
@@ -390,6 +391,10 @@ def __call__(
                 batched=True,
                 **dataset_map_params,
             )
+            # For now, we do not allow caching of pipeline results since fingerprinting may be incorrect
+            # TODO: elaborate why it may be incorrect
+            if is_caching_enabled() and documents._fingerprint == processed_documents._fingerprint:
+                raise Exception("Caching is not allowed for pipeline calls")
         else:
             processed_documents = self._process_documents(
                 documents=documents,
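
The new guard fires only when datasets caching is globally enabled and the mapped result carries the same fingerprint as its input. The sketch below is illustrative and not part of this commit (the toy dataset and transform are made up); it shows the two mechanisms the check relies on: map() normally derives a fresh fingerprint from the input fingerprint plus a hash of the transform, and is_caching_enabled() reflects a global flag, assuming a recent version of the datasets library.

from datasets import Dataset, disable_caching, enable_caching, is_caching_enabled

# Toy dataset; under normal conditions map() produces a new fingerprint,
# so the equality the guard tests for should not hold.
ds = Dataset.from_dict({"text": ["a", "b", "c"]})
mapped = ds.map(lambda ex: {"text": ex["text"].upper()})
print(ds._fingerprint == mapped._fingerprint)  # expected: False

# The guard consults this global flag first, so disabling caching
# short-circuits the check entirely.
disable_caching()
print(is_caching_enabled())  # False
enable_caching()  # restore the default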

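A caller that runs into the new exception can sidestep it by disabling datasets caching before invoking the pipeline, since the guard checks is_caching_enabled() first. A minimal sketch under that assumption; run_pipeline_uncached and its arguments are hypothetical names, not part of pytorch-ie.

from datasets import disable_caching

def run_pipeline_uncached(pipeline, documents):
    # Hypothetical helper: with caching disabled, is_caching_enabled()
    # returns False, so the guard added in this commit cannot raise.
    disable_caching()
    return pipeline(documents)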