Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion ignite/contrib/handlers/clearml_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,18 @@ def get_local_copy(self, filename: str) -> Optional[str]:

@idist.one_rank_only()
def remove(self, filename: str) -> None:
super(ClearMLSaver, self).remove(filename)
from clearml.storage.helper import StorageHelper

helper = StorageHelper.get(filename)

try:
helper.delete(filename)
except ValueError:
warnings.warn(
"Checkpoints being uploaded to clearml-server with version "
"earlier than 1.0.0 does not support delete operation."
)

for slots in self._checkpoint_slots.values():
try:
slots[slots.index(filename)] = None
Expand Down
31 changes: 17 additions & 14 deletions ignite/handlers/checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ def __call__(self, engine: Engine) -> None:
global_step = engine.state.get_event_attrib_value(Events.ITERATION_COMPLETED)
priority = global_step

if self._check_lt_n_saved() or self._compare_fn(priority):
if self._check_lt_n_saved(or_equal=True) or self._compare_fn(priority):

priority_str = f"{priority}" if isinstance(priority, numbers.Integral) else f"{priority:.4f}"

Expand Down Expand Up @@ -400,18 +400,6 @@ def __call__(self, engine: Engine) -> None:
"priority": priority,
}

try:
index = list(map(lambda it: it.filename == filename, self._saved)).index(True)
to_remove = True
except ValueError:
index = 0
to_remove = not self._check_lt_n_saved()

if to_remove:
item = self._saved.pop(index)
if isinstance(self.save_handler, BaseSaveHandler):
self.save_handler.remove(item.filename)

self._saved.append(Checkpoint.Item(priority, filename))
self._saved.sort(key=lambda it: it[0])

Expand All @@ -424,6 +412,14 @@ def __call__(self, engine: Engine) -> None:
except TypeError:
self.save_handler(checkpoint, filename)

index = list(map(lambda it: it.filename == filename, self._saved)).index(True)
to_remove = not self._check_lt_n_saved(or_equal=True)

if to_remove:
item = self._saved.pop(index)
if isinstance(self.save_handler, BaseSaveHandler):
self.save_handler.remove(item.filename)

def _setup_checkpoint(self) -> Dict[str, Dict[Any, Any]]:
checkpoint = {}
if self.to_save is not None:
Expand Down Expand Up @@ -629,12 +625,19 @@ class DiskSaver(BaseSaveHandler):
dirname: Directory path where the checkpoint will be saved
atomic: if True, checkpoint is serialized to a temporary file, and then
moved to final destination, so that files are guaranteed to not be damaged
(for example if exception occurs during saving).
(for example if an exception occurs during saving). Setting ``atomic=True`` is
recommended if ``n_saved=1`` is set in the checkpoint object. See the note below
for details.
create_dir: if True, will create directory ``dirname`` if it doesn't exist.
require_empty: If True, will raise exception if there are any files in the
directory ``dirname``.
kwargs: Accepted keyword arguments for `torch.save` or `xm.save`.

Note:
When ``n_saved=1`` is set in the checkpoint object, only a single checkpoint
is kept, so ``atomic=True`` is the only option that guarantees a non-corrupt
checkpoint is preserved.

.. versionchanged:: 0.4.2
Accept ``kwargs`` for `torch.save` or `xm.save`.
"""
Expand Down