diff --git a/tools/migration-unified-annotation-versioning/main.py b/tools/migration-unified-annotation-versioning/main.py index b4318ac935..27101600fc 100755 --- a/tools/migration-unified-annotation-versioning/main.py +++ b/tools/migration-unified-annotation-versioning/main.py @@ -18,8 +18,8 @@ def main(): parser.add_argument("--dry", help="Only read and process data, do not write out results", action="store_true") parser.add_argument("--num_threads", help="Number of threads to migrate the annotations in parallel", type=int, default=1) parser.add_argument("--postgres", help="Postgres connection specifier, default is postgresql://postgres@localhost:5432/webknossos", type=str, default="postgresql://postgres@localhost:5432/webknossos") - parser.add_argument("--previous_start", help="Previous run start time. Only annotations last modified after that time will be migrated. Use for second run in incremental migration. Example: 2024-11-27 10:37:30.171083", type=str) - parser.add_argument("--start", help="Run “start time”. Only annotations last modified before that time will be migrated. Defaults to now. Change if FossilDB content is not up to date with postgres. Example: 2024-11-27 10:37:30.171083", type=str) + parser.add_argument("--previous_start", help="Previous run start time. Only annotations last modified after that time will be migrated. Use for second run in incremental migration. Example: 2024-11-27 10:37:30.171083+01", type=str) + parser.add_argument("--start", help="Run “start time”. Only annotations last modified before that time will be migrated. Defaults to now. Change if FossilDB content is not up to date with postgres. Example: 2024-11-27 10:37:30.171083+01", type=str) parser.add_argument("--count_versions", help="Instead of migrating, only count materialized versions of the annotation", action="store_true") parser.add_argument("--previous_checkpoints", help="Supply checkpoints file of a previous run to resume", type=str) parser.add_argument("--verbose", "-v", help="Print for every annotation", action="store_true") diff --git a/tools/migration-unified-annotation-versioning/migration.py b/tools/migration-unified-annotation-versioning/migration.py index d7b2f66386..209611e3c7 100644 --- a/tools/migration-unified-annotation-versioning/migration.py +++ b/tools/migration-unified-annotation-versioning/migration.py @@ -71,7 +71,7 @@ def migrate_annotation(self, annotation): newest_version = self.get_newest_version(tracing_id, update_collection) versions += newest_version if versions > 1: - logger.info(f"{versions} versions for {annotation['_id']}{self.get_progress()}") + logger.info(f"{versions} versions for {annotation['_id']}{self.get_progress(offset=1)}") else: logger.debug(f"Migrating annotation {annotation['_id']} (dry={self.args.dry}) ...") mapping_id_map = self.build_mapping_id_map(annotation) @@ -86,7 +86,7 @@ def migrate_annotation(self, annotation): logger.debug("writing annotationProtos...") self.create_and_save_annotation_proto(annotation, materialized_versions, mapping_id_map) if time.time() - before > 1 or self.args.verbose: - log_since(before, f"Migrating annotation {annotation['_id']} ({len(materialized_versions)} materialized versions)", self.get_progress()) + log_since(before, f"Migrating annotation {annotation['_id']} ({len(materialized_versions)} materialized versions)", self.get_progress(offset=1)) checkpoint_logger.info(annotation['_id']) except Exception: logger.exception(f"Exception while migrating annotation {annotation['_id']}:") @@ -570,7 +570,7 @@ def delete_all_with_prefix(self, collection: str, prefix: str) -> None: def read_annotation_list(self): checkpoint_set = self.read_checkpoints() before = time.time() - start_time = str(datetime.datetime.now()) + start_time = str(datetime.datetime.now(datetime.timezone.utc)) if self.args.start is not None: start_time = self.args.start previous_start_label = "" @@ -628,9 +628,12 @@ def replace_before_first_slash(self, replacement_prefix: str, key) -> str: slash_pos = key.find('/') return replacement_prefix + key[slash_pos:] - def get_progress(self) -> str: + # Current progress as formatted string. + # offset is added to done_count to account for incrementing done_count only *after* logging (in the finally block) + def get_progress(self, offset: int) -> str: with self.done_count_lock: done_count = self.done_count + done_count += offset percentage = 100.0 * done_count / self.total_count duration = time.time() - self.before if done_count > 0: