
Commit ec49a5b

Fast agent + new notebooks + bug fixes
- [agent] the agent works much faster (bulk API methods added)
- [cookbook] small fixes + two new examples: filter project images by tag, plot tag distribution
- [SDK/geometry] fix polygon cropping
- [plugin/nn/YOLO] new Dockerfile based on the base-py image
- [guides] improved NN integration tutorial
- [SDK] minor fixes
1 parent f1a25e1 · commit ec49a5b


93 files changed: +1474 −480 lines


.gitignore (+1)

```diff
@@ -8,5 +8,6 @@
 tasks_data/*
 !tasks_data/.gitkeep
 
+
 **/params.local.sh
 */_account_private_registry.sh
```

README.md (−2)

```diff
@@ -120,5 +120,3 @@ Data Transformation Language allows to automate complicated pipelines of data tr
 Regular updates on how to use state of the art models and solve practical
 data science problems with Supervisely.
 - [Tutorials and Cookbooks](./help) in this repository.
-
-
```

agent/README.md (+31 −1)

```diff
@@ -23,4 +23,34 @@ This principal scheme illustrates how agent processes the task.
 
 [Here](https://docs.supervise.ly/cluster/add_delete_node/add_delete_node/) you will find documentation about how to monitor Agent status.
 
-![](https://i.imgur.com/rgihpsQ.png)
+![](https://i.imgur.com/rgihpsQ.png)
+
+
+# Environment variables:
+
+#### Required:
+
+- `AGENT_HOST_DIR`: directory where the agent stores user data. _(default: `$HOME/.supervisely-agent/$ACCESS_TOKEN`)_
+
+- `SERVER_ADDRESS`: full server URL to connect to (e.g. `http://somehost:12345/agent`).
+
+- `ACCESS_TOKEN`: unique string which allows the server to identify the agent.
+
+- `DOCKER_REGISTRY`: comma-separated list of the docker registry addresses in use (e.g. `docker.deepsystems.io,docker.enterprise.supervise.ly`).
+
+- `DOCKER_LOGIN`: comma-separated list of login names for those registries, in the same order (e.g. `user,user`).
+
+- `DOCKER_PASSWORD`: comma-separated list of passwords for those registries, in the same order (e.g. `123,345`).
+
+
+#### Optional:
+
+- `WITH_LOCAL_STORAGE`: whether to use local agent storage for long-term persistent storage of task results (learned model checkpoints, images generated by DTL) instead of uploading the results to the web instance storage. When this option is enabled, those results are unavailable while the agent is not connected to the web instance. Do not enable this option when running the agent on transient machines, such as hourly rented AWS instances, as the local data will be lost as soon as your rented time ends. _(default: true)_
+
+- `PULL_ALWAYS`: whether to always pull the docker image from the registry, or only when an image with the given name and tag is not found locally. _(default: true)_
+
+- `DEFAULT_TIMEOUTS`: whether to use the default timeout configs or load them from the `/workdir/src/configs/timeouts_for_stateless.json` file. _(default: true)_
+
+- `DELETE_TASK_DIR_ON_FINISH`: whether to remove the task directory after the task finishes successfully. _(default: true)_
+
+- `DELETE_TASK_DIR_ON_FAILURE`: whether to remove the task directory after the task finishes with a failure. _(default: false)_
```
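The three `DOCKER_*` variables form parallel, comma-separated lists: the n-th login and password belong to the n-th registry. A minimal sketch of how a launcher script might validate and pair them before starting the agent — the `parse_registry_credentials` helper is illustrative, not part of the agent code:

```python
import os

REQUIRED_VARS = ['SERVER_ADDRESS', 'ACCESS_TOKEN',
                 'DOCKER_REGISTRY', 'DOCKER_LOGIN', 'DOCKER_PASSWORD']


def parse_registry_credentials(environ=os.environ):
    # Fail fast if any required variable is missing.
    missing = [name for name in REQUIRED_VARS if not environ.get(name)]
    if missing:
        raise RuntimeError('Missing required env vars: {}'.format(', '.join(missing)))

    registries = environ['DOCKER_REGISTRY'].split(',')
    logins = environ['DOCKER_LOGIN'].split(',')
    passwords = environ['DOCKER_PASSWORD'].split(',')

    # The lists are ordered alike, so they must have the same length.
    if not (len(registries) == len(logins) == len(passwords)):
        raise RuntimeError('DOCKER_REGISTRY, DOCKER_LOGIN and DOCKER_PASSWORD '
                           'must contain the same number of comma-separated items')

    # Pair each registry with its (login, password) tuple.
    return {reg.strip(): (login.strip(), pwd.strip())
            for reg, login, pwd in zip(registries, logins, passwords)}
```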

agent/VERSION (+1 −1)

```diff
@@ -1 +1 @@
-agent:4.1.1
+agent:4.2.0
```

agent/plugin_info.json (+1 −1)

```diff
@@ -1,5 +1,5 @@
 {
     "title": "Agent",
-    "description": "brief description",
+    "description": "Supervisely Agent is a small but powerful task manager that allows you to connect any computer (your office PC or a cloud server) to the platform and use it for any computational task: neural network training/inference/deployment, training data preparation and many more.",
     "type": "agent"
 }
```

agent/src/worker/agent.py (+3 −2)

```diff
@@ -60,14 +60,14 @@ def agent_connect_initially(self):
                                            shell=True, executable="/bin/bash",
                                            stdout=subprocess.PIPE).communicate()[0]
 
-        agent_info = {
+        self.agent_info = {
            'hardware_info': hw_info,
            'agent_image': json.loads(docker_img_info)["Config"]["Image"],
            'agent_image_digest': get_self_docker_image_digest()
        }
 
        self.api.simple_request('AgentConnected', sly.api_proto.ServerInfo,
-                               sly.api_proto.AgentInfo(info=json.dumps(agent_info)))
+                               sly.api_proto.AgentInfo(info=json.dumps(self.agent_info)))
 
    def send_connect_info(self):
        while True:
@@ -77,6 +77,7 @@ def send_connect_info(self):
    def get_new_task(self):
        for task in self.api.get_endless_stream('GetNewTask', sly.api_proto.Task, sly.api_proto.Empty()):
            task_msg = json.loads(task.data)
+           task_msg['agent_info'] = self.agent_info
            self.logger.debug('GET_NEW_TASK', extra={'task_msg': task_msg})
            self.start_task(task_msg)
 
```

agent/src/worker/data_manager.py (+44 −35)

```diff
@@ -58,7 +58,6 @@ def _split_images_by_cache(self, images):
 
     def download_project(self, parent_dir, name, datasets_whitelist=None):
         self.logger.info("DOWNLOAD_PROJECT", extra={'title': name})
-        #@TODO: reimplement and use path without splitting
         project_fs = sly.Project(os.path.join(parent_dir, name), sly.OpenMode.CREATE)
         project_id = self.public_api.project.get_info_by_name(self.workspace_id, name).id
         meta = sly.ProjectMeta.from_json(self.public_api.project.get_meta(project_id))
@@ -75,42 +74,50 @@ def download_project(self, parent_dir, name, datasets_whitelist=None):
 
     def download_dataset(self, dataset, dataset_id):
         images = self.public_api.image.get_list(dataset_id)
-        progress = sly.Progress('Download dataset {!r}: images'.format(dataset.name), len(images), self.logger)
+        progress_imgs = sly.Progress('Dataset {!r}: download images'.format(dataset.name), len(images), self.logger)
+        progress_anns = sly.Progress('Dataset {!r}: download annotations'.format(dataset.name), len(images), self.logger)
 
         images_to_download = images
+
+        # copy images from cache to task folder and download corresponding annotations
         if self.has_images_storage():
             images_to_download, images_in_cache, images_cache_paths = self._split_images_by_cache(images)
-            # copy images from cache to task folder
-            for img_info, img_cache_path in zip(images_in_cache, images_cache_paths):
-                dataset.add_item_file(img_info.name, img_cache_path)
-                progress.iter_done_report()
+            self.logger.info('Dataset {!r}'.format(dataset.name), extra={'total_images': len(images),
+                                                                         'images_in_cache': len(images_in_cache),
+                                                                         'images_to_download': len(images_to_download)})
+            if len(images_to_download) + len(images_in_cache) != len(images):
+                raise RuntimeError("Error with images cache during download. Please contact support.")
+            for batch_cache in sly.batched(list(zip(images_in_cache, images_cache_paths)), constants.BATCH_SIZE_GET_IMAGES_INFO()):
+                img_cache_ids = [img_info.id for img_info, _ in batch_cache]
+                ann_info_list = self.public_api.annotation.download_batch(dataset_id, img_cache_ids, progress_anns.iters_done_report)
+                img_name_to_ann = {ann.image_name: ann.annotation for ann in ann_info_list}
+                for img_info, img_cache_path in batch_cache:
+                    dataset.add_item_file(img_info.name, img_cache_path, img_name_to_ann[img_info.name])
+                    progress_imgs.iter_done_report()
 
         # download images from server
-        img_ids = []
-        img_paths = []
-        for img_info in images_to_download:
-            img_ids.append(img_info.id)
-            # TODO download to a temp file and use dataset api to add the image to the dataset.
-            img_paths.append(dataset.deprecated_make_img_path(img_info.name, img_info.ext))
+        for batch_download in sly.batched(images_to_download, constants.BATCH_SIZE_GET_IMAGES_INFO()):
+            # prepare lists for api methods
+            img_ids = []
+            img_paths = []
+            for img_info in batch_download:
+                img_ids.append(img_info.id)
+                # TODO download to a temp file and use dataset api to add the image to the dataset.
+                img_paths.append(dataset.deprecated_make_img_path(img_info.name, img_info.ext))
+
+            # download annotations
+            ann_info_list = self.public_api.annotation.download_batch(dataset_id, img_ids, progress_anns.iters_done_report)
+            img_name_to_ann = {ann.image_name: ann.annotation for ann in ann_info_list}
+            self.public_api.image.download_batch(dataset_id, img_ids, img_paths, progress_imgs.iters_done_report)
+            for img_info, img_path in zip(batch_download, img_paths):
+                dataset.add_item_file(img_info.name, img_path, img_name_to_ann[img_info.name])
 
-        self.public_api.image.download_batch(img_ids, img_paths, progress.iter_done_report)
-        for img_info, img_path in zip(images_to_download, img_paths):
-            dataset.add_item_file(img_info.name, img_path)
+            if self.has_images_storage():
+                progress_cache = sly.Progress('Dataset {!r}: cache images'.format(dataset.name), len(img_paths), self.logger)
+                img_hashes = [img_info.hash for img_info in batch_download]
+                self.storage.images.write_objects(img_paths, img_hashes, progress_cache.iter_done_report)
 
-        if self.has_images_storage():
-            progress = sly.Progress('Download dataset {!r}: cache images'.format(dataset.name), len(img_paths), self.logger)
-            img_hashes = [img_info.hash for img_info in images_to_download]
-            self.storage.images.write_objects(img_paths, img_hashes, progress.iter_done_report)
-
-        # download annotations from server
-        img_id_to_name = {image.id: image.name for image in images}
-        progress_ann = sly.Progress('Download dataset {!r}: annotations'.format(dataset.name), len(images), self.logger)
-        anns = self.public_api.annotation.get_list(dataset_id, progress_cb=progress_ann.iters_done_report)
-        for ann in anns:
-            img_name = img_id_to_name[ann.image_id]
-            dataset.set_ann_dict(img_name, ann.annotation)
-
-    #@TODO: remove legacy stuff
+    # @TODO: remove legacy stuff
     # @TODO: reimplement and use path without splitting
     def upload_project(self, parent_dir, project_name, new_title, legacy=False, add_to_existing=False):
         # @TODO: reimplement and use path without splitting
@@ -152,7 +159,7 @@ def upload_dataset(self, dataset, dataset_id):
                 hash_to_item_names[img_hash].append(item_name)
                 if self.has_images_storage():
                     if progress is None:
-                        progress = sly.Progress('Dataset {!r}: upload cache images'.format(dataset.name), items_count, self.logger)
+                        progress = sly.Progress('Dataset {!r}: cache images'.format(dataset.name), items_count, self.logger)
                     self.storage.images.write_object(item_paths.img_path, img_hash)
                     progress.iter_done_report()
 
@@ -163,18 +170,20 @@ def add_images_annotations(hashes, pb_img_cb, pb_ann_cb):
             names = [name for hash in hashes for name in hash_to_item_names[hash]]
             unrolled_hashes = [hash for hash in hashes for _ in range(len(hash_to_item_names[hash]))]
             ann_paths = [path for hash in hashes for path in hash_to_ann_paths[hash]]
-            remote_ids = self.public_api.image.add_batch(dataset_id, names, unrolled_hashes, pb_img_cb)
-            self.public_api.annotation.add_batch(remote_ids, ann_paths, pb_ann_cb)
+            remote_infos = self.public_api.image.add_batch(dataset_id, names, unrolled_hashes, pb_img_cb)
+            self.public_api.annotation.upload_batch_paths(dataset_id, [info.id for info in remote_infos], ann_paths, pb_ann_cb)
 
         # add already uploaded images + attach annotations
         remote_hashes = self.public_api.image.check_existing_hashes(list(hash_to_img_paths.keys()))
-        add_images_annotations(remote_hashes, progress_img.iter_done_report, progress_ann.iter_done_report)
+        if len(remote_hashes) > 0:
+            add_images_annotations(remote_hashes, progress_img.iters_done_report, progress_ann.iters_done_report)
 
         # upload new images + add annotations
         new_hashes = list(set(hash_to_img_paths.keys()) - set(remote_hashes))
         img_paths = [path for hash in new_hashes for path in hash_to_img_paths[hash]]
-        self.public_api.image.upload_batch(img_paths, progress_img.iter_done_report)
-        add_images_annotations(new_hashes, None, progress_ann.iter_done_report)
+        self.public_api.image.upload_batch_paths(img_paths, progress_img.iters_done_report)
+        if len(new_hashes) > 0:
+            add_images_annotations(new_hashes, None, progress_ann.iters_done_report)
 
     def upload_archive(self, task_id, dir_to_archive, archive_name):
         self.logger.info("PACK_TO_ARCHIVE ...")
```

agent/src/worker/fs_storages.py (+2)

```diff
@@ -72,6 +72,8 @@ def scan_deeper(paths):
         return obj_pathes_suffixes
 
     def get_storage_path(self, data_hash, suffix=''):
+        if suffix:
+            suffix = ".{}".format(suffix).replace("..", ".")
         st_hash = hashlib.sha256(data_hash.encode('utf-8')).hexdigest()
         st_path = osp.join(self._storage_root, st_hash[0:2], st_hash[2:5], st_hash + suffix)
         return st_path
```
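For context, `get_storage_path` shards cached objects into a two-level directory tree keyed by the SHA-256 of the data hash, and the added lines normalize the suffix so that `'png'` and `'.png'` both end up with a single leading dot. A self-contained sketch of the same computation (the storage root and sample hash are illustrative):

```python
import hashlib
import os.path as osp


def get_storage_path(storage_root, data_hash, suffix=''):
    # Normalize the suffix to exactly one leading dot, mirroring the fix above.
    if suffix:
        suffix = ".{}".format(suffix).replace("..", ".")
    st_hash = hashlib.sha256(data_hash.encode('utf-8')).hexdigest()
    # Shard by the first 2 and next 3 hex chars to keep directories small.
    return osp.join(storage_root, st_hash[0:2], st_hash[2:5], st_hash + suffix)


# Prints something like <root>/<2 hex chars>/<3 hex chars>/<full sha256>.png
print(get_storage_path('/sly_agent/storage/images', 'abc123', 'png'))
```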

agent/src/worker/task_clean_node.py (+1 −2)

```diff
@@ -8,7 +8,6 @@
 from worker.task_sly import TaskSly
 from worker.agent_utils import TaskDirCleaner
 from worker import constants
-from worker.utils import batched
 
 
 class TaskCleanNode(TaskSly):
@@ -40,7 +39,7 @@ def get_dataset_images_hashes(self, dataset_id):
         image_array = self.api.simple_request('GetDatasetImages', sly.api_proto.ImageArray, sly.api_proto.Id(id=dataset_id))
         img_hashes = []
 
-        for batch_img_ids in batched(list(image_array.images), constants.BATCH_SIZE_GET_IMAGES_INFO()):
+        for batch_img_ids in sly.batched(list(image_array.images), constants.BATCH_SIZE_GET_IMAGES_INFO()):
             images_info_proto = self.api.simple_request('GetImagesInfo', sly.api_proto.ImagesInfo,
                                                         sly.api_proto.ImageArray(images=batch_img_ids))
             img_hashes.extend([(info.hash, info.ext) for info in images_info_proto.infos])
```

agent/src/worker/task_dockerized.py (+6 −2)

```diff
@@ -3,7 +3,7 @@
 from enum import Enum
 from threading import Lock
 import json
-from docker.errors import ImageNotFound as DockerImageNotFound
+from docker.errors import DockerException, ImageNotFound as DockerImageNotFound
 
 import supervisely_lib as sly
 
@@ -112,7 +112,11 @@ def _docker_pull(self):
         self.logger.info('Docker image will be pulled', extra={'image_name': self.docker_image_name})
         progress_dummy = sly.Progress('Pulling image...', 1, ext_logger=self.logger)
         progress_dummy.iter_done_report()
-        pulled_img = self._docker_api.images.pull(self.docker_image_name)
+        try:
+            pulled_img = self._docker_api.images.pull(self.docker_image_name)
+        except DockerException:
+            raise DockerException('Unable to pull image: not enough free disk space or something wrong with DockerHub.'
+                                  ' Please, run the task again or email support.')
         self.logger.info('Docker image has been pulled', extra={'pulled': {'tags': pulled_img.tags, 'id': pulled_img.id}})
 
     def _docker_image_exists(self):
```

agent/src/worker/task_sly.py (+1)

```diff
@@ -21,6 +21,7 @@ def init_api(self):
 
     def report_start(self):
         self.logger.info('TASK_START', extra={'event_type': sly.EventType.TASK_STARTED})
+        self.logger.info('TASK_MSG', extra=self.info)
 
     def task_main_func(self):
         raise NotImplementedError()
```

agent/src/worker/utils.py (−6)

This file was deleted.

base_images/jupyterlab/VERSION (+1 −1)

```diff
@@ -1 +1 @@
-base-jupyterlab:4.1.0
+base-jupyterlab:4.2.0
```

base_images/py/VERSION (+1 −1)

```diff
@@ -1 +1 @@
-base-py:4.1.0
+base-py:4.2.0
```

base_images/pytorch/VERSION (+1 −1)

```diff
@@ -1 +1 @@
-base-pytorch:4.1.0
+base-pytorch:4.2.0
```

base_images/pytorch_v04/VERSION (+1 −1)

```diff
@@ -1 +1 @@
-base-pytorch-v04:4.1.0
+base-pytorch-v04:4.2.0
```

base_images/tensorflow/VERSION (+1 −1)

```diff
@@ -1 +1 @@
-base-tensorflow:4.1.0
+base-tensorflow:4.2.0
```

help/jupyterlab_scripts/VERSION (+1 −1)

```diff
@@ -1 +1 @@
-jupyterlab:4.1.1
+jupyterlab:4.2.0
```
