Binary file removed .DS_Store
47 changes: 46 additions & 1 deletion datatorch/api/api.py
@@ -163,11 +163,55 @@ def upload_to_default_filesource(
        )
        print(r.text + " " + endpoint)

    def upload_to_filesource(
        self,
        project: Project,
        file: IO,
        storageId: str = None,
        storageFolderName=None,
        dataset: Dataset = None,
        **kwargs,
    ):
        """
        Uploads a file to the provided `storageId` if available;
        otherwise, retrieves the default storage ID (DataTorch Storage) from the project.
        """
        # Retrieve the default storageId if not explicitly provided
        if storageId is None:
            storageId = project.storage_link_default().id

        storageFolderName = "" if storageFolderName is None else storageFolderName
        datasetId = "" if dataset is None else dataset.id
        importFiles = "false" if dataset is None else "true"

        # Construct the endpoint
        endpoint = f"{self.api_url}/file/v1/upload/{storageId}?path={storageFolderName}&import={importFiles}&datasetId={datasetId}"

        # Determine the MIME type (python-magic if available, else by extension)
        if magic:
            tell = file.tell()
            mimetype = magic.from_buffer(file.read(1024), mime=True)
            file.seek(tell)
        else:
            mimetype = mimetypes.guess_type(file.name)[0]

        # Make the POST request
        r = requests.post(
            endpoint,
            files={"file": (os.path.basename(file.name), file, mimetype)},
            headers={self.token_header: self._api_token},
            stream=True,
        )

        # Raise an error for failed requests
        r.raise_for_status()

    def glob_upload_folder(
        self,
        project: Project,
        uploadingFromGlob: str,
        storageFolderName: str,
        storageId: str = None,
        folderSplit=1000,
        dataset: Dataset = None,
        recursive=False,
@@ -192,9 +236,10 @@ def glob_upload_folder(
        folderIndex += 1
        uploadFolderName = storageFolderName + "_" + str(folderIndex)
        file = open(file, "rb")
        self.upload_to_default_filesource(
        self.upload_to_filesource(
            project=project,
            file=file,
            storageId=storageId,
            storageFolderName=uploadFolderName,
            dataset=dataset,
        )
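For reference, a minimal usage sketch of the new method (a sketch only, assuming an already-authenticated `ApiClient` instance `client`, a fetched `project`, and a `dataset`; these variable names are illustrative, not part of the change):

# Storage-only upload: with no dataset, the endpoint is called with import=false.
with open("image.jpg", "rb") as f:
    client.upload_to_filesource(project=project, file=f)

# Upload and import into a dataset: the endpoint is called with import=true.
with open("image.jpg", "rb") as f:
    client.upload_to_filesource(project=project, file=f, dataset=dataset)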
3 changes: 3 additions & 0 deletions datatorch/cli/groups.py
@@ -12,6 +12,7 @@
from .pipeline import pipeline
from .action import action
from .import_cmds import import_cmd
from .upload import upload


@click.group()
@@ -29,3 +30,5 @@ def main():
main.add_command(agent)
main.add_command(action)
main.add_command(import_cmd)

main.add_command(upload)
10 changes: 10 additions & 0 deletions datatorch/cli/upload/__init__.py
@@ -0,0 +1,10 @@
import click
from .folder import folder


@click.group(help="Commands for managing uploads.")
def upload():
pass


upload.add_command(folder)
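With the `folder` subcommand registered on this group, the CLI should be invokable along the lines of `datatorch upload folder <folder_path> <project_id>` (assuming `datatorch` is the installed console entry point); the two positional arguments are defined in folder.py below.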
131 changes: 131 additions & 0 deletions datatorch/cli/upload/folder.py
@@ -0,0 +1,131 @@
import os
import click
from datatorch.core.settings import UserSettings
from datatorch.api.api import ApiClient
from datatorch.api.entity.project import Project
from ..spinner import Spinner


@click.command("folder")
@click.argument("folder_path", type=click.Path(exists=True, file_okay=False))
@click.argument("project_id", type=str)
def folder(folder_path, project_id):
"""Bulk upload files to a specified project."""

# Get the list of files to upload
files = [f for f in os.listdir(folder_path)
if os.path.isfile(os.path.join(folder_path, f))]
total_files = len(files)

if total_files == 0:
click.echo("No files found in the specified folder.")
return

# Load user settings
user_settings = UserSettings()
api_key = user_settings.api_key
api_url = user_settings.api_url

if not api_key or not api_url:
click.echo("You are not logged in. "
"Please log in using the `login` command.")
return

    # Initialize the API client
    client = ApiClient(api_url=api_url, api_key=api_key)

    # Validate the endpoint
    if not client.validate_endpoint():
        click.echo("Error: Invalid API endpoint.")
        return
    click.echo("Valid API endpoint verified.")

    # Retrieve the project by ID
    try:
        project = client.project(project_id)
        click.echo(f"Retrieved project: {project.name}")
    except Exception as e:
        click.echo(f"Error: Unable to retrieve "
                   f"project with ID '{project_id}'. {e}")
        return

    # Display available datasets
    try:
        datasets = project.datasets()
        if datasets:
            click.echo("\nAvailable Datasets:")
            for idx, dataset in enumerate(datasets, start=1):
                click.echo(f"{idx}. {dataset.name} (ID: {dataset.id})")

            # Prompt user to select a dataset
            choice = click.prompt(
                "Enter the number of the dataset",
                type=int,
                default=1,
            )
            if 1 <= choice <= len(datasets):
                selected_dataset = datasets[choice - 1]
                click.echo(f"Selected Dataset: {selected_dataset.name} "
                           f"(ID: {selected_dataset.id})")
            else:
                click.echo(f"Invalid choice. Please select a number "
                           f"between 1 and {len(datasets)}.")
                return
        else:
            # No datasets found; ask whether to continue with a global upload
            selected_dataset = None
            continue_upload = click.confirm("No datasets found for this project. "
                                            "Do you want to continue with a global upload?",
                                            default=False)
            if not continue_upload:
                click.echo("Ending...")
                return
    except Exception as e:
        click.echo(f"Error retrieving datasets: {e}")
        return

    # Display available storage links and prompt user selection
    try:
        storage_links = project.storage_links()
        if not storage_links:
            click.echo("No storage available for this project.")
            return

        click.echo("\nAvailable Storages:")
        for idx, storage_link in enumerate(storage_links):
            click.echo(f"{idx + 1}. {storage_link.name} "
                       f"(ID: {storage_link.id})")

        # Prompt user to select a storage link
        choice = click.prompt(
            "Enter the number of the storage to use",
            type=int,
            default=1,
        )
        if 1 <= choice <= len(storage_links):
            selected_storage_link = storage_links[choice - 1]
        else:
            click.echo(f"Invalid choice. Please select a number "
                       f"between 1 and {len(storage_links)}.")
            return

        click.echo(f"Selected Storage: {selected_storage_link.name} "
                   f"(ID: {selected_storage_link.id})")
    except Exception as e:
        click.echo(f"Error retrieving storage: {e}")
        return

    # Initialize the spinner
    spinner = Spinner(f"Uploading files (0/{total_files})")

    # Upload files to the selected storage and dataset using their IDs
    try:
        for idx, file_name in enumerate(files, start=1):
            file_path = os.path.join(folder_path, file_name)
            spinner.set_text(f"Uploading file ({idx}/{total_files})")
            with open(file_path, "rb") as file:
                client.upload_to_filesource(
                    project=project,
                    file=file,
                    storageId=selected_storage_link.id,
                    storageFolderName=None,
                    dataset=selected_dataset,
                )
        spinner.done(f"Uploaded all {total_files} files successfully!")
    except Exception as e:
        spinner.done(f"Error during upload: {e}")
        return
28 changes: 28 additions & 0 deletions examples/upload_files.py
@@ -0,0 +1,28 @@
import os
import datatorch as dt

api = dt.api.ApiClient('your-api-key')
proj = api.project('user-name/project-name')
dset = proj.dataset('data-set-name')

folder_to_upload = 'uploadme'
upload_to_storage_id = 'your-storage-id'

# Get all the file names in the folder
files = [f for f in os.listdir(folder_to_upload)
         if os.path.isfile(os.path.join(folder_to_upload, f))]

# Upload files to the selected storage and dataset using their IDs
try:
    for file_name in files:
        file_path = os.path.join(folder_to_upload, file_name)
        with open(file_path, "rb") as file:
            api.upload_to_filesource(
                project=proj,
                file=file,
                storageId=upload_to_storage_id,
                storageFolderName=None,
                dataset=dset,
            )
except Exception as e:
    print(f"Error Uploading: {e}")
Binary file added examples/uploadme/1copy.jpg
Binary file added examples/uploadme/2copy.jpg
Binary file added examples/uploadme/3copy.jpg
10 changes: 5 additions & 5 deletions setup.py
@@ -1,8 +1,8 @@
from setuptools import setup, find_packages
import sys

# Ensure the Python version is 3.13 or higher
assert sys.version_info >= (3, 13, 0), "DataTorch requires Python 3.13+"
# Ensure the Python version is 3.12 or higher
assert sys.version_info >= (3, 12, 0), "DataTorch requires Python 3.12+"

with open("README.md", "r", encoding="utf-8") as fp:
long_description = fp.read()
@@ -33,7 +33,7 @@

setup(
    name="datatorch",
    version="0.4.8.4",
    version="0.4.8.5",
    description="A CLI and library for interacting with DataTorch.",
    author="DataTorch",
    author_email="[email protected]",
@@ -45,7 +45,7 @@
    long_description=long_description,
    long_description_content_type="text/markdown",
    install_requires=requirements,
    python_requires=">=3.13",
    python_requires=">=3.12",
    license="MIT license",
    zip_safe=False,
    include_package_data=True,
@@ -55,7 +55,7 @@
"Framework :: Pytest",
"Intended Audience :: Developers",
"Natural Language :: English",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.12",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Software Development :: Libraries :: Python Modules",
],