Remove explicit tqdm dependency (#448)

jhamon · web-flow · commit b510e21b6cb7 · 2025-02-03T16:06:57.000-05:00
## Problem

We want to minimize the number of required dependencies we have, and
tqdm is non-essential. Moreover, common notebook environments like
Google Colab will already have tqdm installed even if we do not declare
it as an explicit dependency.

## Solution

Instead of declaring a hard dependency on tqdm, we detect it at import
time and use it if it is available in the environment. Otherwise, we fall
back to a no-op stub implementation of our own, so no progress bar is shown.

## Type of Change

- [x] Breaking change (fix or feature that would cause existing
functionality to not work as expected)
- [x] This change requires a documentation update
diff --git a/pinecone/__init__.py b/pinecone/__init__.py
@@ -2,11 +2,6 @@
 .. include:: ../README.md
 """
 
-import warnings
-from tqdm import TqdmExperimentalWarning
-
-warnings.filterwarnings("ignore", category=TqdmExperimentalWarning)
-
 from .deprecated_plugins import check_for_deprecated_plugins
 from .deprecation_warnings import *
 from .config import *
diff --git a/pinecone/data/index.py b/pinecone/data/index.py
@@ -1,4 +1,4 @@
-from tqdm.autonotebook import tqdm
+from pinecone.utils.tqdm import tqdm
 
 import logging
 import json
diff --git a/pinecone/data/index_asyncio.py b/pinecone/data/index_asyncio.py
@@ -1,4 +1,5 @@
-from tqdm.autonotebook import tqdm
+from pinecone.utils.tqdm import tqdm
+
 
 import logging
 import asyncio
diff --git a/pinecone/grpc/index_grpc.py b/pinecone/grpc/index_grpc.py
@@ -3,7 +3,7 @@
 
 from google.protobuf import json_format
 
-from tqdm.autonotebook import tqdm
+from pinecone.utils.tqdm import tqdm
 from concurrent.futures import as_completed, Future
 
 
@@ -207,7 +207,9 @@ def upsert_from_dataframe(
             results = [
                 async_result.result()
                 for async_result in tqdm(
-                    cast_results, disable=not show_progress, desc="collecting async responses"
+                    iterable=cast_results,
+                    disable=not show_progress,
+                    desc="collecting async responses",
                 )
             ]
 
diff --git a/pinecone/utils/tqdm.py b/pinecone/utils/tqdm.py
@@ -0,0 +1,43 @@
+try:
+    # Use the notebook-friendly auto selection if tqdm is installed.
+    from tqdm.auto import tqdm
+except ImportError:
+    # Fallback: a silent stand-in for tqdm. It covers iteration, update(),
+    # close(), the set_* helpers and context-manager use; nothing is displayed.
+    class tqdm:  # type: ignore
+        """No-op replacement used when the optional tqdm package is missing."""
+
+        def __init__(self, iterable=None, total=None, desc="", **kwargs):
+            self.iterable = iterable
+            self.total = total
+            self.desc = desc
+            # Other tqdm options (e.g. ``disable``) are accepted and ignored.
+
+        def __iter__(self):
+            # Just iterate over the underlying iterable
+            for item in self.iterable:
+                yield item
+
+        def update(self, n=1):
+            # No-op: This stub doesn't track progress
+            pass
+
+        def set_description(self, desc=None, refresh=True):
+            # Remember the label so ``desc`` stays readable; nothing is drawn.
+            self.desc = desc or ""
+
+        def set_postfix(self, ordered_dict=None, refresh=True, **kwargs):
+            # No-op: there is no bar to annotate
+            pass
+
+        def close(self):
+            # No-op: there is nothing to release
+            pass
+
+        def __enter__(self):
+            # Allow use as a context manager
+            return self
+
+        def __exit__(self, exc_type, exc_value, traceback):
+            # Nothing to cleanup
+            pass
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -38,7 +38,6 @@ urllib3 = [
   { version = ">=1.26.0", python = ">=3.8,<3.12" },
   { version = ">=1.26.5", python = "^3.12" }
 ]
-tqdm = ">=4.64.1"
 # certifi does not follow semver. Should always be
 # on latest but setting a broad range to have maximum
 # compatibility with libraries that may pin version.
diff --git a/tests/integration/data/test_upsert_from_dataframe.py b/tests/integration/data/test_upsert_from_dataframe.py
@@ -0,0 +1,28 @@
+import pandas as pd
+from pinecone.data import _Index
+from ..helpers import embedding_values, random_string
+
+
+class TestUpsertFromDataFrame:
+    def test_upsert_from_dataframe(self, idx: _Index):
+        # Smoke test: three rows with dense values, sparse values and metadata; passes if the upsert does not raise.
+        data = {
+            "id": ["1", "2", "3"],
+            "values": [embedding_values(), embedding_values(), embedding_values()],
+            "sparse_values": [
+                {"indices": [1], "values": [0.234]},
+                {"indices": [2], "values": [0.432]},
+                {"indices": [3], "values": [0.543]},
+            ],
+            "metadata": [
+                {"source": "generated", "quality": "high"},
+                {"source": "generated", "quality": "medium"},
+                {"source": "generated", "quality": "low"},
+            ],
+        }
+
+        # Build the DataFrame passed to upsert_from_dataframe (columns are the dict keys above).
+        df = pd.DataFrame(data)
+
+        ns = random_string(10)
+        idx.upsert_from_dataframe(df=df, namespace=ns)

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-from tqdm.autonotebook import tqdm`
	`1`	`+from pinecone.utils.tqdm import tqdm`
`2`	`2`
`3`	`3`	`import logging`
`4`	`4`	`import json`
Original file line number	Diff line number	Diff line change
`@@ -3,7 +3,7 @@`
`3`	`3`
`4`	`4`	`from google.protobuf import json_format`
`5`	`5`
`6`		`-from tqdm.autonotebook import tqdm`
	`6`	`+from pinecone.utils.tqdm import tqdm`
`7`	`7`	`from concurrent.futures import as_completed, Future`
`8`	`8`
`9`	`9`
`@@ -207,7 +207,9 @@ def upsert_from_dataframe(`
`207`	`207`	`results = [`
`208`	`208`	`async_result.result()`
`209`	`209`	`for async_result in tqdm(`
`210`		`- cast_results, disable=not show_progress, desc="collecting async responses"`
	`210`	`+ iterable=cast_results,`
	`211`	`+ disable=not show_progress,`
	`212`	`+ desc="collecting async responses",`
`211`	`213`	`)`
`212`	`214`	`]`
`213`	`215`
Original file line number	Diff line number	Diff line change
`@@ -38,7 +38,6 @@ urllib3 = [`
`38`	`38`	`{ version = ">=1.26.0", python = ">=3.8,<3.12" },`
`39`	`39`	`{ version = ">=1.26.5", python = "^3.12" }`
`40`	`40`	`]`
`41`		`-tqdm = ">=4.64.1"`
`42`	`41`	`# certifi does not follow semver. Should always be`
`43`	`42`	`# on latest but setting a broad range to have maximum`
`44`	`43`	`# compatibility with libraries that may pin version.`