 51 |  51 | },
 52 |  52 | {
 53 |  53 | "cell_type": "code",
 54 |     | - "execution_count": null,
    |  54 | + "execution_count": 2,
 55 |  55 | "metadata": {},
 56 |     | - "outputs": [],
    |  56 | + "outputs": [
    |  57 | +  {
    |  58 | +   "name": "stderr",
    |  59 | +   "output_type": "stream",
    |  60 | +   "text": [
    |  61 | +    "/Users/tyler.hutcherson/Library/Caches/pypoetry/virtualenvs/redisvl-VnTEShF2-py3.13/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
    |  62 | +    " from .autonotebook import tqdm as notebook_tqdm\n",
    |  63 | +    "Compiling the model with `torch.compile` and using a `torch.mps` device is not supported. Falling back to non-compiled mode.\n"
    |  64 | +   ]
    |  65 | +  }
    |  66 | + ],
 57 |  67 | "source": [
 58 |  68 | "# Initialize the vectorizer\n",
 59 |  69 | "vectorizer = HFTextVectorizer(\n",
 60 |     | - " model=\"sentence-transformers/all-mpnet-base-v2\",\n",
    |  70 | + " model=\"redis/langcache-embed-v1\",\n",
 61 |  71 | " cache_folder=os.getenv(\"SENTENCE_TRANSFORMERS_HOME\")\n",
 62 |  72 | ")"
 63 |  73 | ]

103 | 113 | },
104 | 114 | {
105 | 115 | "cell_type": "code",
106 |     | - "execution_count": 4,
    | 116 | + "execution_count": 5,
107 | 117 | "metadata": {},
108 | 118 | "outputs": [
109 | 119 | {
110 | 120 | "name": "stdout",
111 | 121 | "output_type": "stream",
112 | 122 | "text": [
113 |     | - "Stored with key: embedcache:059d...\n"
    | 123 | + "Stored with key: embedcache:909f...\n"
114 | 124 | ]
115 | 125 | }
116 | 126 | ],
117 | 127 | "source": [
118 | 128 | "# Text to embed\n",
119 | 129 | "text = \"What is machine learning?\"\n",
120 |     | - "model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
    | 130 | + "model_name = \"redis/langcache-embed-v1\"\n",
121 | 131 | "\n",
122 | 132 | "# Generate the embedding\n",
123 | 133 | "embedding = vectorizer.embed(text)\n",

147 | 157 | },
148 | 158 | {
149 | 159 | "cell_type": "code",
150 |     | - "execution_count": 5,
    | 160 | + "execution_count": 6,
151 | 161 | "metadata": {},
152 | 162 | "outputs": [
153 | 163 | {
154 | 164 | "name": "stdout",
155 | 165 | "output_type": "stream",
156 | 166 | "text": [
157 | 167 | "Found in cache: What is machine learning?\n",
158 |     | - "Model: sentence-transformers/all-mpnet-base-v2\n",
    | 168 | + "Model: redis/langcache-embed-v1\n",
159 | 169 | "Metadata: {'category': 'ai', 'source': 'user_query'}\n",
160 | 170 | "Embedding shape: (768,)\n"
161 | 171 | ]

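Note on the cell behind these outputs: it stores one embedding and reads it back. As a reference for the call shape, here is a minimal Python sketch assembled from the keyword arguments visible in the removed helper further down in this diff, reusing the notebook's example_cache and embedding variables; the metadata= keyword and the key returned by set() are assumptions inferred from the printed output, not confirmed by any hunk:

    # Store an embedding under (text, model_name); assumed to return the Redis key
    key = example_cache.set(
        text="What is machine learning?",
        model_name="redis/langcache-embed-v1",
        embedding=embedding,
        metadata={"category": "ai", "source": "user_query"},  # assumed keyword
    )
    print(f"Stored with key: {key}")  # e.g. embedcache:909f...

    # Look the entry up again; get() returns a falsy value on a miss
    if result := example_cache.get(
        text="What is machine learning?",
        model_name="redis/langcache-embed-v1",
    ):
        print(f"Metadata: {result['metadata']}")
        print(f"Embedding length: {len(result['embedding'])}")  # 768 dims
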
184 | 194 | },
185 | 195 | {
186 | 196 | "cell_type": "code",
187 |     | - "execution_count": 6,
    | 197 | + "execution_count": 7,
188 | 198 | "metadata": {},
189 | 199 | "outputs": [
190 | 200 | {

218 | 228 | },
219 | 229 | {
220 | 230 | "cell_type": "code",
221 |     | - "execution_count": 7,
    | 231 | + "execution_count": 8,
222 | 232 | "metadata": {},
223 | 233 | "outputs": [
224 | 234 | {

251 | 261 | },
252 | 262 | {
253 | 263 | "cell_type": "code",
254 |     | - "execution_count": 8,
    | 264 | + "execution_count": 9,
255 | 265 | "metadata": {},
256 | 266 | "outputs": [
257 | 267 | {
258 | 268 | "name": "stdout",
259 | 269 | "output_type": "stream",
260 | 270 | "text": [
261 |     | - "Stored with key: embedcache:059d...\n",
    | 271 | + "Stored with key: embedcache:909f...\n",
262 | 272 | "Exists by key: True\n",
263 | 273 | "Retrieved by key: What is machine learning?\n"
264 | 274 | ]

297 | 307 | },
298 | 308 | {
299 | 309 | "cell_type": "code",
300 |     | - "execution_count": 9,
    | 310 | + "execution_count": 10,
301 | 311 | "metadata": {},
302 | 312 | "outputs": [
303 | 313 | {

382 | 392 | },
383 | 393 | {
384 | 394 | "cell_type": "code",
385 |     | - "execution_count": 10,
    | 395 | + "execution_count": 11,
386 | 396 | "metadata": {},
387 | 397 | "outputs": [
388 | 398 | {

430 | 440 | },
431 | 441 | {
432 | 442 | "cell_type": "code",
433 |     | - "execution_count": 11,
    | 443 | + "execution_count": 12,
434 | 444 | "metadata": {},
435 | 445 | "outputs": [
436 | 446 | {

484 | 494 | },
485 | 495 | {
486 | 496 | "cell_type": "code",
487 |     | - "execution_count": 12,
    | 497 | + "execution_count": 13,
488 | 498 | "metadata": {},
489 | 499 | "outputs": [
490 | 500 | {

533 | 543 | },
534 | 544 | {
535 | 545 | "cell_type": "code",
536 |     | - "execution_count": 13,
    | 546 | + "execution_count": 14,
537 | 547 | "metadata": {},
538 | 548 | "outputs": [
539 | 549 | {
540 | 550 | "name": "stdout",
541 | 551 | "output_type": "stream",
542 | 552 | "text": [
543 |     | - "Computing embedding for: What is artificial intelligence?\n",
544 |     | - "Computing embedding for: How does machine learning work?\n",
545 |     | - "Found in cache: What is artificial intelligence?\n",
546 |     | - "Computing embedding for: What are neural networks?\n",
547 |     | - "Found in cache: How does machine learning work?\n",
548 | 553 | "\n",
549 | 554 | "Statistics:\n",
550 | 555 | "Total queries: 5\n",

562 | 567 | " ttl=3600 # 1 hour TTL\n",
563 | 568 | ")\n",
564 | 569 | "\n",
565 |     | - "# Function to get embedding with caching\n",
566 |     | - "def get_cached_embedding(text, model_name):\n",
567 |     | - " # Check if it's in the cache first\n",
568 |     | - " if cached_result := example_cache.get(text=text, model_name=model_name):\n",
569 |     | - " print(f\"Found in cache: {text}\")\n",
570 |     | - " return cached_result[\"embedding\"]\n",
571 |     | - " \n",
572 |     | - " # Not in cache, compute the embedding\n",
573 |     | - " print(f\"Computing embedding for: {text}\")\n",
574 |     | - " embedding = vectorizer.embed(text)\n",
575 |     | - " \n",
576 |     | - " # Store in cache\n",
577 |     | - " example_cache.set(\n",
578 |     | - " text=text,\n",
579 |     | - " model_name=model_name,\n",
580 |     | - " embedding=embedding,\n",
581 |     | - " )\n",
582 |     | - " \n",
583 |     | - " return embedding\n",
    | 570 | + "vectorizer = HFTextVectorizer(\n",
    | 571 | + " model=model_name,\n",
    | 572 | + " cache=example_cache,\n",
    | 573 | + " cache_folder=os.getenv(\"SENTENCE_TRANSFORMERS_HOME\")\n",
    | 574 | + ")\n",
584 | 575 | "\n",
585 | 576 | "# Simulate processing a stream of queries\n",
586 | 577 | "queries = [\n",

604 | 595 | " cache_hits += 1\n",
605 | 596 | " \n",
606 | 597 | " # Get embedding (will compute or use cache)\n",
607 |     | - " embedding = get_cached_embedding(query, model_name)\n",
    | 598 | + " embedding = vectorizer.embed(query)\n",
608 | 599 | "\n",
609 | 600 | "# Report statistics\n",
610 | 601 | "cache_misses = total_queries - cache_hits\n",

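Net effect of this hunk: the hand-rolled get_cached_embedding helper goes away, and the EmbeddingsCache is attached to HFTextVectorizer itself, so every embed() call checks Redis before running the model. A minimal standalone sketch of the new pattern, assuming a local Redis and import paths that this diff does not show:

    import os

    # Import paths are assumptions; the notebook's import cell is not in this diff.
    from redisvl.extensions.cache.embeddings import EmbeddingsCache
    from redisvl.utils.vectorize import HFTextVectorizer

    example_cache = EmbeddingsCache(
        name="embedcache",
        redis_url="redis://localhost:6379",
        ttl=3600,  # entries expire after one hour
    )

    # The vectorizer consults the cache on every embed() call
    vectorizer = HFTextVectorizer(
        model="redis/langcache-embed-v1",
        cache=example_cache,
        cache_folder=os.getenv("SENTENCE_TRANSFORMERS_HOME"),
    )

    vectorizer.embed("What are neural networks?")  # miss: model runs, result is cached
    vectorizer.embed("What are neural networks?")  # hit: served from Redis
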
632 | 623 | },
633 | 624 | {
634 | 625 | "cell_type": "code",
635 |     | - "execution_count": 14,
    | 626 | + "execution_count": 15,
636 | 627 | "metadata": {},
637 | 628 | "outputs": [
638 | 629 | {
639 | 630 | "name": "stdout",
640 | 631 | "output_type": "stream",
641 | 632 | "text": [
642 | 633 | "Benchmarking without caching:\n",
643 |     | - "Time taken without caching: 0.0940 seconds\n",
644 |     | - "Average time per embedding: 0.0094 seconds\n",
    | 634 | + "Time taken without caching: 0.4735 seconds\n",
    | 635 | + "Average time per embedding: 0.0474 seconds\n",
645 | 636 | "\n",
646 | 637 | "Benchmarking with caching:\n",
647 |     | - "Time taken with caching: 0.0237 seconds\n",
648 |     | - "Average time per embedding: 0.0024 seconds\n",
    | 638 | + "Time taken with caching: 0.0663 seconds\n",
    | 639 | + "Average time per embedding: 0.0066 seconds\n",
649 | 640 | "\n",
650 | 641 | "Performance comparison:\n",
651 |     | - "Speedup with caching: 3.96x faster\n",
652 |     | - "Time saved: 0.0703 seconds (74.8%)\n",
653 |     | - "Latency reduction: 0.0070 seconds per query\n"
    | 642 | + "Speedup with caching: 7.14x faster\n",
    | 643 | + "Time saved: 0.4073 seconds (86.0%)\n",
    | 644 | + "Latency reduction: 0.0407 seconds per query\n"
654 | 645 | ]
655 | 646 | }
656 | 647 | ],
657 | 648 | "source": [
658 | 649 | "# Text to use for benchmarking\n",
659 | 650 | "benchmark_text = \"This is a benchmark text to measure the performance of embedding caching.\"\n",
660 |     | - "benchmark_model = \"sentence-transformers/all-mpnet-base-v2\"\n",
661 | 651 | "\n",
662 | 652 | "# Create a fresh cache for benchmarking\n",
663 | 653 | "benchmark_cache = EmbeddingsCache(\n",
664 | 654 | " name=\"benchmark_cache\",\n",
665 | 655 | " redis_url=\"redis://localhost:6379\",\n",
666 | 656 | " ttl=3600 # 1 hour TTL\n",
667 | 657 | ")\n",
668 |     | - "\n",
669 |     | - "# Function to get embeddings without caching\n",
670 |     | - "def get_embedding_without_cache(text, model_name):\n",
671 |     | - " return vectorizer.embed(text)\n",
672 |     | - "\n",
673 |     | - "# Function to get embeddings with caching\n",
674 |     | - "def get_embedding_with_cache(text, model_name):\n",
675 |     | - " if cached_result := benchmark_cache.get(text=text, model_name=model_name):\n",
676 |     | - " return cached_result[\"embedding\"]\n",
677 |     | - " \n",
678 |     | - " embedding = vectorizer.embed(text)\n",
679 |     | - " benchmark_cache.set(\n",
680 |     | - " text=text,\n",
681 |     | - " model_name=model_name,\n",
682 |     | - " embedding=embedding\n",
683 |     | - " )\n",
684 |     | - " return embedding\n",
    | 658 | + "vectorizer.cache = benchmark_cache\n",
685 | 659 | "\n",
686 | 660 | "# Number of iterations for the benchmark\n",
687 | 661 | "n_iterations = 10\n",
688 | 662 | "\n",
689 | 663 | "# Benchmark without caching\n",
690 | 664 | "print(\"Benchmarking without caching:\")\n",
691 | 665 | "start_time = time.time()\n",
692 |     | - "get_embedding_without_cache(benchmark_text, benchmark_model)\n",
    | 666 | + "for _ in range(n_iterations):\n",
    | 667 | + " embedding = vectorizer.embed(benchmark_text, skip_cache=True)\n",
693 | 668 | "no_cache_time = time.time() - start_time\n",
694 | 669 | "print(f\"Time taken without caching: {no_cache_time:.4f} seconds\")\n",
695 | 670 | "print(f\"Average time per embedding: {no_cache_time/n_iterations:.4f} seconds\")\n",
696 | 671 | "\n",
697 | 672 | "# Benchmark with caching\n",
698 | 673 | "print(\"\\nBenchmarking with caching:\")\n",
699 | 674 | "start_time = time.time()\n",
700 |     | - "get_embedding_with_cache(benchmark_text, benchmark_model)\n",
    | 675 | + "for _ in range(n_iterations):\n",
    | 676 | + " embedding = vectorizer.embed(benchmark_text)\n",
701 | 677 | "cache_time = time.time() - start_time\n",
702 | 678 | "print(f\"Time taken with caching: {cache_time:.4f} seconds\")\n",
703 | 679 | "print(f\"Average time per embedding: {cache_time/n_iterations:.4f} seconds\")\n",

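The benchmark cell simplifies the same way: rather than two wrapper functions, it points the vectorizer at a fresh cache via vectorizer.cache and uses skip_cache=True (both visible in the hunk above) to force recomputation on the cold path. A condensed sketch of the timing loop, under the same assumptions as the previous sketch:

    import time

    n_iterations = 10
    benchmark_text = ("This is a benchmark text to measure "
                      "the performance of embedding caching.")

    vectorizer.cache = benchmark_cache  # swap in the fresh cache

    # Cold path: bypass the cache entirely
    start = time.time()
    for _ in range(n_iterations):
        vectorizer.embed(benchmark_text, skip_cache=True)
    no_cache_time = time.time() - start

    # Warm path: the first call populates the cache, the rest hit Redis
    start = time.time()
    for _ in range(n_iterations):
        vectorizer.embed(benchmark_text)
    cache_time = time.time() - start

    print(f"Speedup with caching: {no_cache_time / cache_time:.2f}x faster")
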
785 | 761 | "name": "python",
786 | 762 | "nbconvert_exporter": "python",
787 | 763 | "pygments_lexer": "ipython3",
788 |     | - "version": "3.10.12"
    | 764 | + "version": "3.13.2"
789 | 765 | }
790 | 766 | },
791 | 767 | "nbformat": 4,