Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions index_advisor_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import os
import random
import string

import numpy as np
from pymongo import MongoClient

# Seed both random number generators with a fixed value so repeated runs
# produce the same sequence of generated values.
random.seed(42)
np.random.seed(42)

# The connection string is read from the environment instead of being
# hard-coded: an empty host string is rejected by MongoClient, and a real
# connection string should not be committed to the repository.
mongo_uri = os.environ.get("MONGODB_URI", "")
if not mongo_uri:
    raise SystemExit(
        "Set the MONGODB_URI environment variable to the connection string "
        "of the server that should receive the index advisor test data."
    )

client = MongoClient(mongo_uri)
db = client["index_advisor_test"]

# ============================================================
# Collection "a": equality filter, no index
# ============================================================
# 10,000 documents, each with a random "user_id" in [1, 5000] and a random
# five-letter lowercase "name". No index is created on this collection.
coll = db["a"]
if not coll.estimated_document_count():
    # Only populate when the collection reports no documents.
    equality_docs = []
    for _ in range(10000):
        # Draw user_id before name so the seeded random sequence is
        # consumed in the same order on every run.
        user_id = random.randint(1, 5000)
        name = ''.join(random.choices(string.ascii_lowercase, k=5))
        equality_docs.append({"user_id": user_id, "name": name})
    coll.insert_many(equality_docs)
    print("Equality filter collection created with sample data.")


# ============================================================
# Collection "b": range query, no index
# ============================================================
# 20,000 documents with sequential "user_id" values 0..19999 and a random
# "age" in [18, 70]. No index is created on this collection.
coll = db["b"]
if not coll.estimated_document_count():
    # Only populate when the collection reports no documents.
    range_docs = []
    for user_id in range(20000):
        range_docs.append({"user_id": user_id, "age": random.randint(18, 70)})
    coll.insert_many(range_docs)
    print("Range query collection created with sample data.")


# ============================================================
# Collection "c": compound filter, index on one field only
# ============================================================
# 30,000 documents with random "country", "city" and "pop" values. The only
# index created here covers "country"; nothing covers "city".
coll = db["c"]
if not coll.estimated_document_count():
    # Single-field ascending index on "country" only.
    country_only_index = [("country", 1)]
    coll.create_index(country_only_index)

    countries = ["US", "CN", "JP", "UK", "FR"]
    cities = ["NY", "SF", "BJ", "TK", "LD", "PA"]

    compound_docs = []
    for _ in range(30000):
        # Keep the draw order country -> city -> pop so the seeded random
        # sequence is consumed identically on every run.
        country = random.choice(countries)
        city = random.choice(cities)
        pop = random.randint(1000, 1000000)
        compound_docs.append({"country": country, "city": city, "pop": pop})
    coll.insert_many(compound_docs)
    print("Compound filter collection created with sample data.")


# ============================================================
# Collection "d": sort query, no index
# ============================================================
# 20,000 documents with a random "score" in [0, 100] and a random integer
# "timestamp" in [1600000000, 1700000000]. No index is created.
coll = db["d"]
if not coll.estimated_document_count():
    # Only populate when the collection reports no documents.
    sort_docs = []
    for _ in range(20000):
        # Draw score before timestamp to keep the seeded sequence stable.
        score = random.randint(0, 100)
        timestamp = random.randint(1600000000, 1700000000)
        sort_docs.append({"score": score, "timestamp": timestamp})
    coll.insert_many(sort_docs)
    print("Sort query collection created with sample data.")


# ============================================================
# Collection "e": aggregation with sort, index on one field only
# ============================================================
# 20,000 documents with a random "category" (A-D) and a random "price" in
# [10, 1000]. The only index created here covers "price".
coll = db["e"]
if not coll.estimated_document_count():
    # Single-field ascending index on "price" only.
    price_only_index = [("price", 1)]
    coll.create_index(price_only_index)

    categories = ["A", "B", "C", "D"]

    aggregation_docs = []
    for _ in range(20000):
        # Draw category before price to keep the seeded sequence stable.
        category = random.choice(categories)
        price = random.randint(10, 1000)
        aggregation_docs.append({"category": category, "price": price})
    coll.insert_many(aggregation_docs)
    print("Aggregation with sort collection created with sample data.")


# ============================================================
# Collection "f": low selectivity field
# ============================================================
# 50,000 documents with sequential "user_id" values 0..49999. Every document
# has "gender" set to "F", and an index is created on "gender".
coll = db["f"]
if not coll.estimated_document_count():
    # Ascending index on the single-valued "gender" field.
    gender_index = [("gender", 1)]
    coll.create_index(gender_index)

    low_selectivity_docs = []
    for user_id in range(50000):
        low_selectivity_docs.append({"user_id": user_id, "gender": "F"})
    coll.insert_many(low_selectivity_docs)
    print("Low selectivity collection created with sample data.")


# ============================================================
# Collection "g": small collection
# ============================================================
# Only 50 documents, each with a random boolean "flag" and a random "value"
# in [1, 100]. An index is created on "flag".
coll = db["g"]
if not coll.estimated_document_count():
    # Ascending index on "flag".
    flag_index = [("flag", 1)]
    coll.create_index(flag_index)

    small_docs = []
    for _ in range(50):
        # Draw flag before value to keep the seeded sequence stable.
        flag = random.choice([True, False])
        value = random.randint(1, 100)
        small_docs.append({"flag": flag, "value": value})
    coll.insert_many(small_docs)
    print("Small collection created with sample data.")


# Final status line, printed whether or not any collection needed seeding.
print("All test data prepared successfully.")
72 changes: 72 additions & 0 deletions l10n/bundle.l10n.json

Large diffs are not rendered by default.

20 changes: 18 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

57 changes: 55 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "vscode-documentdb",
"version": "0.5.1-alpha",
"version": "0.5.1-preview-test",
"aiKey": "0c6ae279ed8443289764825290e4f9e2-1a736e7c-1324-4338-be46-fc2a58ae4d14-7255",
"publisher": "ms-azuretools",
"displayName": "DocumentDB for VS Code",
Expand Down Expand Up @@ -83,7 +83,8 @@
"l10n:check": "node l10n/scripts/check.l10n.mjs",
"l10n:auto": "cross-env AZURE_TRANSLATOR_KEY= cross-env AZURE_TRANSLATOR_REGION= npx @vscode/l10n-dev generate-azure --outDir ./l10n ./l10n/bundle.l10n.json ./package.nls.json",
"l10n:export": "npx @vscode/l10n-dev generate-xlf ./l10n/bundle.l10n.json --outFile vscode-documentdb.xlf",
"l10n:import": "npx @vscode/l10n-dev import-xlf ./translations.xlf"
"l10n:import": "npx @vscode/l10n-dev import-xlf ./translations.xlf",
"test:index-advisor": "node test/indexAdvisor/runTests.js"
},
"devDependencies": {
"@eslint/js": "~9.31.0",
Expand Down Expand Up @@ -399,6 +400,13 @@
"title": "New DocumentDB Scrapbook",
"icon": "$(new-file)"
},
{
"//": "Testing: Run Index Advisor Tests",
"category": "DocumentDB",
"command": "vscode-documentdb.command.testing.runIndexAdvisorTests",
"title": "Run Index Advisor Tests",
"icon": "$(beaker)"
},
{
"//": "Delete Collection",
"category": "DocumentDB",
Expand Down Expand Up @@ -900,6 +908,51 @@
"type": "number",
"description": "The batch size to be used when querying or working with the shell.",
"default": 50
},
"documentDB.llm.findQueryPromptPath": {
"type": [
"string",
"null"
],
"description": "Path to a custom prompt template file for find query optimization. Leave empty to use the built-in template.",
"default": null
},
"documentDB.llm.aggregateQueryPromptPath": {
"type": [
"string",
"null"
],
"description": "Path to a custom prompt template file for aggregate query optimization. Leave empty to use the built-in template.",
"default": null
},
"documentDB.llm.countQueryPromptPath": {
"type": [
"string",
"null"
],
"description": "Path to a custom prompt template file for count query optimization. Leave empty to use the built-in template.",
"default": null
},
"documentDB.llm.crossCollectionQueryPromptPath": {
"type": [
"string",
"null"
],
"description": "Path to a custom prompt template file for cross-collection query generation. Leave empty to use the built-in template.",
"default": null
},
"documentDB.llm.singleCollectionQueryPromptPath": {
"type": [
"string",
"null"
],
"description": "Path to a custom prompt template file for single collection query generation. Leave empty to use the built-in template.",
"default": null
},
"documentDB.llm.enablePromptCache": {
"type": "boolean",
"description": "Enable caching of prompt templates. Disable this when debugging/testing custom prompts to force reload on every use.",
"default": true
}
}
}
Expand Down
Loading
Loading