Skip to content

Commit 689b210

Browse files
authored
Add Xai websearch cost (#16001)
* Add xai websearch cost * Add test for websearch cost in xai * remove not required changes
1 parent 297c2a0 commit 689b210

File tree

4 files changed

+144
-3
lines changed

4 files changed

+144
-3
lines changed

litellm/llms/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ def get_cost_for_web_search_request(
4343
# Perplexity handles search costs internally in its own cost calculator
4444
# Return 0.0 to indicate costs are already accounted for
4545
return 0.0
46+
elif custom_llm_provider == "xai":
47+
from .xai.cost_calculator import cost_per_web_search_request
48+
return cost_per_web_search_request(usage=usage, model_info=model_info)
4649
else:
4750
return None
4851

litellm/llms/xai/chat/transformation.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
)
1111
from litellm.secret_managers.main import get_secret_str
1212
from litellm.types.llms.openai import AllMessageValues
13-
from litellm.types.utils import Choices, ModelResponse
13+
from litellm.types.utils import Choices, ModelResponse, Usage, PromptTokensDetailsWrapper
1414

1515
from ...openai.chat.gpt_transformation import OpenAIGPTConfig
1616

@@ -170,6 +170,8 @@ def transform_response(
170170
171171
XAI API returns empty string for finish_reason when using tools,
172172
so we need to fix this after the standard OpenAI transformation.
173+
174+
Also handles X.AI web search usage tracking by extracting num_sources_used.
173175
"""
174176

175177
# First, let the parent class handle the standard transformation
@@ -193,4 +195,34 @@ def transform_response(
193195
if isinstance(choice, Choices):
194196
self._fix_choice_finish_reason_for_tool_calls(choice)
195197

198+
# Handle X.AI web search usage tracking
199+
try:
200+
raw_response_json = raw_response.json()
201+
self._enhance_usage_with_xai_web_search_fields(response, raw_response_json)
202+
except Exception as e:
203+
verbose_logger.debug(f"Error extracting X.AI web search usage: {e}")
196204
return response
205+
206+
def _enhance_usage_with_xai_web_search_fields(
    self, model_response: ModelResponse, raw_response_json: dict
) -> None:
    """
    Copy X.AI's `num_sources_used` usage field onto the response usage.

    When the raw response's `usage` dict reports a positive
    `num_sources_used`, the value is written to
    `usage.prompt_tokens_details.web_search_requests` (creating the details
    wrapper if it is missing) and mirrored as `usage.num_sources_used`.
    Nothing is changed when the response has no usage object or no positive
    source count.
    """
    target_usage: Usage = getattr(model_response, "usage", None)
    if target_usage is None:
        return

    raw_usage = raw_response_json.get("usage", {})
    if not isinstance(raw_usage, dict):
        return

    # A missing key and an explicit null are treated the same way: no sources.
    num_sources_used = raw_usage.get("num_sources_used")
    if num_sources_used is None or not num_sources_used > 0:
        return

    # Map num_sources_used to web_search_requests for cost detection
    if target_usage.prompt_tokens_details is None:
        target_usage.prompt_tokens_details = PromptTokensDetailsWrapper()

    source_count = int(num_sources_used)
    target_usage.prompt_tokens_details.web_search_requests = source_count
    setattr(target_usage, "num_sources_used", source_count)
    verbose_logger.debug(f"X.AI web search sources used: {num_sources_used}")

litellm/llms/xai/cost_calculator.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,14 @@
44
- Handles XAI-specific reasoning token billing (billed as part of completion tokens)
55
"""
66

7-
from typing import Tuple
7+
from typing import TYPE_CHECKING, Tuple
88

99
from litellm.types.utils import Usage
1010
from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token
1111

12+
if TYPE_CHECKING:
13+
from litellm.types.utils import ModelInfo
14+
1215

1316
def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
1417
"""
@@ -46,3 +49,35 @@ def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
4649
)
4750

4851
return prompt_cost, completion_cost
52+
53+
54+
def cost_per_web_search_request(usage: "Usage", model_info: "ModelInfo") -> float:
    """
    Return the web search charge for an X.AI response.

    X.AI Live Search is priced at $25 per 1,000 sources used, i.e. $0.025
    per source.

    The source count is read from
    `usage.prompt_tokens_details.web_search_requests` when that value is
    present and not None; otherwise from `usage.num_sources_used`. If neither
    is available the cost is 0. `model_info` is accepted but not consulted.
    """
    # $25 per 1,000 sources = $0.025 per source
    price_per_source = 25.0 / 1000.0

    # Primary location: the count stored on prompt_tokens_details.
    token_details = getattr(usage, "prompt_tokens_details", None)
    recorded_sources = getattr(token_details, "web_search_requests", None)

    # Fallback: a count set directly on the usage object.
    if recorded_sources is None:
        recorded_sources = getattr(usage, "num_sources_used", None)

    source_count = 0 if recorded_sources is None else int(recorded_sources)

    return price_per_source * source_count

tests/test_litellm/llms/xai/test_xai_cost_calculator.py

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,15 @@
99
import litellm
1010
from litellm.types.utils import (
1111
CompletionTokensDetailsWrapper,
12+
PromptTokensDetailsWrapper,
1213
Usage,
1314
)
1415

1516
sys.path.insert(
1617
0, os.path.abspath("../../..")
1718
) # Adds the parent directory to the system path
1819

19-
from litellm.llms.xai.cost_calculator import cost_per_token
20+
from litellm.llms.xai.cost_calculator import cost_per_token, cost_per_web_search_request
2021

2122

2223
class TestXAICostCalculator:
@@ -320,3 +321,73 @@ def test_tiered_pricing_model_without_tiered_pricing(self):
320321

321322
assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-10)
322323
assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-10)
324+
325+
def test_web_search_cost_calculation(self):
    """Sources recorded in prompt_tokens_details are billed at $0.025 each."""
    # Primary path: the count lives in prompt_tokens_details.web_search_requests
    token_details = PromptTokensDetailsWrapper(
        text_tokens=100,
        web_search_requests=3,  # 3 sources used
    )
    usage = Usage(
        prompt_tokens=100,
        completion_tokens=50,
        total_tokens=150,
        prompt_tokens_details=token_details,
    )

    actual_cost = cost_per_web_search_request(usage=usage, model_info={})

    # 3 sources * $0.025 per source = $0.075
    expected_cost = 3 * (25.0 / 1000.0)

    assert math.isclose(actual_cost, expected_cost, rel_tol=1e-10)
    assert math.isclose(actual_cost, 0.075, rel_tol=1e-10)
345+
346+
def test_web_search_cost_fallback_calculation(self):
    """The cost falls back to usage.num_sources_used when it is set directly."""
    usage = Usage(
        prompt_tokens=100,
        completion_tokens=50,
        total_tokens=150,
    )
    # Attach the source count straight onto the usage object (fallback path)
    usage.num_sources_used = 5

    actual_cost = cost_per_web_search_request(usage=usage, model_info={})

    # 5 sources * $0.025 per source = $0.125
    expected_cost = 5 * (25.0 / 1000.0)

    assert math.isclose(actual_cost, expected_cost, rel_tol=1e-10)
    assert math.isclose(actual_cost, 0.125, rel_tol=1e-10)
364+
365+
def test_web_search_no_sources_used(self):
    """A recorded count of zero sources yields a zero web search cost."""
    token_details = PromptTokensDetailsWrapper(
        text_tokens=100,
        web_search_requests=0,  # No web search
    )
    usage = Usage(
        prompt_tokens=100,
        completion_tokens=50,
        total_tokens=150,
        prompt_tokens_details=token_details,
    )

    actual_cost = cost_per_web_search_request(usage=usage, model_info={})

    # 0 sources * $0.025 per source = $0.0
    assert actual_cost == 0.0
381+
382+
def test_web_search_cost_without_prompt_tokens_details(self):
    """Usage built without prompt_tokens_details yields a zero web search cost."""
    token_counts = {
        "prompt_tokens": 100,
        "completion_tokens": 50,
        "total_tokens": 150,
    }
    usage = Usage(**token_counts)

    actual_cost = cost_per_web_search_request(usage=usage, model_info={})

    # No web search data = $0.0
    assert actual_cost == 0.0

0 commit comments

Comments
 (0)