Skip to content

Commit 122cdc8

Browse files
MarkDaoust authored and copybara-github committed
feat: support new UsageMetadata fields
PiperOrigin-RevId: 738783088
1 parent efbcb52 commit 122cdc8

File tree

4 files changed

+136
-22
lines changed

4 files changed

+136
-22
lines changed

google/genai/tests/chats/test_send_message.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -615,7 +615,8 @@ async def test_async_stream_config_override(client):
615615
):
616616
request_config_text += chunk.text
617617
default_config_text = ''
618-
async for chunk in await chat.send_message_stream('tell me a story in 100 words'):
618+
619+
async for chunk in await chat.send_message_stream('tell me family friendly story in 100 words'):
619620
default_config_text += chunk.text
620621

621622
assert json.loads(request_config_text)

google/genai/tests/models/test_generate_content.py

+46-3
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,11 @@
1313
# limitations under the License.
1414
#
1515

16+
import base64
1617
import enum
18+
import os
1719

20+
import PIL.Image
1821
from pydantic import BaseModel, ValidationError, Field
1922
from typing import Literal, List, Optional, Union
2023
from datetime import datetime
@@ -28,6 +31,13 @@
2831
from .. import pytest_helper
2932
from enum import Enum
3033

34+
IMAGE_PNG_FILE_PATH = os.path.abspath(
35+
os.path.join(os.path.dirname(__file__), '../data/google.png')
36+
)
37+
image_png = PIL.Image.open(IMAGE_PNG_FILE_PATH)
38+
39+
with open(IMAGE_PNG_FILE_PATH, 'rb') as image_file:
40+
image_bytes = image_file.read()
3141

3242
safety_settings_with_method = [
3343
{
@@ -1668,7 +1678,12 @@ def test_catch_stack_trace_in_error_handling(client):
16681678
# }]
16691679
# }
16701680
# }
1671-
assert e.details == {'code': 400, 'message': '', 'status': 'UNKNOWN'}
1681+
if 'error' in e.details:
1682+
details = e.details['error']
1683+
else:
1684+
details = e.details
1685+
assert details['code'] == 400
1686+
assert details['status'] == 'INVALID_ARGUMENT'
16721687

16731688

16741689
def test_multiple_strings(client):
@@ -1715,8 +1730,8 @@ class SummaryResponses(BaseModel):
17151730

17161731
assert 'Shakespeare' in response.text
17171732
assert 'Hemingway' in response.text
1718-
assert 'Shakespeare' == response.parsed[0].person
1719-
assert 'Hemingway' == response.parsed[1].person
1733+
assert 'Shakespeare' in response.parsed[0].person
1734+
assert 'Hemingway' in response.parsed[1].person
17201735

17211736

17221737
def test_multiple_function_calls(client):
@@ -1780,3 +1795,31 @@ def test_multiple_function_calls(client):
17801795
assert 'sunny' in response.text
17811796
assert '100 degrees' in response.text
17821797
assert '$100' in response.text
1798+
1799+
def test_usage_metadata_part_types(client):
1800+
contents = [
1801+
'Hello world.',
1802+
types.Part.from_bytes(
1803+
data=image_bytes,
1804+
mime_type='image/png',
1805+
),
1806+
]
1807+
1808+
response = client.models.generate_content(
1809+
model='gemini-1.5-flash', contents=contents
1810+
)
1811+
usage_metadata = response.usage_metadata
1812+
1813+
assert usage_metadata.candidates_token_count
1814+
assert usage_metadata.candidates_tokens_details
1815+
modalities = sorted(
1816+
[d.modality.name for d in usage_metadata.candidates_tokens_details]
1817+
)
1818+
assert modalities == ['TEXT']
1819+
1820+
assert usage_metadata.prompt_token_count
1821+
assert usage_metadata.prompt_tokens_details
1822+
modalities = sorted(
1823+
[d.modality.name for d in usage_metadata.prompt_tokens_details]
1824+
)
1825+
assert modalities == ['IMAGE', 'TEXT']

google/genai/tests/models/test_generate_content_tools.py

+10-7
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import typing
1919
import pydantic
2020
import pytest
21+
2122
from ... import _transformers as t
2223
from ... import errors
2324
from ... import types
@@ -676,6 +677,7 @@ def describe_cities(
676677
country: str,
677678
cities: typing.Optional[list[str]] = None,
678679
) -> str:
680+
"Given a country and an optional list of cities, describe the cities."
679681
if cities is None:
680682
return 'There are no cities to describe.'
681683
else:
@@ -715,7 +717,7 @@ def test_empty_tools(client):
715717

716718
def test_with_1_empty_tool(client):
717719
# Bad request for empty tool.
718-
with pytest_helper.exception_if_vertex(client, errors.ClientError):
720+
with pytest.raises(errors.ClientError):
719721
client.models.generate_content(
720722
model='gemini-1.5-flash',
721723
contents='What is the price of GOOG?.',
@@ -824,17 +826,16 @@ def divide_integers(a: int, b: int) -> int:
824826

825827
@pytest.mark.asyncio
826828
async def test_automatic_function_calling_async_with_exception(client):
827-
def divide_integers(a: int, b: int) -> int:
829+
def mystery_function(a: int, b: int) -> int:
828830
return a // b
829831

830832
response = await client.aio.models.generate_content(
831833
model='gemini-1.5-flash',
832834
contents='what is the result of 1000/0?',
833835
config={'tools': [divide_integers]},
834836
)
835-
836-
assert 'undefined' in response.text
837-
837+
assert response.automatic_function_calling_history
838+
assert response.automatic_function_calling_history[-1].parts[0].function_response.response['error']
838839

839840
@pytest.mark.asyncio
840841
async def test_automatic_function_calling_async_float_without_decimal(client):
@@ -1058,8 +1059,10 @@ def test_code_execution_tool(client):
10581059
),
10591060
)
10601061

1061-
assert 'def is_prime' in response.executable_code
1062-
assert 'primes=' in response.code_execution_result
1062+
assert response.executable_code
1063+
assert (
1064+
'prime' in response.code_execution_result.lower() or
1065+
'5117' in response.code_execution_result)
10631066

10641067

10651068
def test_afc_logs_to_logger_instance(client, caplog):

google/genai/types.py

+78-11
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,15 @@ class BlockedReason(_common.CaseInSensitiveEnum):
185185
PROHIBITED_CONTENT = 'PROHIBITED_CONTENT'
186186

187187

188+
class Modality(_common.CaseInSensitiveEnum):
189+
"""Server content modalities."""
190+
191+
MODALITY_UNSPECIFIED = 'MODALITY_UNSPECIFIED'
192+
TEXT = 'TEXT'
193+
IMAGE = 'IMAGE'
194+
AUDIO = 'AUDIO'
195+
196+
188197
class DeploymentResourcesType(_common.CaseInSensitiveEnum):
189198
""""""
190199

@@ -334,15 +343,6 @@ class FileSource(_common.CaseInSensitiveEnum):
334343
GENERATED = 'GENERATED'
335344

336345

337-
class Modality(_common.CaseInSensitiveEnum):
338-
"""Server content modalities."""
339-
340-
MODALITY_UNSPECIFIED = 'MODALITY_UNSPECIFIED'
341-
TEXT = 'TEXT'
342-
IMAGE = 'IMAGE'
343-
AUDIO = 'AUDIO'
344-
345-
346346
class VideoMetadata(_common.BaseModel):
347347
"""Metadata describes the input video content."""
348348

@@ -2840,40 +2840,107 @@ class GenerateContentResponsePromptFeedbackDict(TypedDict, total=False):
28402840
]
28412841

28422842

2843+
class ModalityTokenCount(_common.BaseModel):
2844+
"""Represents token counting info for a single modality."""
2845+
2846+
modality: Optional[Modality] = Field(
2847+
default=None,
2848+
description="""The modality associated with this token count.""",
2849+
)
2850+
token_count: Optional[int] = Field(
2851+
default=None, description="""Number of tokens."""
2852+
)
2853+
2854+
2855+
class ModalityTokenCountDict(TypedDict, total=False):
2856+
"""Represents token counting info for a single modality."""
2857+
2858+
modality: Optional[Modality]
2859+
"""The modality associated with this token count."""
2860+
2861+
token_count: Optional[int]
2862+
"""Number of tokens."""
2863+
2864+
2865+
ModalityTokenCountOrDict = Union[ModalityTokenCount, ModalityTokenCountDict]
2866+
2867+
28432868
class GenerateContentResponseUsageMetadata(_common.BaseModel):
28442869
"""Usage metadata about response(s)."""
28452870

2871+
cache_tokens_details: Optional[list[ModalityTokenCount]] = Field(
2872+
default=None,
2873+
description="""Output only. List of modalities of the cached content in the request input.""",
2874+
)
28462875
cached_content_token_count: Optional[int] = Field(
28472876
default=None,
28482877
description="""Output only. Number of tokens in the cached part in the input (the cached content).""",
28492878
)
28502879
candidates_token_count: Optional[int] = Field(
28512880
default=None, description="""Number of tokens in the response(s)."""
28522881
)
2882+
candidates_tokens_details: Optional[list[ModalityTokenCount]] = Field(
2883+
default=None,
2884+
description="""Output only. List of modalities that were returned in the response.""",
2885+
)
28532886
prompt_token_count: Optional[int] = Field(
28542887
default=None,
28552888
description="""Number of tokens in the request. When `cached_content` is set, this is still the total effective prompt size meaning this includes the number of tokens in the cached content.""",
28562889
)
2890+
prompt_tokens_details: Optional[list[ModalityTokenCount]] = Field(
2891+
default=None,
2892+
description="""Output only. List of modalities that were processed in the request input.""",
2893+
)
2894+
thoughts_token_count: Optional[int] = Field(
2895+
default=None,
2896+
description="""Output only. Number of tokens present in thoughts output.""",
2897+
)
2898+
tool_use_prompt_token_count: Optional[int] = Field(
2899+
default=None,
2900+
description="""Output only. Number of tokens present in tool-use prompt(s).""",
2901+
)
2902+
tool_use_prompt_tokens_details: Optional[list[ModalityTokenCount]] = Field(
2903+
default=None,
2904+
description="""Output only. List of modalities that were processed for tool-use request inputs.""",
2905+
)
28572906
total_token_count: Optional[int] = Field(
28582907
default=None,
2859-
description="""Total token count for prompt and response candidates.""",
2908+
description="""Total token count for prompt, response candidates, and tool-use prompts (if present).""",
28602909
)
28612910

28622911

28632912
class GenerateContentResponseUsageMetadataDict(TypedDict, total=False):
28642913
"""Usage metadata about response(s)."""
28652914

2915+
cache_tokens_details: Optional[list[ModalityTokenCountDict]]
2916+
"""Output only. List of modalities of the cached content in the request input."""
2917+
28662918
cached_content_token_count: Optional[int]
28672919
"""Output only. Number of tokens in the cached part in the input (the cached content)."""
28682920

28692921
candidates_token_count: Optional[int]
28702922
"""Number of tokens in the response(s)."""
28712923

2924+
candidates_tokens_details: Optional[list[ModalityTokenCountDict]]
2925+
"""Output only. List of modalities that were returned in the response."""
2926+
28722927
prompt_token_count: Optional[int]
28732928
"""Number of tokens in the request. When `cached_content` is set, this is still the total effective prompt size meaning this includes the number of tokens in the cached content."""
28742929

2930+
prompt_tokens_details: Optional[list[ModalityTokenCountDict]]
2931+
"""Output only. List of modalities that were processed in the request input."""
2932+
2933+
thoughts_token_count: Optional[int]
2934+
"""Output only. Number of tokens present in thoughts output."""
2935+
2936+
tool_use_prompt_token_count: Optional[int]
2937+
"""Output only. Number of tokens present in tool-use prompt(s)."""
2938+
2939+
tool_use_prompt_tokens_details: Optional[list[ModalityTokenCountDict]]
2940+
"""Output only. List of modalities that were processed for tool-use request inputs."""
2941+
28752942
total_token_count: Optional[int]
2876-
"""Total token count for prompt and response candidates."""
2943+
"""Total token count for prompt, response candidates, and tool-use prompts (if present)."""
28772944

28782945

28792946
GenerateContentResponseUsageMetadataOrDict = Union[

0 commit comments

Comments (0)