feat: add gemma3 ollama model support

douglas-reid · douglas-reid · commit 5fd74d5ae7bb · 2025-10-08T15:38:28.000-07:00
Adds support for locally-running Gemma3 models exposed via Ollama.
This enables developers to run fully local agent workflows using
the two larger Gemma3 models.

Functionality is achieved by extending LiteLlm models and using a
custom prompt template with some request/response pre/post processing.

As part of this work, the existing Gemma 3 support (via Gemini API) is
refactored to clarify functionality and support broader reuse across
both modes of interacting with the LLM.

A separate `hello_world_gemma3_ollama` example is provided to highlight
local usage.

NOTE: Adds an optional dependency on `instructor` for finding and parsing
JSON in Gemma3 response blocks.

Testing

Test Plan

- add and run integration and unit tests
- manual run of both `hello_world_gemma` and `hello_world_gemma3_ollama` agents
- manual run of `multi_tool_agent` from quickstart using new `Gemma3Ollama` LLM.

Automated Tests

| Test Command | Results |
|--------------|---------|
| pytest ./tests/unittests | 2779 passed, 2387 warnings in 63.03s |
| pytest ./tests/unittests/models/test_gemma_llm.py | 15 passed in 4.06s |
| pytest ./tests/integration/models/test_gemma_llm.py | 1 passed in 33.22s |

Manual Tests

Log of running `multi_tool_agent` with a locally-built wheel:

```
[user]: what is the weather in new york?
15:12:24 - LiteLLM:INFO: utils.py:3373 -
LiteLLM completion() model= gemma3:12b; provider = ollama
15:12:28 - LiteLLM:INFO: utils.py:3373 -
LiteLLM completion() model= gemma3:12b; provider = ollama
[weather_time_agent]: The weather in New York is sunny with a temperature of 25 degrees Celsius (77 degrees Fahrenheit).

[user]: what is the time in new york?
15:12:43 - LiteLLM:INFO: utils.py:3373 -
LiteLLM completion() model= gemma3:12b; provider = ollama
15:12:48 - LiteLLM:INFO: utils.py:3373 -
LiteLLM completion() model= gemma3:12b; provider = ollama
[weather_time_agent]: The current time in New York is 2025-10-08 18:12:48 EDT-0400.
```

`DEBUG` log snippet of an agent run:

```
2025-10-08 15:32:33,322 - DEBUG - lite_llm.py:810 -
LLM Request:
-----------------------------------------------------------
System Instruction:

      You roll dice and answer questions about the outcome of the dice rolls.
...

You are an agent. Your internal name is "data_processing_agent".
...

-----------------------------------------------------------
Contents:
{"parts":[{"text":"Hi, introduce yourself."}],"role":"user"}
{"parts":[{"text":"I am data_processing_agent, a hello world agent that can roll a dice of 8 sides and check prime numbers."}],"role":"model"}
{"parts":[{"text":"Roll a die with 100 sides and check if it is prime"}],"role":"user"}
{"parts":[{"text":"{\"args\":{\"sides\":100},\"name\":\"roll_die\"}"}],"role":"model"}
{"parts":[{"text":"Invoking tool `roll_die` produced: `{\"result\": 26}`."}],"role":"user"}
{"parts":[{"text":"{\"args\":{\"nums\":[26]},\"name\":\"check_prime\"}"}],"role":"model"}
{"parts":[{"text":"Invoking tool `check_prime` produced: `{\"result\": \"No prime numbers found.\"}`."}],"role":"user"}
{"parts":[{"text":"Okay, the roll was 26, and it is not a prime number."}],"role":"model"}
{"parts":[{"text":"Roll it again."}],"role":"user"}
{"parts":[{"text":"{\"args\":{\"sides\":100},\"name\":\"roll_die\"}"}],"role":"model"}
{"parts":[{"text":"Invoking tool `roll_die` produced: `{\"result\": 69}`."}],"role":"user"}
{"parts":[{"text":"{\"args\":{\"nums\":[69]},\"name\":\"check_prime\"}"}],"role":"model"}
{"parts":[{"text":"Invoking tool `check_prime` produced: `{\"result\": \"No prime numbers found.\"}`."}],"role":"user"}
{"parts":[{"text":"The roll was 69, and it is not a prime number."}],"role":"model"}
{"parts":[{"text":"What numbers did I get?"}],"role":"user"}
-----------------------------------------------------------
Functions:

-----------------------------------------------------------
```
diff --git a/.gitignore b/.gitignore
@@ -32,6 +32,7 @@ venv.bak/
 # IDE
 .idea/
 .vscode/
+.zed/
 *.swp
 *.swo
 .DS_Store
diff --git a/contributing/samples/hello_world_gemma/agent.py b/contributing/samples/hello_world_gemma/agent.py
@@ -16,7 +16,7 @@
 import random
 
 from google.adk.agents.llm_agent import Agent
-from google.adk.models.gemma_llm import Gemma
+from google.adk.models.gemma_llm import Gemma3GeminiAPI
 from google.genai.types import GenerateContentConfig
 
 
@@ -61,7 +61,7 @@ async def check_prime(nums: list[int]) -> str:
 
 
 root_agent = Agent(
-    model=Gemma(model="gemma-3-27b-it"),
+    model=Gemma3GeminiAPI(model="gemma-3-27b-it"),
     name="data_processing_agent",
     description=(
         "hello world agent that can roll many-sided dice and check if numbers"
diff --git a/contributing/samples/hello_world_gemma3_ollama/__init__.py b/contributing/samples/hello_world_gemma3_ollama/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from . import agent
diff --git a/contributing/samples/hello_world_gemma3_ollama/agent.py b/contributing/samples/hello_world_gemma3_ollama/agent.py
@@ -0,0 +1,93 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import random
+
+from google.adk.agents.llm_agent import Agent
+from google.adk.models.gemma_llm import Gemma3Ollama
+
+litellm_logger = logging.getLogger("LiteLLM")
+litellm_logger.setLevel(logging.WARNING)
+
+
+def roll_die(sides: int) -> int:
+  """Roll a die and return the rolled result.
+
+  Args:
+    sides: The integer number of sides the die has.
+
+  Returns:
+    An integer of the result of rolling the die.
+  """
+  return random.randint(1, sides)
+
+
+async def check_prime(nums: list[int]) -> str:
+  """Check if a given list of numbers are prime.
+
+  Args:
+    nums: The list of numbers to check.
+
+  Returns:
+    A str indicating which number is prime.
+  """
+  primes = set()
+  for number in nums:
+    number = int(number)
+    if number <= 1:
+      continue
+    is_prime = True
+    for i in range(2, int(number**0.5) + 1):
+      if number % i == 0:
+        is_prime = False
+        break
+    if is_prime:
+      primes.add(number)
+  return (
+      "No prime numbers found."
+      if not primes
+      else f"{', '.join(str(num) for num in primes)} are prime numbers."
+  )
+
+
+root_agent = Agent(
+    model=Gemma3Ollama(model="ollama/gemma3:12b"),
+    name="data_processing_agent",
+    description=(
+        "hello world agent that can roll a dice of 8 sides and check prime"
+        " numbers."
+    ),
+    instruction="""
+      You roll dice and answer questions about the outcome of the dice rolls.
+      You can roll dice of different sizes.
+      You can use multiple tools in parallel by calling functions in parallel(in one request and in one round).
+      It is ok to discuss previous dice roles, and comment on the dice rolls.
+      When you are asked to roll a die, you must call the roll_die tool with the number of sides. Be sure to pass in an integer. Do not pass in a string.
+      You should never roll a die on your own.
+      When checking prime numbers, call the check_prime tool with a list of integers. Be sure to pass in a list of integers. You should never pass in a string.
+      You should not check prime numbers before calling the tool.
+      When you are asked to roll a die and check prime numbers, you should always make the following two function calls:
+      1. You should first call the roll_die tool to get a roll. Wait for the function response before calling the check_prime tool.
+      2. After you get the function response from roll_die tool, you should call the check_prime tool with the roll_die result.
+        2.1 If user asks you to check primes based on previous rolls, make sure you include the previous rolls in the list.
+      3. When you respond, you must include the roll_die result from step 1.
+      You should always perform the previous 3 steps when asking for a roll and checking prime numbers.
+      You should not rely on the previous history on prime results.
+    """,
+    tools=[
+        roll_die,
+        check_prime,
+    ],
+)
diff --git a/contributing/samples/hello_world_gemma3_ollama/main.py b/contributing/samples/hello_world_gemma3_ollama/main.py
@@ -0,0 +1,77 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import asyncio
+import time
+
+import agent
+from dotenv import load_dotenv
+from google.adk.artifacts.in_memory_artifact_service import InMemoryArtifactService
+from google.adk.cli.utils import logs
+from google.adk.runners import Runner
+from google.adk.sessions.in_memory_session_service import InMemorySessionService
+from google.adk.sessions.session import Session
+from google.genai import types
+
+load_dotenv(override=True)
+logs.log_to_tmp_folder()
+
+
+async def main():
+
+  app_name = 'my_app'
+  user_id_1 = 'user1'
+  session_service = InMemorySessionService()
+  artifact_service = InMemoryArtifactService()
+  runner = Runner(
+      app_name=app_name,
+      agent=agent.root_agent,
+      artifact_service=artifact_service,
+      session_service=session_service,
+  )
+  session_11 = await session_service.create_session(
+      app_name=app_name, user_id=user_id_1
+  )
+
+  async def run_prompt(session: Session, new_message: str):
+    content = types.Content(
+        role='user', parts=[types.Part.from_text(text=new_message)]
+    )
+    print('** User says:', content.model_dump(exclude_none=True))
+    async for event in runner.run_async(
+        user_id=user_id_1,
+        session_id=session.id,
+        new_message=content,
+    ):
+      if event.content.parts and event.content.parts[0].text:
+        print(f'** {event.author}: {event.content.parts[0].text}')
+
+  start_time = time.time()
+  print('Start time:', start_time)
+  print('------------------------------------')
+  await run_prompt(session_11, 'Hi, introduce yourself.')
+  await run_prompt(
+      session_11, 'Roll a die with 100 sides and check if it is prime'
+  )
+  await run_prompt(session_11, 'Roll it again.')
+  await run_prompt(session_11, 'What numbers did I get?')
+  end_time = time.time()
+  print('------------------------------------')
+  print('End time:', end_time)
+  print('Total time:', end_time - start_time)
+
+
+if __name__ == '__main__':
+  asyncio.run(main())
diff --git a/pyproject.toml b/pyproject.toml
@@ -150,6 +150,7 @@ extensions = [
   "llama-index-embeddings-google-genai>=0.3.0",# For files retrieval using LlamaIndex.
   "lxml>=5.3.0",                               # For load_web_page tool.
   "toolbox-core>=0.1.0",                       # For tools.toolbox_toolset.ToolboxToolset
+  "instructor>=1.11.3",                        # For Gemma3 LLMs (parsing function responses).
 ]
 
 otel-gcp = [
diff --git a/src/google/adk/models/__init__.py b/src/google/adk/models/__init__.py
@@ -15,7 +15,8 @@
 """Defines the interface to support a model."""
 
 from .base_llm import BaseLlm
-from .gemma_llm import Gemma
+from .gemma_llm import Gemma3GeminiAPI
+from .gemma_llm import Gemma3Ollama
 from .google_llm import Gemini
 from .llm_request import LlmRequest
 from .llm_response import LlmResponse
@@ -24,10 +25,12 @@
 __all__ = [
     'BaseLlm',
     'Gemini',
-    'Gemma',
+    'Gemma3GeminiAPI',
+    'Gemma3Ollama',
     'LLMRegistry',
 ]
 
 
 LLMRegistry.register(Gemini)
-LLMRegistry.register(Gemma)
+LLMRegistry.register(Gemma3GeminiAPI)
+LLMRegistry.register(Gemma3Ollama)
diff --git a/src/google/adk/models/gemma_llm.py b/src/google/adk/models/gemma_llm.py
diff --git a/tests/integration/models/test_gemma_llm.py b/tests/integration/models/test_gemma_llm.py
diff --git a/tests/unittests/models/test_gemma_llm.py b/tests/unittests/models/test_gemma_llm.py

Original file line number	Diff line number	Diff line change
`@@ -150,6 +150,7 @@ extensions = [`
`150`	`150`	`"llama-index-embeddings-google-genai>=0.3.0",# For files retrieval using LlamaIndex.`
`151`	`151`	`"lxml>=5.3.0", # For load_web_page tool.`
`152`	`152`	`"toolbox-core>=0.1.0", # For tools.toolbox_toolset.ToolboxToolset`
	`153`	`+ "instructor>=1.11.3", # For Gemma3 LLMs (parsing function responses).`
`153`	`154`	`]`
`154`	`155`
`155`	`156`	`otel-gcp = [`