Commit f857ce7

[AQUA] Update AQUA client documentation to Support predictWithResponseStream Endpoint (#1191)
1 parent 894627e commit f857ce7

2 files changed: +39 -12 lines changed
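The gist of the change: non-streaming requests keep using the ``/predict`` endpoint, while streaming requests must target ``/predictWithResponseStream``. A minimal sketch of the sync AQUA client under that rule, assuming (as the truncated hunks below suggest) that ``chat(..., stream=True)`` returns an iterable of chunks:

    # Sketch under assumptions: the stream=True flag and chunk iteration are
    # inferred from the truncated streaming hunks below, not shown in full here.
    import ads
    from ads.aqua import Client

    ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

    # Non-streaming: regular /predict endpoint.
    client = Client(endpoint="https://<MD_OCID>/predict")
    response = client.chat(
        messages=[{"role": "user", "content": "Tell me a joke."}],
        payload={"model": "odsc-llm"},
    )
    print(response)

    # Streaming: dedicated /predictWithResponseStream endpoint.
    stream_client = Client(endpoint="https://<MD_OCID>/predictWithResponseStream")
    for chunk in stream_client.chat(
        messages=[{"role": "user", "content": "Tell me a joke."}],
        payload={"model": "odsc-llm"},
        stream=True,  # assumed flag; mirrors the streaming examples in the diff
    ):
        print(chunk)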

docs/source/user_guide/large_language_model/aqua_client.rst

Lines changed: 28 additions & 8 deletions
@@ -46,8 +46,7 @@ Sync Usage
     client = Client(endpoint="https://<MD_OCID>/predict")
     response = client.chat(
         messages=[{"role": "user", "content": "Tell me a joke."}],
-        payload={"model": "odsc-llm"},
-        stream=False,
+        payload={"model": "odsc-llm"}
     )
     print(response)
@@ -58,7 +57,7 @@ Sync Usage
     from ads.aqua import Client
     ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

-    client = Client(endpoint="https://<MD_OCID>/predict")
+    client = Client(endpoint="https://<MD_OCID>/predictWithResponseStream")
     response = client.chat(
         messages=[{"role": "user", "content": "Tell me a joke."}],
         payload={"model": "odsc-llm"},
@@ -97,8 +96,7 @@ The following examples demonstrate how to perform the same operations using the
     client = AsyncClient(endpoint="https://<MD_OCID>/predict")
     response = await client.generate(
         prompt="Tell me a joke",
-        payload={"model": "odsc-llm"},
-        stream=False,
+        payload={"model": "odsc-llm"}
     )
     print(response)
@@ -109,7 +107,7 @@ The following examples demonstrate how to perform the same operations using the
     from ads.aqua import AsyncClient
     ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

-    client = AsyncClient(endpoint="https://<MD_OCID>/predict")
+    client = AsyncClient(endpoint="https://<MD_OCID>/predictWithResponseStream")
     async for chunk in await client.generate(
         prompt="Tell me a joke",
         payload={"model": "odsc-llm"},
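The async hunk above cuts off mid-call; here is a hedged completion of the streaming form, assuming ``generate(..., stream=True)`` returns an awaitable whose result can be iterated with ``async for``, as the context line already shown implies:

    # Sketch under assumptions: the stream=True flag and the chunk shape are
    # inferred from the surrounding diff context, which truncates the call.
    import asyncio

    import ads
    from ads.aqua import AsyncClient

    ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

    async def stream_joke() -> None:
        client = AsyncClient(endpoint="https://<MD_OCID>/predictWithResponseStream")
        async for chunk in await client.generate(
            prompt="Tell me a joke",
            payload={"model": "odsc-llm"},
            stream=True,  # assumed flag; the diff truncates before this argument
        ):
            print(chunk)

    asyncio.run(stream_joke())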
@@ -225,11 +223,33 @@ The synchronous client, ``OpenAI``, extends the OpenAI client. If no HTTP client
                 "content": "Tell me a joke.",
             }
         ],
-        # stream=True, # enable for streaming
     )

     print(response)

+**Streaming**
+For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream``.
+
+.. code-block:: python
+
+    client = OpenAI(
+        base_url="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predictWithResponseStream/v1",
+    )
+
+    response = client.chat.completions.create(
+        model="odsc-llm",
+        messages=[
+            {
+                "role": "user",
+                "content": "Tell me a joke.",
+            }
+        ],
+        stream=True
+    )
+
+    for chunk in response:
+        print(chunk)
+

 **Asynchronous Client**
@@ -246,7 +266,7 @@ The asynchronous client, ``AsynOpenAI``, extends the AsyncOpenAI client. If no a
     async def test_async() -> None:
         client_async = AsyncOpenAI(
-            base_url="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predict/v1",
+            base_url="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predictWithResponseStream/v1",
         )
         response = await client_async.chat.completions.create(
             model="odsc-llm",
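The hunk above ends before the streaming arguments; a sketch of the full asynchronous call implied by the new ``base_url``, using the standard ``stream=True`` plus ``async for`` pattern of the OpenAI-compatible client (the ``ads.aqua`` import path is an assumption, since the diff does not show it):

    # Sketch under assumptions: the AsyncOpenAI wrapper import path and auth
    # handling are not shown in the hunk; endpoint and model come from the diff.
    import asyncio

    import ads
    from ads.aqua import AsyncOpenAI  # assumed import location of the wrapper

    ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

    async def test_async() -> None:
        client_async = AsyncOpenAI(
            base_url="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predictWithResponseStream/v1",
        )
        response = await client_async.chat.completions.create(
            model="odsc-llm",
            messages=[{"role": "user", "content": "Tell me a joke."}],
            stream=True,
        )
        async for chunk in response:
            print(chunk)

    asyncio.run(test_async())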

docs/source/user_guide/large_language_model/llamaindex_integration.rst

Lines changed: 11 additions & 4 deletions
@@ -82,6 +82,7 @@ Streaming

 Using ``stream_complete`` endpoint
 -------------------------------
+For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream``.

 .. code-block:: python3

@@ -92,7 +93,7 @@ Using ``stream_complete`` endpoint

     llm = OCIDataScience(
         model="odsc-llm",
-        endpoint="https://<MD_OCID>/predict",
+        endpoint="https://<MD_OCID>/predictWithResponseStream",
     )

     for chunk in llm.stream_complete("Tell me a joke"):
@@ -101,6 +102,8 @@ Using ``stream_complete`` endpoint
 Using ``stream_chat`` endpoint
 ----------------------------

+For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream``.
+
 .. code-block:: python3

     import ads
@@ -111,7 +114,7 @@ Using ``stream_chat`` endpoint

     llm = OCIDataScience(
         model="odsc-llm",
-        endpoint="https://<MD_OCID>/predict",
+        endpoint="https://<MD_OCID>/predictWithResponseStream",
     )
     response = llm.stream_chat(
         [
@@ -176,6 +179,8 @@ Async Streaming
 Using ``astream_complete`` endpoint
 ---------------------------------

+For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream``.
+
 .. code-block:: python3

     import ads
@@ -185,7 +190,7 @@ Using ``astream_complete`` endpoint

     llm = OCIDataScience(
         model="odsc-llm",
-        endpoint="https://<MD_OCID>/predict",
+        endpoint="https://<MD_OCID>/predictWithResponseStream",
     )

     async for chunk in await llm.astream_complete("Tell me a joke"):
@@ -194,6 +199,8 @@ Using ``astream_complete`` endpoint
 Using ``astream_chat`` endpoint
 -----------------------------

+For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream``.
+
 .. code-block:: python3

     import ads
@@ -204,7 +211,7 @@ Using ``astream_chat`` endpoint

     llm = OCIDataScience(
         model="odsc-llm",
-        endpoint="https://<MD_OCID>/predict",
+        endpoint="https://<MD_OCID>/predictWithResponseStream",
     )
     response = await llm.stream_chat(
         [
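Both ``stream_chat`` hunks stop at the opening bracket of the message list; a hedged sketch of a complete call, assuming the usual LlamaIndex ``ChatMessage`` input and a ``delta`` field on each streamed response (the ``OCIDataScience`` import path is likewise an assumption, since the hunks do not show it):

    # Sketch under assumptions: import paths and the .delta attribute follow
    # common LlamaIndex usage; only the endpoint and model name come from the diff.
    import ads
    from llama_index.core.llms import ChatMessage
    from llama_index.llms.oci_data_science import OCIDataScience  # assumed path

    ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

    llm = OCIDataScience(
        model="odsc-llm",
        endpoint="https://<MD_OCID>/predictWithResponseStream",
    )
    response = llm.stream_chat(
        [ChatMessage(role="user", content="Tell me a joke.")]
    )
    for chunk in response:
        print(chunk.delta, end="")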

0 commit comments
