@@ -46,8 +46,7 @@ Sync Usage
 client = Client(endpoint="https://<MD_OCID>/predict")
 response = client.chat(
     messages=[{"role": "user", "content": "Tell me a joke."}],
-    payload={"model": "odsc-llm"},
-    stream=False,
+    payload={"model": "odsc-llm"}
 )
 print(response)
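
Applied, the non-streaming sync call simply drops ``stream=False``. A minimal self-contained sketch of the resulting usage, with the imports and auth setup borrowed from the next hunk (``<MD_OCID>`` and ``odsc-llm`` are the doc's placeholders):

.. code-block:: python

    import ads
    from ads.aqua import Client

    # Auth setup as shown in the following hunk.
    ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

    client = Client(endpoint="https://<MD_OCID>/predict")
    response = client.chat(
        messages=[{"role": "user", "content": "Tell me a joke."}],
        payload={"model": "odsc-llm"},
    )
    print(response)
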
@@ -58,7 +57,7 @@ Sync Usage
 from ads.aqua import Client
 ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

-client = Client(endpoint="https://<MD_OCID>/predict")
+client = Client(endpoint="https://<MD_OCID>/predictWithResponseStream")
 response = client.chat(
     messages=[{"role": "user", "content": "Tell me a joke."}],
     payload={"model": "odsc-llm"},
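
The hunk above cuts off before the end of the call. A sketch of how the full sync streaming example plausibly reads after this change, assuming ``chat`` still accepts ``stream=True`` and then yields chunks, mirroring the async loop in a later hunk:

.. code-block:: python

    import ads
    from ads.aqua import Client

    ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

    # Streaming goes through the dedicated endpoint introduced by this change.
    client = Client(endpoint="https://<MD_OCID>/predictWithResponseStream")
    for chunk in client.chat(
        messages=[{"role": "user", "content": "Tell me a joke."}],
        payload={"model": "odsc-llm"},
        stream=True,  # assumption: the flag remains for streaming calls; not visible in this hunk
    ):
        print(chunk)
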
@@ -97,8 +96,7 @@ The following examples demonstrate how to perform the same operations using the
 client = AsyncClient(endpoint="https://<MD_OCID>/predict")
 response = await client.generate(
     prompt="Tell me a joke",
-    payload={"model": "odsc-llm"},
-    stream=False,
+    payload={"model": "odsc-llm"}
 )
 print(response)
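
The async ``generate`` call gets the same treatment. A runnable sketch of the resulting non-streaming usage, wrapped in a coroutine and driven with ``asyncio`` (the wrapper function is illustrative, not part of the doc):

.. code-block:: python

    import asyncio

    import ads
    from ads.aqua import AsyncClient

    ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

    async def main() -> None:
        client = AsyncClient(endpoint="https://<MD_OCID>/predict")
        response = await client.generate(
            prompt="Tell me a joke",
            payload={"model": "odsc-llm"},
        )
        print(response)

    asyncio.run(main())
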
@@ -109,7 +107,7 @@ The following examples demonstrate how to perform the same operations using the
 from ads.aqua import AsyncClient
 ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

-client = AsyncClient(endpoint="https://<MD_OCID>/predict")
+client = AsyncClient(endpoint="https://<MD_OCID>/predictWithResponseStream")
 async for chunk in await client.generate(
     prompt="Tell me a joke",
     payload={"model": "odsc-llm"},
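
Async streaming likewise moves to ``/predictWithResponseStream``. A sketch completing the truncated loop, where the ``asyncio`` wrapper and the trailing ``stream=True`` argument are assumptions inferred from the surrounding hunks:

.. code-block:: python

    import asyncio

    import ads
    from ads.aqua import AsyncClient

    ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

    async def main() -> None:
        client = AsyncClient(endpoint="https://<MD_OCID>/predictWithResponseStream")
        async for chunk in await client.generate(
            prompt="Tell me a joke",
            payload={"model": "odsc-llm"},
            stream=True,  # assumption: mirrors the sync streaming example
        ):
            print(chunk)

    asyncio.run(main())
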
@@ -225,11 +223,33 @@ The synchronous client, ``OpenAI``, extends the OpenAI client. If no HTTP client
             "content": "Tell me a joke.",
         }
     ],
-    # stream=True, # enable for streaming
 )

 print(response)

+**Streaming**
+For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream``.
+
+.. code-block:: python
+
+    client = OpenAI(
+        base_url="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predictWithResponseStream/v1",
+    )
+
+    response = client.chat.completions.create(
+        model="odsc-llm",
+        messages=[
+            {
+                "role": "user",
+                "content": "Tell me a joke.",
+            }
+        ],
+        stream=True
+    )
+
+    for chunk in response:
+        print(chunk)
+

 **Asynchronous Client**
@@ -246,7 +266,7 @@ The asynchronous client, ``AsynOpenAI``, extends the AsyncOpenAI client. If no a
 async def test_async() -> None:
     client_async = AsyncOpenAI(
-        base_url="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predict/v1",
+        base_url="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predictWithResponseStream/v1",
     )
     response = await client_async.chat.completions.create(
         model="odsc-llm",
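
This hunk also stops mid-call. A sketch of the complete coroutine after the endpoint change, assuming the ``AsyncOpenAI`` wrapper import appears earlier in the file and that ``stream=True`` accompanies the streaming endpoint (neither is visible in this hunk):

.. code-block:: python

    import asyncio

    # AsyncOpenAI here is the ADS wrapper referenced earlier in this doc;
    # its import line sits outside this hunk.

    async def test_async() -> None:
        client_async = AsyncOpenAI(
            base_url="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predictWithResponseStream/v1",
        )
        response = await client_async.chat.completions.create(
            model="odsc-llm",
            messages=[{"role": "user", "content": "Tell me a joke."}],
            stream=True,  # assumption: the streaming endpoint pairs with stream=True
        )
        async for chunk in response:
            print(chunk)

    asyncio.run(test_async())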