Python SDK inference

OctoAI Python SDK at a glance

The OctoAI Python SDK is intended to help you use OctoAI endpoints. In its simplest form, it lets you run inferences against an endpoint by passing a dictionary containing the necessary inputs.

Python

1 import time
2 from octoai.client import OctoAI
3 
4 client = OctoAI()
5 
6 # It allows you to run inferences
7 output = client.infer(endpoint_url="your-endpoint-url", inputs={"keyword": "dictionary"})
8 
9 # It also allows for inference streams for LLMs
10 for token in client.infer_stream("your-endpoint-url", inputs={"keyword": "dictionary"}):
11     if token.get("object") == "chat.completion.chunk":
12         # Do stuff with the token
13         pass
14 
15 # And for server-side asynchronous inferences
16 future = client.infer_async("your-endpoint-url", {"keyword": "dictionary"})
17 # Typically, you'd collect additional futures then poll for status, but for the sake of example...
18 while not client.is_future_ready(future):
19     time.sleep(1)
20 # Once the results are ready, you can use them in the same way as you
21 # typically do for demo endpoints
22 result = client.get_future_result(future)
23 
24 # It also includes health checks
25 if client.health_check("your-healthcheck-url") == 200:
26     # Run some inferences
27     pass

1	import time
2	from octoai.client import OctoAI
3
4	client = OctoAI()
5
6	# It allows you to run inferences
7	output = client.infer(endpoint_url="your-endpoint-url", inputs={"keyword": "dictionary"})
8
9	# It also allows for inference streams for LLMs
10	for token in client.infer_stream("your-endpoint-url", inputs={"keyword": "dictionary"}):
11	    if token.get("object") == "chat.completion.chunk":
12	        # Do stuff with the token
13	        pass
14
15	# And for server-side asynchronous inferences
16	future = client.infer_async("your-endpoint-url", {"keyword": "dictionary"})
17	# Typically, you'd collect additional futures then poll for status, but for the sake of example...
18	while not client.is_future_ready(future):
19	    time.sleep(1)
20	# Once the results are ready, you can use them in the same way as you
21	# typically do for demo endpoints
22	result = client.get_future_result(future)
23
24	# It also includes health checks
25	if client.health_check("your-healthcheck-url") == 200:
26	    # Run some inferences
27	    pass