Skip to content

Streaming

ZenMux allows any model to return generated results incrementally in a streaming fashion, rather than returning the full response at once. Streaming output lets users see the first token from the model immediately, reducing wait time. This can significantly improve user experience, especially for real-time conversations and long-form generation.

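You can enable streaming output by setting the `stream` parameter to `true` in your request. There are two ways to do this: Method 1 uses the OpenAI-compatible SDK (shown first, in Python and TypeScript), and Method 2 calls the ZenMux API directly.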

python
from openai import OpenAI

# Point the OpenAI SDK at the ZenMux endpoint used throughout this guide.
client = OpenAI(
    base_url="https://zenmux.ai/api/v1",
    api_key="<your ZENMUX_API_KEY>",
)

stream = client.chat.completions.create(
    model="openai/gpt-5",
    messages=[
        {
            "role": "user",
            "content": "What is the meaning of life?",
        }
    ],
    # Enable streaming mode by setting stream=True
    stream=True,
)

# When streaming mode (stream=True) is enabled, the response shape changes.
# You need to iterate over the stream and consume each individual chunk
for chunk in stream:
    # A chunk may arrive with an empty `choices` list (the OpenAI API does this
    # for the usage-only chunk when usage reporting is requested), so guard
    # before indexing to avoid an IndexError.
    if not chunk.choices:
        continue

    delta = chunk.choices[0].delta  # <-- Use the delta field

    if delta.content:
        # flush=True pushes each token to the terminal right away instead of
        # leaving it in the stdout buffer until a newline shows up.
        print(delta.content, end="", flush=True)

# Finish the streamed text with a newline so the shell prompt starts cleanly.
print()
ts
import OpenAI from "openai";

// Connection settings for the ZenMux endpoint; replace the placeholder key
// with your own before running.
const zenmuxClientOptions = {
  baseURL: "https://zenmux.ai/api/v1",
  apiKey: "<your ZENMUX_API_KEY>",
};

const openai = new OpenAI(zenmuxClientOptions);

/**
 * Requests a streamed chat completion and writes the generated text to
 * stdout piece by piece as the chunks arrive.
 */
async function main() {
  const stream = await openai.chat.completions.create({
    model: "openai/gpt-5",
    messages: [
      {
        role: "user",
        content: "What is the meaning of life?",
      },
    ],
    // Enable streaming mode by setting stream: true
    stream: true,
  });

  // When streaming mode (stream: true) is enabled, the response shape changes.
  // You need to iterate over the stream and consume each individual chunk
  for await (const chunk of stream) {
    // `chunk` and `delta` must be declared: this file is an ES module (strict
    // mode), where assigning to an undeclared name throws a ReferenceError.
    // Optional chaining covers chunks that arrive with an empty `choices` list.
    const delta = chunk.choices[0]?.delta; // <-- Use the delta field

    if (delta?.content) {
      // Write without a trailing newline so the tokens join into continuous
      // text; console.log would put every token on its own line.
      process.stdout.write(delta.content);
    }
  }

  // Finish the streamed text with a newline.
  process.stdout.write("\n");
}

// Start the example; `void` marks the returned promise as deliberately not awaited.
void main();

Method 2: Call the ZenMux API Directly

python
import httpx
import json

async def stream_openai_chat_completion() -> None:
    """Stream a chat completion from the ZenMux API and print it as it arrives.

    Sends a POST request with ``"stream": True`` to the chat completions
    endpoint and writes each piece of the response body to stdout as soon as
    it is received, without waiting for the full response.

    Raises:
        httpx.HTTPStatusError: If the server responds with a 4xx or 5xx status.
    """
    api_key = "<your ZENMUX_API_KEY>"
    headers = {
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "openai/gpt-5",
        "messages": [
            {
                "role": "user",
                "content": "What is the meaning of life?",
            }
        ],
        "stream": True,
    }

    async with httpx.AsyncClient() as client:
        # timeout=None disables httpx's default timeouts, so a long generation
        # is not cut off part-way through the stream.
        async with client.stream(
            method="POST",
            url="https://zenmux.ai/api/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=None,
        ) as response:
            response.raise_for_status()

            # aiter_text() decodes incrementally, so a multi-byte character
            # split across two network chunks is reassembled instead of
            # raising UnicodeDecodeError as a per-chunk bytes.decode() would.
            async for text_chunk in response.aiter_text():
                # end="" reproduces the body exactly as sent (no extra newline
                # at arbitrary chunk boundaries); flush=True shows it at once.
                print(text_chunk, end="", flush=True)

if __name__ == "__main__":
    # Script entry point: asyncio is only needed when the file is run directly.
    import asyncio

    streaming_job = stream_openai_chat_completion()
    asyncio.run(streaming_job)
typescript
// Call the chat completions endpoint directly and log the raw streamed
// response body piece by piece as it arrives.
fetch("https://zenmux.ai/api/v1/chat/completions", {
  method: "POST",
  headers: {
    Authorization: "Bearer <your ZENMUX_API_KEY>",
    "Content-Type": "application/json",
  },
  body: JSON.stringify({
    model: "openai/gpt-5",
    messages: [
      {
        role: "user",
        content: "What is the meaning of life?",
      },
    ],
    stream: true,
  }),
})
  .then(async (response) => {
    // fetch() only rejects on network failure, so HTTP error statuses have to
    // be surfaced explicitly instead of being printed as if they were output.
    if (!response.ok) {
      throw new Error(
        `Request failed: ${response.status} ${response.statusText}`
      );
    }
    if (!response.body) {
      throw new Error("Response has no body to stream");
    }

    const textDecoder = new TextDecoder();
    // Read through a reader rather than `for await`: not every runtime
    // implements async iteration over a ReadableStream.
    const reader = response.body.getReader();
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) {
          break;
        }
        // { stream: true } keeps an incomplete multi-byte sequence buffered
        // until the next chunk instead of emitting a replacement character.
        const textChunk = textDecoder.decode(value, { stream: true });
        console.log(textChunk);
      }

      // Flush whatever the decoder is still holding at end of stream.
      const remainder = textDecoder.decode();
      if (remainder) {
        console.log(remainder);
      }
    } finally {
      reader.releaseLock();
    }
  })
  .catch((error) => {
    // Report request and streaming failures instead of leaving the rejection
    // unhandled.
    console.error(error);
  });
bash

# -N (--no-buffer) makes curl print each streamed chunk as soon as it arrives.
# Every continuation backslash must be the last character on its line: a space
# after it escapes the space instead of the newline and splits the command.
curl -N "https://zenmux.ai/api/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $ZENMUX_API_KEY" \
  -d '{
    "model": "openai/gpt-5",
    "messages": [
      {
        "role": "user",
        "content": "What is the meaning of life?"
      }
    ],
    "stream": true
  }'