Skip to content

多模态

ZenMux 支持多模态输入,可通过多种 API 协议使用:

  • OpenAI Chat Completion API:使用 image_url(图片)、file(PDF/视频)和 input_audio(音频)内容类型
  • OpenAI Responses API:使用 input_image(图片)和 input_file(仅 PDF)内容类型
  • Anthropic Messages API:使用 image、document、audio、video 内容类型,支持 base64 和 URL
  • Google Vertex AI API:使用 Part 对象传递图片、文件、音频和视频

支持的输入类型:

  • 文本输入
  • 图片输入
  • PDF 输入
  • 音频输入
  • 视频输入

OpenAI Chat Completion API

图片输入

使用图片链接

python
from openai import OpenAI

client = OpenAI(
    base_url="https://zenmux.ai/api/v1", 
    api_key="<你的 ZENMUX_API_KEY>", 
)

response = client.chat.completions.create(
    model="google/gemini-2.5-pro",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "请分析一下图片的内容"
                },
                {
                    "type": "image_url", 
                    "image_url": { 
                        "url": "https://cdn.marmot-cloud.com/storage/tbox-router/2025/08/05/e9445SU/shengchengtupian2025-04-09-19_31.png"
                    }
                }
            ]
        }
    ]
)

print(response.choices[0].message.content)
ts
import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "https://zenmux.ai/api/v1", 
  apiKey: "<你的 ZENMUX_API_KEY>", 
});

const response = await client.chat.completions.create({
  model: "google/gemini-2.5-pro",
  messages: [
    {
      role: "user",
      content: [
        {
          type: "text",
          text: "请分析一下图片的内容",
        },
        {
          type: "image_url", 
          image_url: {
            url: "https://cdn.marmot-cloud.com/storage/tbox-router/2025/08/05/e9445SU/shengchengtupian2025-04-09-19_31.png", 
          },
        },
      ],
    },
  ],
});

console.log(response.choices[0].message.content);

使用图片 Base64 编码

python
import base64
from openai import OpenAI

def encode_image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as a Base64 string.

    The file is read in binary mode and the Base64 bytes are decoded as
    UTF-8, so the result is a ``str``.
    """
    with open(image_path, mode="rb") as source:
        raw_bytes = source.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

client = OpenAI(
    base_url="https://zenmux.ai/api/v1", 
    api_key="<你的 ZENMUX_API_KEY>", 
)

image_path = "path/to/your/image.jpg"
base64_image = encode_image_to_base64(image_path)
data_url = f"data:image/jpeg;base64,{base64_image}"

response = client.chat.completions.create(
    model="google/gemini-2.5-pro",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "请分析一下图片的内容"
                },
                {
                    "type": "image_url", 
                    "image_url": { 
                        "url": data_url 
                    }
                }
            ]
        }
    ]
)

print(response.choices[0].message.content)
ts
import OpenAI from "openai";
import * as fs from "fs";

async function encodeImageToBase64(imagePath: string): Promise<string> {
  const imageBuffer = await fs.promises.readFile(imagePath);
  const base64Image = imageBuffer.toString("base64");
  return `data:image/jpeg;base64,${base64Image}`;
}

const client = new OpenAI({
  baseURL: "https://zenmux.ai/api/v1", 
  apiKey: "<你的 ZENMUX_API_KEY>", 
});

const imagePath = "path/to/your/image.jpg";
const base64Image = await encodeImageToBase64(imagePath);

const response = await client.chat.completions.create({
  model: "google/gemini-2.5-pro",
  messages: [
    {
      role: "user",
      content: [
        {
          type: "text",
          text: "请分析一下图片的内容",
        },
        {
          type: "image_url", 
          image_url: {
            url: base64Image, 
          },
        },
      ],
    },
  ],
});

console.log(response.choices[0].message.content);

PDF 输入

使用 PDF 链接

python
from openai import OpenAI

client = OpenAI(
    base_url="https://zenmux.ai/api/v1", 
    api_key="<你的 ZENMUX_API_KEY>", 
)

response = client.chat.completions.create(
    model="google/gemini-2.5-pro",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "请分析一下文件的主要内容"
                },
                {
                    "type": "file", 
                    "file": { 
                        "filename": "test.pdf", 
                        "file_data": "https://cdn.marmot-cloud.com/storage/tbox-router/2025/08/06/uyZbd8m/xiaoxingxingzhaopengyou.pdf"
                    }
                }
            ]
        }
    ]
)

print(response.choices[0].message.content)
typescript
import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "https://zenmux.ai/api/v1", 
  apiKey: "<你的 ZENMUX_API_KEY>", 
});

const response = await client.chat.completions.create({
  model: "google/gemini-2.5-pro",
  messages: [
    {
      role: "user",
      content: [
        {
          type: "text",
          text: "请分析一下文件的主要内容",
        },
        {
          type: "file", 
          file: {
            filename: "test.pdf", 
            file_data:
              "https://cdn.marmot-cloud.com/storage/tbox-router/2025/08/06/uyZbd8m/xiaoxingxingzhaopengyou.pdf", 
          },
        },
      ],
    },
  ],
});

console.log(response.choices[0].message.content);

使用 PDF Base64 编码

python
import base64
from openai import OpenAI

def encode_pdf_to_base64(pdf_path):
    """Return the contents of the file at ``pdf_path`` as a Base64 string.

    The file is read in binary mode and the Base64 bytes are decoded as
    UTF-8, so the result is a ``str``.
    """
    with open(pdf_path, mode="rb") as source:
        raw_bytes = source.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

client = OpenAI(
    base_url="https://zenmux.ai/api/v1", 
    api_key="<你的 ZENMUX_API_KEY>", 
)

pdf_path = "path/to/your/test.pdf"
base64_pdf = encode_pdf_to_base64(pdf_path)
data_url = f"data:application/pdf;base64,{base64_pdf}"

response = client.chat.completions.create(
    model="google/gemini-2.5-pro",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "请分析一下文件的主要内容"
                },
                {
                    "type": "file", 
                    "file": { 
                        "filename": "test.pdf", 
                        "file_data": data_url 
                    }
                }
            ]
        }
    ]
)

print(response.choices[0].message.content)
typescript
import OpenAI from "openai";
import * as fs from "fs";

async function encodePDFToBase64(pdfPath: string): Promise<string> {
  const pdfBuffer = await fs.promises.readFile(pdfPath);
  const base64PDF = pdfBuffer.toString("base64");
  return `data:application/pdf;base64,${base64PDF}`;
}

const client = new OpenAI({
  baseURL: "https://zenmux.ai/api/v1", 
  apiKey: "<你的 ZENMUX_API_KEY>", 
});

const pdfPath = "path/to/your/test.pdf";
const base64PDF = await encodePDFToBase64(pdfPath);

const response = await client.chat.completions.create({
  model: "google/gemini-2.5-pro",
  messages: [
    {
      role: "user",
      content: [
        {
          type: "text",
          text: "请分析一下文件的主要内容",
        },
        {
          type: "file", 
          file: {
            filename: "test.pdf", 
            file_data: base64PDF, 
          },
        },
      ],
    },
  ],
});

console.log(response.choices[0].message.content);

音频输入

使用 input_audio 类型传递音频文件,需要使用 Base64 编码。

python
import base64
from openai import OpenAI

def encode_audio_to_base64(audio_path):
    """Return the contents of the file at ``audio_path`` as a Base64 string.

    The file is read in binary mode and the Base64 bytes are decoded as
    UTF-8, so the result is a ``str``.
    """
    with open(audio_path, mode="rb") as source:
        raw_bytes = source.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

client = OpenAI(
    base_url="https://zenmux.ai/api/v1", 
    api_key="<你的 ZENMUX_API_KEY>", 
)

audio_path = "path/to/your/audio.mp3"
base64_audio = encode_audio_to_base64(audio_path)

response = client.chat.completions.create(
    model="google/gemini-2.5-pro",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "请描述这段音频的内容"
                },
                {
                    "type": "input_audio", 
                    "input_audio": { 
                        "data": base64_audio, 
                        "format": "mp3"
                    }
                }
            ]
        }
    ]
)

print(response.choices[0].message.content)
typescript
import OpenAI from "openai";
import * as fs from "fs";

async function encodeAudioToBase64(audioPath: string): Promise<string> {
  const audioBuffer = await fs.promises.readFile(audioPath);
  return audioBuffer.toString("base64");
}

const client = new OpenAI({
  baseURL: "https://zenmux.ai/api/v1", 
  apiKey: "<你的 ZENMUX_API_KEY>", 
});

const audioPath = "path/to/your/audio.mp3";
const base64Audio = await encodeAudioToBase64(audioPath);

const response = await client.chat.completions.create({
  model: "google/gemini-2.5-pro",
  messages: [
    {
      role: "user",
      content: [
        {
          type: "text",
          text: "请描述这段音频的内容",
        },
        {
          type: "input_audio", 
          input_audio: {
            data: base64Audio, 
            format: "mp3", 
          },
        },
      ],
    },
  ],
});

console.log(response.choices[0].message.content);

视频输入

使用 file 类型传递视频文件,支持 URL 和 Base64 编码两种方式。

使用视频链接

python
from openai import OpenAI

client = OpenAI(
    base_url="https://zenmux.ai/api/v1", 
    api_key="<你的 ZENMUX_API_KEY>", 
)

response = client.chat.completions.create(
    model="google/gemini-2.5-pro",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "请描述这段视频的内容"
                },
                {
                    "type": "file", 
                    "file": { 
                        "filename": "video.mp4", 
                        "file_data": "https://example.com/video.mp4"
                    }
                }
            ]
        }
    ]
)

print(response.choices[0].message.content)
typescript
import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "https://zenmux.ai/api/v1", 
  apiKey: "<你的 ZENMUX_API_KEY>", 
});

const response = await client.chat.completions.create({
  model: "google/gemini-2.5-pro",
  messages: [
    {
      role: "user",
      content: [
        {
          type: "text",
          text: "请描述这段视频的内容",
        },
        {
          type: "file", 
          file: {
            filename: "video.mp4", 
            file_data: "https://example.com/video.mp4", 
          },
        },
      ],
    },
  ],
});

console.log(response.choices[0].message.content);

使用视频 Base64 编码

python
import base64
from openai import OpenAI

def encode_video_to_base64(video_path):
    """Return the contents of the file at ``video_path`` as a Base64 string.

    The file is read in binary mode and the Base64 bytes are decoded as
    UTF-8, so the result is a ``str``.
    """
    with open(video_path, mode="rb") as source:
        raw_bytes = source.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

client = OpenAI(
    base_url="https://zenmux.ai/api/v1", 
    api_key="<你的 ZENMUX_API_KEY>", 
)

video_path = "path/to/your/video.mp4"
base64_video = encode_video_to_base64(video_path)
data_url = f"data:video/mp4;base64,{base64_video}"

response = client.chat.completions.create(
    model="google/gemini-2.5-pro",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "请描述这段视频的内容"
                },
                {
                    "type": "file", 
                    "file": { 
                        "filename": "video.mp4", 
                        "file_data": data_url 
                    }
                }
            ]
        }
    ]
)

print(response.choices[0].message.content)
typescript
import OpenAI from "openai";
import * as fs from "fs";

async function encodeVideoToBase64(videoPath: string): Promise<string> {
  const videoBuffer = await fs.promises.readFile(videoPath);
  const base64Video = videoBuffer.toString("base64");
  return `data:video/mp4;base64,${base64Video}`;
}

const client = new OpenAI({
  baseURL: "https://zenmux.ai/api/v1", 
  apiKey: "<你的 ZENMUX_API_KEY>", 
});

const videoPath = "path/to/your/video.mp4";
const base64Video = await encodeVideoToBase64(videoPath);

const response = await client.chat.completions.create({
  model: "google/gemini-2.5-pro",
  messages: [
    {
      role: "user",
      content: [
        {
          type: "text",
          text: "请描述这段视频的内容",
        },
        {
          type: "file", 
          file: {
            filename: "video.mp4", 
            file_data: base64Video, 
          },
        },
      ],
    },
  ],
});

console.log(response.choices[0].message.content);

OpenAI Responses API

Responses API 使用 input_image 和 input_file 内容类型处理多模态输入。

注意

当前 Responses API 仅支持图片和 PDF 文件输入,不支持音频和视频输入。如需处理音频或视频,请使用 Chat Completion API 或 Vertex AI API。

图片输入

python
from openai import OpenAI

client = OpenAI(
    base_url="https://zenmux.ai/api/v1",
    api_key="<你的 ZENMUX_API_KEY>", 
)

response = client.responses.create(
    model="openai/gpt-5", 
    input=[
        {
            "role": "user",
            "content": [
                {
                    "type": "input_text",
                    "text": "请分析一下图片的内容"
                },
                {
                    "type": "input_image", 
                    "image_url": "https://cdn.marmot-cloud.com/storage/tbox-router/2025/08/05/e9445SU/shengchengtupian2025-04-09-19_31.png"
                }
            ]
        }
    ]
)

# 提取回答
for item in response.output:
    if item.type == "message":
        for content in item.content:
            if content.type == "output_text":
                print(content.text)
typescript
import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "https://zenmux.ai/api/v1",
  apiKey: "<你的 ZENMUX_API_KEY>", 
});

const response = await client.responses.create({
  model: "openai/gpt-5", 
  input: [
    {
      role: "user",
      content: [
        {
          type: "input_text",
          text: "请分析一下图片的内容",
        },
        {
          type: "input_image", 
          image_url:
            "https://cdn.marmot-cloud.com/storage/tbox-router/2025/08/05/e9445SU/shengchengtupian2025-04-09-19_31.png", 
        },
      ],
    },
  ],
});

// 提取回答
for (const item of response.output) {
  if (item.type === "message") {
    for (const content of item.content) {
      if (content.type === "output_text") {
        console.log(content.text);
      }
    }
  }
}
bash
curl https://zenmux.ai/api/v1/responses \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $ZENMUX_API_KEY" \
  -d '{
    "model": "openai/gpt-5",
    "input": [
      {
        "role": "user",
        "content": [
          {
            "type": "input_text",
            "text": "请分析一下图片的内容"
          },
          {
            "type": "input_image",
            "image_url": "https://cdn.marmot-cloud.com/storage/tbox-router/2025/08/05/e9445SU/shengchengtupian2025-04-09-19_31.png"
          }
        ]
      }
    ]
  }'

使用 Base64 编码

python
import base64

def encode_image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as a Base64 string.

    The file is read in binary mode and the Base64 bytes are decoded as
    UTF-8, so the result is a ``str``.
    """
    with open(image_path, mode="rb") as source:
        raw_bytes = source.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

base64_image = encode_image_to_base64("path/to/image.jpg")

response = client.responses.create(
    model="openai/gpt-5",
    input=[
        {
            "role": "user",
            "content": [
                {"type": "input_text", "text": "描述这张图片"},
                {
                    "type": "input_image",
                    "image_url": f"data:image/jpeg;base64,{base64_image}"
                }
            ]
        }
    ]
)

PDF 输入

python
from openai import OpenAI

client = OpenAI(
    base_url="https://zenmux.ai/api/v1",
    api_key="<你的 ZENMUX_API_KEY>", 
)

response = client.responses.create(
    model="openai/gpt-5", 
    input=[
        {
            "role": "user",
            "content": [
                {"type": "input_text", "text": "请总结这个文档的主要内容"},
                {
                    "type": "input_file", 
                    "file_url": "https://www.example.com/document.pdf"
                }
            ]
        }
    ]
)

# 提取回答
for item in response.output:
    if item.type == "message":
        for content in item.content:
            if content.type == "output_text":
                print(content.text)
bash
curl https://zenmux.ai/api/v1/responses \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $ZENMUX_API_KEY" \
  -d '{
    "model": "openai/gpt-5",
    "input": [
      {
        "role": "user",
        "content": [
          {
            "type": "input_text",
            "text": "请总结这个文档的主要内容"
          },
          {
            "type": "input_file",
            "file_url": "https://www.example.com/document.pdf"
          }
        ]
      }
    ]
  }'

Anthropic Messages API

Anthropic Messages API 支持使用 image、document、audio、video 内容类型处理多模态输入,支持 base64 编码和 URL 两种方式。

提示

通过 ZenMux 的协议转换功能,Anthropic 协议可以路由到支持音频和视频的模型(如 Gemini)。使用支持多模态的模型时,所有输入类型都可用。

注意:音频和视频输入需要使用 Google Cloud Storage 的 gs:// URL 格式(例如 gs://cloud-samples-data/generative-ai/audio/pixel.mp3)才能被 Gemini 模型正确处理。如果需要使用本地文件或其他 URL,建议使用 Vertex AI API 协议。

支持的格式

| 类型 | 支持的格式 |
| --- | --- |
| 图片 | JPEG、PNG、GIF、WebP |
| 文档 | PDF |
| 音频 | WAV、MP3、AIFF、AAC、OGG、FLAC |
| 视频 | MP4、AVI、MOV、MKV、WEBM 等 |

使用图片 URL

python
import anthropic

client = anthropic.Anthropic(
    api_key="<你的 ZENMUX_API_KEY>", 
    base_url="https://zenmux.ai/api/anthropic"
)

message = client.messages.create(
    model="anthropic/claude-sonnet-4.5", 
    max_tokens=1024,
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image", 
                    "source": { 
                        "type": "url", 
                        "url": "https://cdn.marmot-cloud.com/storage/tbox-router/2025/08/05/e9445SU/shengchengtupian2025-04-09-19_31.png"
                    }
                },
                {
                    "type": "text",
                    "text": "请分析一下图片的内容"
                }
            ]
        }
    ]
)

print(message.content[0].text)
typescript
import Anthropic from "@anthropic-ai/sdk";

const client = new Anthropic({
  apiKey: "<你的 ZENMUX_API_KEY>", 
  baseURL: "https://zenmux.ai/api/anthropic", 
});

const message = await client.messages.create({
  model: "anthropic/claude-sonnet-4.5", 
  max_tokens: 1024,
  messages: [
    {
      role: "user",
      content: [
        {
          type: "image", 
          source: {
            type: "url", 
            url: "https://cdn.marmot-cloud.com/storage/tbox-router/2025/08/05/e9445SU/shengchengtupian2025-04-09-19_31.png", 
          },
        },
        {
          type: "text",
          text: "请分析一下图片的内容",
        },
      ],
    },
  ],
});

console.log(message.content[0].text);
bash
curl https://zenmux.ai/api/anthropic/v1/messages \
  -H "x-api-key: $ZENMUX_API_KEY" \
  -H "anthropic-version: 2023-06-01" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "anthropic/claude-sonnet-4.5",
    "max_tokens": 1024,
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "image",
            "source": {
              "type": "url",
              "url": "https://cdn.marmot-cloud.com/storage/tbox-router/2025/08/05/e9445SU/shengchengtupian2025-04-09-19_31.png"
            }
          },
          {
            "type": "text",
            "text": "请分析一下图片的内容"
          }
        ]
      }
    ]
  }'

使用 Base64 编码

python
import anthropic
import base64

def encode_image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as a Base64 string.

    The file is read in binary mode and the Base64 bytes are decoded as
    UTF-8, so the result is a ``str``.
    """
    with open(image_path, mode="rb") as source:
        raw_bytes = source.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

client = anthropic.Anthropic(
    api_key="<你的 ZENMUX_API_KEY>",
    base_url="https://zenmux.ai/api/anthropic"
)

base64_image = encode_image_to_base64("path/to/image.jpg")

message = client.messages.create(
    model="anthropic/claude-sonnet-4.5",
    max_tokens=1024,
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64", 
                        "media_type": "image/jpeg", 
                        "data": base64_image 
                    }
                },
                {
                    "type": "text",
                    "text": "请分析一下图片的内容"
                }
            ]
        }
    ]
)

print(message.content[0].text)
bash
# Encode the image as a single-line Base64 string first.
# The file is read from stdin because the meaning of `-i` differs between
# macOS/BSD (input file) and GNU coreutils (--ignore-garbage). GNU base64 also
# wraps its output at 76 columns, and literal newlines inside the JSON string
# below would make the request body invalid, so they are stripped with tr.
BASE64_IMAGE=$(base64 < path/to/image.jpg | tr -d '\n')

curl https://zenmux.ai/api/anthropic/v1/messages \
  -H "x-api-key: $ZENMUX_API_KEY" \
  -H "anthropic-version: 2023-06-01" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "anthropic/claude-sonnet-4.5",
    "max_tokens": 1024,
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "image",
            "source": {
              "type": "base64",
              "media_type": "image/jpeg",
              "data": "'"$BASE64_IMAGE"'"
            }
          },
          {
            "type": "text",
            "text": "请分析一下图片的内容"
          }
        ]
      }
    ]
  }'

多张图片

Claude 支持在单次请求中分析多张图片:

python
message = client.messages.create(
    model="anthropic/claude-sonnet-4.5",
    max_tokens=1024,
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {"type": "url", "url": "https://example.com/image1.jpg"}
                },
                {
                    "type": "image",
                    "source": {"type": "url", "url": "https://example.com/image2.jpg"}
                },
                {
                    "type": "text",
                    "text": "请比较这两张图片的异同"
                }
            ]
        }
    ]
)

PDF 输入

使用 PDF 链接

python
import anthropic

client = anthropic.Anthropic(
    api_key="<你的 ZENMUX_API_KEY>",
    base_url="https://zenmux.ai/api/anthropic"
)

message = client.messages.create(
    model="anthropic/claude-sonnet-4.5",
    max_tokens=1024,
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "document", 
                    "source": { 
                        "type": "url", 
                        "url": "https://example.com/document.pdf"
                    }
                },
                {
                    "type": "text",
                    "text": "请总结这个文档的主要内容"
                }
            ]
        }
    ]
)

print(message.content[0].text)
typescript
import Anthropic from "@anthropic-ai/sdk";

const client = new Anthropic({
  apiKey: "<你的 ZENMUX_API_KEY>",
  baseURL: "https://zenmux.ai/api/anthropic",
});

const message = await client.messages.create({
  model: "anthropic/claude-sonnet-4.5",
  max_tokens: 1024,
  messages: [
    {
      role: "user",
      content: [
        {
          type: "document", 
          source: {
            type: "url", 
            url: "https://example.com/document.pdf", 
          },
        },
        {
          type: "text",
          text: "请总结这个文档的主要内容",
        },
      ],
    },
  ],
});

console.log(message.content[0].text);
bash
curl https://zenmux.ai/api/anthropic/v1/messages \
  -H "x-api-key: $ZENMUX_API_KEY" \
  -H "anthropic-version: 2023-06-01" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "anthropic/claude-sonnet-4.5",
    "max_tokens": 1024,
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "document",
            "source": {
              "type": "url",
              "url": "https://example.com/document.pdf"
            }
          },
          {
            "type": "text",
            "text": "请总结这个文档的主要内容"
          }
        ]
      }
    ]
  }'

使用 Base64 编码

python
import anthropic
import base64

def encode_pdf_to_base64(pdf_path):
    """Return the contents of the file at ``pdf_path`` as a Base64 string.

    The file is read in binary mode and the Base64 bytes are decoded as
    UTF-8, so the result is a ``str``.
    """
    with open(pdf_path, mode="rb") as source:
        raw_bytes = source.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

client = anthropic.Anthropic(
    api_key="<你的 ZENMUX_API_KEY>",
    base_url="https://zenmux.ai/api/anthropic"
)

base64_pdf = encode_pdf_to_base64("path/to/document.pdf")

message = client.messages.create(
    model="anthropic/claude-sonnet-4.5",
    max_tokens=1024,
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "document",
                    "source": {
                        "type": "base64", 
                        "media_type": "application/pdf", 
                        "data": base64_pdf 
                    }
                },
                {
                    "type": "text",
                    "text": "请总结这个文档的主要内容"
                }
            ]
        }
    ]
)

print(message.content[0].text)
bash
# Encode the PDF as a single-line Base64 string first.
# The file is read from stdin because the meaning of `-i` differs between
# macOS/BSD (input file) and GNU coreutils (--ignore-garbage). GNU base64 also
# wraps its output at 76 columns, and literal newlines inside the JSON string
# below would make the request body invalid, so they are stripped with tr.
BASE64_PDF=$(base64 < path/to/document.pdf | tr -d '\n')

curl https://zenmux.ai/api/anthropic/v1/messages \
  -H "x-api-key: $ZENMUX_API_KEY" \
  -H "anthropic-version: 2023-06-01" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "anthropic/claude-sonnet-4.5",
    "max_tokens": 1024,
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "document",
            "source": {
              "type": "base64",
              "media_type": "application/pdf",
              "data": "'"$BASE64_PDF"'"
            }
          },
          {
            "type": "text",
            "text": "请总结这个文档的主要内容"
          }
        ]
      }
    ]
  }'

多个文档

支持在单次请求中分析多个 PDF 文档:

python
message = client.messages.create(
    model="anthropic/claude-sonnet-4.5",
    max_tokens=1024,
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "document",
                    "source": {"type": "url", "url": "https://example.com/document1.pdf"}
                },
                {
                    "type": "document",
                    "source": {"type": "url", "url": "https://example.com/document2.pdf"}
                },
                {
                    "type": "text",
                    "text": "请比较这两个文档的内容"
                }
            ]
        }
    ]
)

音频输入

支持多种音频格式:WAV、MP3、AIFF、AAC、OGG、FLAC

python
import anthropic

client = anthropic.Anthropic(
    api_key="<你的 ZENMUX_API_KEY>",
    base_url="https://zenmux.ai/api/anthropic"
)

message = client.messages.create(
    model="google/gemini-2.5-pro", 
    max_tokens=1024,
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "audio", 
                    "source": { 
                        "type": "url", 
                        "url": "gs://cloud-samples-data/generative-ai/audio/pixel.mp3"
                    }
                },
                {
                    "type": "text",
                    "text": "请描述这段音频的内容"
                }
            ]
        }
    ]
)

print(message.content[0].text)
typescript
import Anthropic from "@anthropic-ai/sdk";

const client = new Anthropic({
  apiKey: "<你的 ZENMUX_API_KEY>",
  baseURL: "https://zenmux.ai/api/anthropic",
});

const message = await client.messages.create({
  model: "google/gemini-2.5-pro", 
  max_tokens: 1024,
  messages: [
    {
      role: "user",
      content: [
        {
          type: "audio", 
          source: {
            type: "url", 
            url: "gs://cloud-samples-data/generative-ai/audio/pixel.mp3", 
          },
        },
        {
          type: "text",
          text: "请描述这段音频的内容",
        },
      ],
    },
  ],
});

console.log(message.content[0].text);
bash
curl https://zenmux.ai/api/anthropic/v1/messages \
  -H "x-api-key: $ZENMUX_API_KEY" \
  -H "anthropic-version: 2023-06-01" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "google/gemini-2.5-pro",
    "max_tokens": 1024,
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "audio",
            "source": {
              "type": "url",
              "url": "gs://cloud-samples-data/generative-ai/audio/pixel.mp3"
            }
          },
          {
            "type": "text",
            "text": "请描述这段音频的内容"
          }
        ]
      }
    ]
  }'

视频输入

支持多种视频格式:MP4、AVI、MOV、MKV、WEBM 等

python
import anthropic

client = anthropic.Anthropic(
    api_key="<你的 ZENMUX_API_KEY>",
    base_url="https://zenmux.ai/api/anthropic"
)

message = client.messages.create(
    model="google/gemini-2.5-pro", 
    max_tokens=1024,
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "video", 
                    "source": { 
                        "type": "url", 
                        "url": "gs://cloud-samples-data/video/animals.mp4"
                    }
                },
                {
                    "type": "text",
                    "text": "请描述这段视频的内容"
                }
            ]
        }
    ]
)

print(message.content[0].text)
typescript
import Anthropic from "@anthropic-ai/sdk";

const client = new Anthropic({
  apiKey: "<你的 ZENMUX_API_KEY>",
  baseURL: "https://zenmux.ai/api/anthropic",
});

const message = await client.messages.create({
  model: "google/gemini-2.5-pro", 
  max_tokens: 1024,
  messages: [
    {
      role: "user",
      content: [
        {
          type: "video", 
          source: {
            type: "url", 
            url: "gs://cloud-samples-data/video/animals.mp4", 
          },
        },
        {
          type: "text",
          text: "请描述这段视频的内容",
        },
      ],
    },
  ],
});

console.log(message.content[0].text);
bash
curl https://zenmux.ai/api/anthropic/v1/messages \
  -H "x-api-key: $ZENMUX_API_KEY" \
  -H "anthropic-version: 2023-06-01" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "google/gemini-2.5-pro",
    "max_tokens": 1024,
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "video",
            "source": {
              "type": "url",
              "url": "gs://cloud-samples-data/video/animals.mp4"
            }
          },
          {
            "type": "text",
            "text": "请描述这段视频的内容"
          }
        ]
      }
    ]
  }'

Google Vertex AI API

Vertex AI 的 Gemini 模型使用 Part 对象传递多模态内容,支持图片、PDF、音频、视频等多种格式。

支持的格式

| 类型 | 支持的格式 |
| --- | --- |
| 图片 | PNG、JPEG、WebP、HEIC、HEIF |
| 文档 | PDF |
| 音频 | WAV、MP3、AIFF、AAC、OGG、FLAC |
| 视频 | MP4、AVI、MOV、MKV、WEBM 等 |

图片输入

python
from google import genai
from google.genai import types

client = genai.Client(
    api_key="<你的 ZENMUX_API_KEY>", 
    vertexai=True,
    http_options=types.HttpOptions(
        api_version='v1',
        base_url='https://zenmux.ai/api/vertex-ai'
    ),
)

# 使用图片 URL
response = client.models.generate_content(
    model="google/gemini-2.5-pro", 
    contents=[
        types.Part.from_uri( 
            file_uri="https://cdn.marmot-cloud.com/storage/tbox-router/2025/08/05/e9445SU/shengchengtupian2025-04-09-19_31.png",
            mime_type="image/png"
        ),
        "请分析一下图片的内容"
    ]
)

print(response.text)
typescript
import { GoogleGenAI } from "@google/genai";

const client = new GoogleGenAI({
  apiKey: "<你的 ZENMUX_API_KEY>", 
  vertexai: true,
  httpOptions: {
    baseUrl: "https://zenmux.ai/api/vertex-ai", 
    apiVersion: "v1",
  },
});

const response = await client.models.generateContent({
  model: "google/gemini-2.5-pro", 
  contents: [
    {
      role: "user",
      parts: [
        {
          fileData: {
            fileUri:
              "https://cdn.marmot-cloud.com/storage/tbox-router/2025/08/05/e9445SU/shengchengtupian2025-04-09-19_31.png", 
            mimeType: "image/png", 
          },
        },
        { text: "请分析一下图片的内容" },
      ],
    },
  ],
});

console.log(response.text);
bash
curl https://zenmux.ai/api/vertex-ai/v1/projects/PROJECT_ID/locations/LOCATION/publishers/google/models/gemini-2.5-pro:generateContent \
  -H "Authorization: Bearer $ZENMUX_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "contents": [{
      "role": "user",
      "parts": [
        {
          "fileData": {
            "fileUri": "https://cdn.marmot-cloud.com/storage/tbox-router/2025/08/05/e9445SU/shengchengtupian2025-04-09-19_31.png",
            "mimeType": "image/png"
          }
        },
        {"text": "请分析一下图片的内容"}
      ]
    }]
  }'

使用 Base64 编码

python
import base64
from google import genai
from google.genai import types

def encode_image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as a Base64 string."""
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

# Send google-genai requests to the ZenMux Vertex AI endpoint.
zenmux_http_options = types.HttpOptions(
    api_version="v1",
    base_url="https://zenmux.ai/api/vertex-ai",
)
client = genai.Client(
    api_key="<你的 ZENMUX_API_KEY>",
    vertexai=True,
    http_options=zenmux_http_options,
)

base64_image = encode_image_to_base64("path/to/image.jpg")

# The Base64 text is decoded again, so from_bytes receives bytes rather than
# the encoded string.
image_part = types.Part.from_bytes(
    data=base64.b64decode(base64_image),
    mime_type="image/jpeg",
)
prompt = "请分析一下图片的内容"

response = client.models.generate_content(
    model="google/gemini-2.5-pro",
    contents=[image_part, prompt],
)

print(response.text)

PDF 输入

python
# Reference a PDF by URL and ask for a summary.
# This snippet uses `client` and `types` without defining them, so it needs the
# client setup shown in the image examples.
pdf_part = types.Part.from_uri(
    file_uri="https://example.com/document.pdf",
    mime_type="application/pdf",
)
summary_prompt = "请总结这个文档的主要内容"

response = client.models.generate_content(
    model="google/gemini-2.5-pro",
    contents=[pdf_part, summary_prompt],
)

print(response.text)

多张图片

python
# This snippet uses `client` and `types` without defining them, so it needs the
# client setup shown in the image examples.
# Each image is passed as its own Part; the text prompt is the last item.
response = client.models.generate_content(
    model="google/gemini-2.5-pro",
    contents=[
        types.Part.from_uri(
            file_uri="https://example.com/image1.jpg",
            mime_type="image/jpeg"
        ),
        types.Part.from_uri(
            file_uri="https://example.com/image2.jpg",
            mime_type="image/jpeg"
        ),
        "请比较这两张图片的异同"
    ]
)

# Print the reply, as the other examples do, so the result is not discarded.
print(response.text)

音频输入

Gemini 支持多种音频格式:WAV、MP3、AIFF、AAC、OGG、FLAC

python
from google import genai
from google.genai import types

# Send google-genai requests to the ZenMux Vertex AI endpoint.
zenmux_http_options = types.HttpOptions(
    api_version="v1",
    base_url="https://zenmux.ai/api/vertex-ai",
)
client = genai.Client(
    api_key="<你的 ZENMUX_API_KEY>",
    vertexai=True,
    http_options=zenmux_http_options,
)

# The audio file is referenced by URL, together with its MIME type.
audio_part = types.Part.from_uri(
    file_uri="https://example.com/audio.mp3",
    mime_type="audio/mp3",
)
prompt = "请描述这段音频的内容"

response = client.models.generate_content(
    model="google/gemini-2.5-pro",
    contents=[audio_part, prompt],
)

print(response.text)
bash
# Send one user turn (an audio file referenced by URL plus a text prompt) as a
# JSON body to the gemini-2.5-pro generateContent path on the ZenMux endpoint.
# The bearer token is read from the ZENMUX_API_KEY shell variable.
# NOTE(review): PROJECT_ID and LOCATION look like placeholders in the URL path;
# confirm which values ZenMux expects here.
curl https://zenmux.ai/api/vertex-ai/v1/projects/PROJECT_ID/locations/LOCATION/publishers/google/models/gemini-2.5-pro:generateContent \
  -H "Authorization: Bearer $ZENMUX_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "contents": [{
      "role": "user",
      "parts": [
        {
          "fileData": {
            "fileUri": "https://example.com/audio.mp3",
            "mimeType": "audio/mp3"
          }
        },
        {"text": "请描述这段音频的内容"}
      ]
    }]
  }'

视频输入

Gemini 支持多种视频格式:MP4、AVI、MOV、MKV、WEBM 等

python
from google import genai
from google.genai import types

# Send google-genai requests to the ZenMux Vertex AI endpoint.
zenmux_http_options = types.HttpOptions(
    api_version="v1",
    base_url="https://zenmux.ai/api/vertex-ai",
)
client = genai.Client(
    api_key="<你的 ZENMUX_API_KEY>",
    vertexai=True,
    http_options=zenmux_http_options,
)

# The video file is referenced by URL, together with its MIME type.
video_part = types.Part.from_uri(
    file_uri="https://example.com/video.mp4",
    mime_type="video/mp4",
)
prompt = "请描述这段视频的内容"

response = client.models.generate_content(
    model="google/gemini-2.5-pro",
    contents=[video_part, prompt],
)

print(response.text)
bash
# Send one user turn (a video file referenced by URL plus a text prompt) as a
# JSON body to the gemini-2.5-pro generateContent path on the ZenMux endpoint.
# The bearer token is read from the ZENMUX_API_KEY shell variable.
# NOTE(review): PROJECT_ID and LOCATION look like placeholders in the URL path;
# confirm which values ZenMux expects here.
curl https://zenmux.ai/api/vertex-ai/v1/projects/PROJECT_ID/locations/LOCATION/publishers/google/models/gemini-2.5-pro:generateContent \
  -H "Authorization: Bearer $ZENMUX_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "contents": [{
      "role": "user",
      "parts": [
        {
          "fileData": {
            "fileUri": "https://example.com/video.mp4",
            "mimeType": "video/mp4"
          }
        },
        {"text": "请描述这段视频的内容"}
      ]
    }]
  }'

协议对比

| 特性 | Chat Completion | Responses API | Anthropic Messages | Vertex AI |
| --- | --- | --- | --- | --- |
| 图片类型名 | image_url | input_image | image | Part |
| URL 支持 | url 字段 | image_url 字段 | type: "url" | file_uri |
| Base64 支持 | ✅ data URL | ✅ data URL | type: "base64" | from_bytes |
| PDF 支持 | file 类型 | input_file | document 类型 | mime_type |
| 音频支持 | input_audio 类型 | ❌ 不支持 | audio 类型 | audio/* |
| 视频支持 | file 类型 | ❌ 不支持 | video 类型 | video/* |
| 多图片 |  |  | ✅ 最多 100 张 | ✅ 最多 3000 张 |