OpenAI-compatible Responses API
responses.js is an open-source, lightweight server implementing OpenAI's Responses API, built on top of Chat Completions and powered by Hugging Face Inference Providers.
API Endpoint:
https://wauplin-responses-js.hf.space/v1
Get started by sending requests to this endpoint
OpenAI-compatible
Stateless implementation of the Responses API
Stateless implementation of the Responses API
Inference Providers
Powered by Hugging Face Inference Providers
Powered by Hugging Face Inference Providers
Multi-modal
Text and image input support
Text and image input support
Streaming & Structured Output
Supports streaming, JSON schema, and function calling
Supports streaming, JSON schema, and function calling
Examples
from openai import OpenAI
import os

# Point the standard OpenAI client at the responses.js endpoint.
# Authentication uses a Hugging Face token (HF_TOKEN).
client = OpenAI(
    base_url="https://wauplin-responses-js.hf.space/v1",
    api_key=os.getenv("HF_TOKEN"),  # visit https://huggingface.co/settings/tokens
)

# Minimal text-only request: system-style instructions plus a plain-string input.
response = client.responses.create(
    model="Qwen/Qwen2.5-VL-7B-Instruct",
    instructions="You are a helpful assistant.",
    input="Tell me a three sentence bedtime story about a unicorn.",
)

print(response)
print(response.output_text)  # convenience accessor for the generated text
from openai import OpenAI
import os

# Point the standard OpenAI client at the responses.js endpoint.
client = OpenAI(
    base_url="https://wauplin-responses-js.hf.space/v1",
    api_key=os.getenv("HF_TOKEN"),  # visit https://huggingface.co/settings/tokens
)

# Multi-modal request: a single user message carrying both a text part
# ("input_text") and an image part ("input_image" referenced by URL).
response = client.responses.create(
    model="Qwen/Qwen2.5-VL-7B-Instruct",
    input=[
        {
            "role": "user",
            "content": [
                {"type": "input_text", "text": "what is in this image?"},
                {
                    "type": "input_image",
                    "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
                },
            ],
        }
    ],
)

print(response)
print(response.output_text)
from openai import OpenAI
import os

# Point the standard OpenAI client at the responses.js endpoint.
client = OpenAI(
    base_url="https://wauplin-responses-js.hf.space/v1",
    api_key=os.getenv("HF_TOKEN"),  # visit https://huggingface.co/settings/tokens
)

# Multi-message input: a "developer" message sets behavior, followed by the
# actual user question.
response = client.responses.create(
    model="Qwen/Qwen2.5-VL-7B-Instruct",
    input=[
        {
            "role": "developer",
            "content": "Talk like a pirate.",
        },
        {
            "role": "user",
            "content": "Are semicolons optional in JavaScript?",
        },
    ],
)

print(response)
print(response.output_text)
from openai import OpenAI
import os

# Point the standard OpenAI client at the responses.js endpoint.
client = OpenAI(
    base_url="https://wauplin-responses-js.hf.space/v1",
    api_key=os.getenv("HF_TOKEN"),  # visit https://huggingface.co/settings/tokens
)

# With stream=True the call returns an iterator of server-sent events
# instead of a single response object.
stream = client.responses.create(
    model="Qwen/Qwen2.5-VL-7B-Instruct",
    input=[
        {
            "role": "user",
            "content": "Say 'double bubble bath' ten times fast.",
        },
    ],
    stream=True,
)

# Print each streamed event as it arrives.
for event in stream:
    print(event)
from openai import OpenAI
import os

# Point the standard OpenAI client at the responses.js endpoint.
client = OpenAI(
    base_url="https://wauplin-responses-js.hf.space/v1",
    api_key=os.getenv("HF_TOKEN"),  # visit https://huggingface.co/settings/tokens
)

# Function-calling: declare a tool with a JSON-schema parameter description.
# The model may respond with a call to this function instead of plain text.
tools = [
    {
        "type": "function",
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"},
                "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
            },
            "required": ["location", "unit"],
        },
    }
]

# "provider@model" syntax selects a specific inference provider for the model.
response = client.responses.create(
    model="cerebras@meta-llama/Llama-3.3-70B-Instruct",
    tools=tools,
    input="What is the weather like in Boston today?",
    tool_choice="auto",  # let the model decide whether to call the tool
)

print(response)
from openai import OpenAI
from pydantic import BaseModel
import os

# Point the standard OpenAI client at the responses.js endpoint.
client = OpenAI(
    base_url="https://wauplin-responses-js.hf.space/v1",
    api_key=os.getenv("HF_TOKEN"),  # visit https://huggingface.co/settings/tokens
)


class CalendarEvent(BaseModel):
    """Target schema the model's output is parsed into."""

    name: str
    date: str
    participants: list[str]


# responses.parse() constrains the model to the CalendarEvent JSON schema and
# returns the result already validated as a pydantic instance.
response = client.responses.parse(
    model="novita@meta-llama/Meta-Llama-3-70B-Instruct",
    input=[
        {"role": "system", "content": "Extract the event information."},
        {
            "role": "user",
            "content": "Alice and Bob are going to a science fair on Friday.",
        },
    ],
    text_format=CalendarEvent,
)

print(response.output_parsed)  # a CalendarEvent instance, not raw JSON