Examples

Working code examples for the VoiceRun Transcribe API.


Python

Stream audio from a file and receive real-time transcription:

"""Stream a raw PCM16 audio file to the VoiceRun Transcribe API.

Reads ``audio.raw`` (assumed 16 kHz, mono, 16-bit PCM — matching the session
config below), sends it in real-time-paced 20 ms chunks over the WebSocket,
and prints partial and final transcriptions as they arrive.
"""

import asyncio
import base64
import json

import websockets  # third-party: pip install websockets

WS_URL = "wss://transcribe.voicerun.com"
API_KEY = "YOUR_API_KEY"

# One 20 ms chunk of 16 kHz mono PCM16: 320 samples * 2 bytes per sample.
CHUNK_BYTES = 640
CHUNK_SECONDS = 0.02


async def transcribe():
    """Connect, configure the session, stream the file, print transcripts."""
    async with websockets.connect(
        f"{WS_URL}/ws",
        extra_headers={"Authorization": f"Bearer {API_KEY}"},
    ) as ws:
        # Step 1: the server opens with a session.created event.
        msg = json.loads(await ws.recv())
        print(f"Session: {msg['session']['id']}")

        # Step 2: configure the session and wait for the acknowledgement.
        await ws.send(json.dumps({
            "type": "session.update",
            "session": {
                "model": "nova-3",
                "provider": "DEEPGRAM",
                "language": "en",
                "input_audio_format": "pcm16",
                "sample_rate": 16000,
            },
        }))
        msg = json.loads(await ws.recv())
        assert msg["type"] == "session.updated"

        # Step 3: stream the audio in 20 ms chunks, paced to real time so
        # the server receives it at the same rate a live microphone would.
        with open("audio.raw", "rb") as f:
            while chunk := f.read(CHUNK_BYTES):
                await ws.send(json.dumps({
                    "type": "audio.append",
                    "audio": base64.b64encode(chunk).decode(),
                }))
                await asyncio.sleep(CHUNK_SECONDS)

        # Step 4: signal that no more audio is coming. (The original
        # example sent this *after* the receive loop below, where it was
        # unreachable until the server had already closed the connection —
        # at which point the send would raise.)
        await ws.send(json.dumps({"type": "session.close"}))

        # Step 5: drain transcription events until the server closes.
        async for raw in ws:
            msg = json.loads(raw)
            if msg["type"] == "transcription.completed":
                print(f"Final: {msg['text']}")
            elif msg["type"] == "transcription.partial":
                print(f"Partial: {msg['text']}")


asyncio.run(transcribe())

JavaScript (Node.js)

// VoiceRun Transcribe quickstart for Node.js (third-party "ws" package).
import WebSocket from "ws";

const socket = new WebSocket("wss://transcribe.voicerun.com/ws", {
  headers: { Authorization: "Bearer YOUR_API_KEY" },
});

// One handler per server event type; unrecognized types are ignored.
const handlers = {
  "session.created": () => {
    // First event from the server — reply with the session configuration.
    socket.send(
      JSON.stringify({
        type: "session.update",
        session: {
          model: "nova-3",
          provider: "DEEPGRAM",
          language: "en",
          input_audio_format: "pcm16",
          sample_rate: 16000,
        },
      })
    );
  },
  "session.updated": () => {
    console.log("Ready - start sending audio");
  },
  "transcription.partial": (msg) => {
    console.log("Partial:", msg.text);
  },
  "transcription.completed": (msg) => {
    console.log("Final:", msg.text);
  },
  error: (msg) => {
    console.error(msg.error.code, msg.error.message);
  },
};

socket.on("message", (data) => {
  const msg = JSON.parse(data);
  const handle = handlers[msg.type];
  if (handle) handle(msg);
});

Microphone Streaming (Browser)

Stream audio directly from the user's microphone:

// Browser microphone → VoiceRun Transcribe, using the native WebSocket API.
// NOTE(review): ScriptProcessorNode is deprecated in favour of AudioWorklet;
// it is kept here because it is universally supported and keeps the example short.
const API_KEY = "YOUR_API_KEY";

// Convert one Float32 audio frame to a base64-encoded PCM16 payload.
function floatFrameToBase64(samples) {
  const pcm16 = new Int16Array(samples.length);
  for (let i = 0; i < samples.length; i++) {
    // Scale [-1, 1] floats to 16-bit range, clamping to avoid overflow.
    pcm16[i] = Math.max(-32768, Math.min(32767, samples[i] * 32768));
  }
  return btoa(String.fromCharCode(...new Uint8Array(pcm16.buffer)));
}

async function startTranscription() {
  const ws = new WebSocket("wss://transcribe.voicerun.com/ws");

  // Browser WebSockets cannot set headers, so auth goes in the first message.
  ws.onopen = () => {
    ws.send(JSON.stringify({ type: "auth", token: API_KEY }));
  };

  ws.onmessage = async (event) => {
    const msg = JSON.parse(event.data);

    if (msg.type === "session.created") {
      // Configure the session as soon as the server announces it.
      ws.send(
        JSON.stringify({
          type: "session.update",
          session: {
            model: "nova-3",
            provider: "DEEPGRAM",
            language: "en",
            input_audio_format: "pcm16",
            sample_rate: 16000,
          },
        })
      );
    }

    if (msg.type === "session.updated") {
      // Session is ready — start capturing and forwarding microphone audio.
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      const audioContext = new AudioContext({ sampleRate: 16000 });
      const source = audioContext.createMediaStreamSource(stream);
      const processor = audioContext.createScriptProcessor(512, 1, 1);

      processor.onaudioprocess = (e) => {
        const frame = e.inputBuffer.getChannelData(0);
        ws.send(
          JSON.stringify({ type: "audio.append", audio: floatFrameToBase64(frame) })
        );
      };

      source.connect(processor);
      processor.connect(audioContext.destination);
    }

    if (msg.type === "transcription.completed") {
      console.log("Transcription:", msg.text);
    }
  };
}

startTranscription();
Tags: transcription, websocket, stt, examples