Use this pattern when your agent streams response chunks back to a caller in real time while keeping the span open for the full duration of the stream.

The key constraint: the stream must be consumed inside the wrapped function. Returning a generator or stream handle directly ends the span before any chunk is produced (see Streaming and async generators for why). To forward chunks to a caller, pass a streaming bridge into the wrapped function and read from it externally:
import { registerOTel, wrapAgent } from "@uselemma/tracing";

registerOTel();

const wrapped = wrapAgent("my-agent", async ({ onComplete }, input: {
  userMessage: string;
  controller: ReadableStreamDefaultController<string>;
}) => {
  let fullResponse = "";

  // streamLLM is a placeholder for your model's streaming call
  for await (const chunk of streamLLM(input.userMessage)) {
    fullResponse += chunk;
    input.controller.enqueue(chunk);
  }

  onComplete(fullResponse);
  input.controller.close();
  return fullResponse;
});

export function handleRequest(userMessage: string) {
  // Definite-assignment assertion: start() runs synchronously in the
  // ReadableStream constructor, so runIdPromise is assigned before use.
  let runIdPromise!: Promise<string>;

  const stream = new ReadableStream<string>({
    start(controller) {
      runIdPromise = wrapped({ userMessage, controller }).then(({ runId }) => runId);
    },
  });

  return { stream, runIdPromise };
}
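On the caller side, the returned stream can be drained with a standard web-streams reader while `runIdPromise` resolves independently. A minimal sketch of the draining logic; the `ReadableStream` here is a trivial stand-in for the one returned by `handleRequest`, so the snippet is self-contained:

```typescript
// Stand-in for the stream returned by handleRequest (hypothetical content).
const stream = new ReadableStream<string>({
  start(controller) {
    controller.enqueue("partial ");
    controller.enqueue("response");
    controller.close();
  },
});

// Reads every chunk from a stream and concatenates it, mirroring what an
// SSE-forwarding loop or test harness would do with the bridge's output.
async function drain(s: ReadableStream<string>): Promise<string> {
  const reader = s.getReader();
  let text = "";
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    text += value;
  }
  return text;
}
```

In a real handler you would write each `value` to the response as it arrives rather than accumulating it, and await `runIdPromise` afterwards.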
Key points:
  • The stream bridge decouples chunk forwarding from span lifetime — the span stays open until the wrapped function returns after the last chunk.
  • Wait for the wrapped invocation to finish before relying on runId or assuming the span is closed.
  • Replace the queue or ReadableStream example with whatever your framework uses to write SSE events.
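As a concrete alternative to `ReadableStream` for the last point, a hand-rolled async queue works as the same kind of bridge: the wrapped function enqueues chunks, the caller consumes them with `for await`, and the span still stays open until the producer finishes. Everything below is a hypothetical sketch, not part of @uselemma/tracing:

```typescript
// Minimal async queue bridge: a producer enqueues chunks and closes the
// queue; a consumer iterates it with for-await until it is drained.
class AsyncQueue<T> {
  private items: T[] = [];
  private waiters: ((r: IteratorResult<T>) => void)[] = [];
  private closed = false;

  enqueue(item: T) {
    const waiter = this.waiters.shift();
    if (waiter) waiter({ value: item, done: false });
    else this.items.push(item);
  }

  close() {
    this.closed = true;
    // Wake any pending consumers with an end-of-stream signal.
    for (const waiter of this.waiters.splice(0)) {
      waiter({ value: undefined as unknown as T, done: true });
    }
  }

  async *[Symbol.asyncIterator]() {
    while (true) {
      if (this.items.length > 0) {
        yield this.items.shift()!;
        continue;
      }
      if (this.closed) return;
      const r = await new Promise<IteratorResult<T>>((resolve) =>
        this.waiters.push(resolve),
      );
      if (r.done) return;
      yield r.value;
    }
  }
}

// Producer stands in for the wrapped agent writing into the bridge;
// consumer stands in for the caller forwarding SSE events.
async function demo(): Promise<string> {
  const queue = new AsyncQueue<string>();
  (async () => {
    for (const chunk of ["stream", "ed"]) queue.enqueue(chunk);
    queue.close();
  })();

  let out = "";
  for await (const chunk of queue) out += chunk;
  return out;
}
```

The queue plays the same role as the `controller` in the example above: pass it into the wrapped function in place of `input.controller`, call `enqueue`/`close` where the example calls `controller.enqueue`/`controller.close`, and iterate it from your framework's response handler.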