Skip to main content
Use uselemma-tracing with the Anthropic Python SDK to get a top-level run trace from each `agent()` run, plus a child span for every messages.create call — with prompts, completions, model, token usage, and timing.

How It Works

register_otel() sets up the OTel transport. instrument_anthropic() is a convenience wrapper around openinference-instrumentation-anthropic that patches the anthropic package so every API call emits a child span inside whatever span is currently active.

Getting Started

Install

pip install uselemma-tracing anthropic openinference-instrumentation-anthropic

Register at startup

from uselemma_tracing import register_otel, instrument_anthropic

register_otel()
instrument_anthropic()  # patches the anthropic package
Set LEMMA_API_KEY and LEMMA_PROJECT_ID environment variables. Find them in your Lemma project settings.

Examples

Single-turn completion

from uselemma_tracing import agent, TraceContext
import anthropic

client = anthropic.AsyncAnthropic()  # instrument_anthropic() patches this package, so calls emit child spans

async def run_agent(user_message: str, ctx: TraceContext) -> str:
    """Send a single user turn to Claude and return the reply text.

    `ctx` is injected by the agent() wrapper; the instrumented client
    emits a gen_ai.chat child span for the messages.create call.
    """
    response = await client.messages.create(
        model="claude-haiku-4-5",
        max_tokens=1024,
        messages=[{"role": "user", "content": user_message}],
    )
    # A plain (non-tool) completion's first content block is the text block.
    return response.content[0].text

supportAgent = agent("support-agent", run_agent)  # wraps run_agent in an ai.agent.run span

# NOTE: top-level `await` only works in async-aware REPLs (IPython/Jupyter);
# in a script, run this inside an async function via asyncio.run(...).
res = await supportAgent("Explain async/await in one sentence.")
print(res.result)  # the string returned by run_agent
print(res.run_id)  # run ID linking this invocation to its trace in Lemma

Tool-calling agent with @tool

from uselemma_tracing import agent, tool, TraceContext
import anthropic
import json

client = anthropic.AsyncAnthropic()  # instrument_anthropic() patches this package, so calls emit child spans

@tool("get-weather")
async def get_weather(city: str) -> dict:
    """Toy weather lookup.

    The @tool decorator records the input and return value on a
    tool.get-weather span nested under the agent run.
    """
    return dict(city=city, temperature="72°F", condition="sunny")

# Anthropic tool schema advertised to the model. When the model emits a
# tool_use block for "get_weather", run_agent executes the get_weather
# coroutine above and feeds the result back.
TOOLS = [
    {
        "name": "get_weather",
        "description": "Get current weather for a city",
        "input_schema": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    }
]

async def run_agent(user_message: str, ctx: TraceContext) -> str:
    """Drive a tool-calling loop with Claude.

    Calls the model, executes any requested tools, appends the results
    to the conversation, and repeats until the model stops asking for
    tools. Returns the final text answer ("" if the last reply had no
    text block).
    """
    conversation = [{"role": "user", "content": user_message}]

    while True:
        reply = await client.messages.create(
            model="claude-sonnet-4-5",
            max_tokens=1024,
            tools=TOOLS,
            messages=conversation,
        )

        if reply.stop_reason != "tool_use":
            break

        # Echo the assistant turn back verbatim, tool_use blocks included,
        # so the follow-up request has the full context.
        conversation.append({"role": "assistant", "content": reply.content})

        results = []
        for request in reply.content:
            if request.type != "tool_use":
                continue
            outcome = await get_weather(request.input["city"])  # tool.get-weather span
            results.append(
                {
                    "type": "tool_result",
                    "tool_use_id": request.id,
                    "content": json.dumps(outcome),
                }
            )

        conversation.append({"role": "user", "content": results})

    # Return the first text block of the final reply, if any.
    for part in reply.content:
        if part.type == "text":
            return part.text
    return ""

weatherAgent = agent("weather-agent", run_agent)  # ai.agent.run parent span for the whole loop
res = await weatherAgent("What's the weather in Paris?")  # run inside an async context

Streaming with ctx.complete()

For streaming, consume the stream inside the wrapper and call ctx.complete() once the full text is assembled. Pass streaming=True so the wrapper does not auto-close on return:
from uselemma_tracing import agent, TraceContext
import anthropic

client = anthropic.AsyncAnthropic()  # instrument_anthropic() patches this package, so calls emit child spans

async def run_agent(user_message: str, ctx: TraceContext):
    """Stream a completion, assemble the full text, and report it.

    With streaming=True on the agent() wrapper, the run span stays open
    until ctx.complete() is called with the assembled output.
    """
    chunks: list[str] = []

    stream_cm = client.messages.stream(
        model="claude-haiku-4-5",
        max_tokens=512,
        messages=[{"role": "user", "content": user_message}],
    )
    async with stream_cm as stream:
        async for piece in stream.text_stream:
            chunks.append(piece)

    assembled = "".join(chunks)
    # Record the assembled text as the run output and close the span.
    ctx.complete(assembled)
    return assembled

# streaming=True: the wrapper does not auto-close the span on return;
# the function must call ctx.complete() itself.
streamingAgent = agent("streaming-agent", run_agent, streaming=True)
res = await streamingAgent("Write a haiku about observability.")  # run inside an async context

What You’ll See in Lemma

| Span | Source | Contains |
| --- | --- | --- |
| `ai.agent.run` | `agent` | Full run input, output, timing, run ID |
| `gen_ai.chat` | OpenInference | Model name, messages, completion, token usage |
| `tool.get-weather` | `@tool` decorator | Tool input and return value |

Next Steps