Use uselemma-tracing with the Anthropic Python SDK to get a top-level run trace from `agent()` plus a child span for every `messages.create` call — with prompts, completions, model, token usage, and timing.
How It Works
register_otel() sets up the OTel transport. instrument_anthropic() is a convenience wrapper around openinference-instrumentation-anthropic that patches the anthropic package so every API call emits a child span inside whatever span is currently active.
Getting Started
Install
pip install uselemma-tracing anthropic openinference-instrumentation-anthropic
Register at startup
from uselemma_tracing import register_otel, instrument_anthropic
register_otel()
instrument_anthropic() # patches the anthropic package
Set LEMMA_API_KEY and LEMMA_PROJECT_ID environment variables. Find them in your Lemma project settings.
Examples
Single-turn completion
from uselemma_tracing import agent, TraceContext
import anthropic
client = anthropic.AsyncAnthropic()
async def run_agent(user_message: str, ctx: TraceContext) -> str:
message = await client.messages.create(
model="claude-haiku-4-5",
max_tokens=1024,
messages=[{"role": "user", "content": user_message}],
)
return message.content[0].text
supportAgent = agent("support-agent", run_agent)
res = await supportAgent("Explain async/await in one sentence.")
print(res.result)
print(res.run_id)
# Tool-use example: @tool wraps get_weather so every call emits a
# tool.get-weather child span inside the agent run.
from uselemma_tracing import agent, tool, TraceContext
import anthropic
import json

client = anthropic.AsyncAnthropic()

@tool("get-weather")
async def get_weather(city: str) -> dict:
    # Stubbed lookup: returns a static payload for the example.
    return {"city": city, "temperature": "72°F", "condition": "sunny"}

# Tool schema advertised to the model. NOTE: the API-facing tool name is
# "get_weather" while the traced span is named "get-weather" (see @tool above).
TOOLS = [
    {
        "name": "get_weather",
        "description": "Get current weather for a city",
        "input_schema": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    }
]

async def run_agent(user_message: str, ctx: TraceContext) -> str:
    """Tool-use loop: call the model, run each requested tool, feed the
    results back, and repeat until stop_reason is no longer "tool_use"."""
    messages = [{"role": "user", "content": user_message}]
    response = await client.messages.create(
        model="claude-sonnet-4-5",
        max_tokens=1024,
        tools=TOOLS,
        messages=messages,
    )
    while response.stop_reason == "tool_use":
        tool_uses = [b for b in response.content if b.type == "tool_use"]
        # Echo the assistant turn back verbatim so the model sees its own
        # tool_use blocks before the matching tool_result blocks.
        messages.append({"role": "assistant", "content": response.content})
        tool_results = []
        for block in tool_uses:
            # Only one tool is registered, so dispatch directly to it.
            result = await get_weather(block.input["city"])  # tool.get-weather span
            tool_results.append(
                {"type": "tool_result", "tool_use_id": block.id, "content": json.dumps(result)}
            )
        # Tool results go back as a "user" turn, per the Messages API contract.
        messages.append({"role": "user", "content": tool_results})
        response = await client.messages.create(
            model="claude-sonnet-4-5",
            max_tokens=1024,
            tools=TOOLS,
            messages=messages,
        )
    # Return the first text block of the final reply, or "" if there is none.
    text_blocks = [b for b in response.content if b.type == "text"]
    return text_blocks[0].text if text_blocks else ""

weatherAgent = agent("weather-agent", run_agent)
res = await weatherAgent("What's the weather in Paris?")
Streaming with ctx.complete()
For streaming, consume the stream inside the wrapper and call ctx.complete() once the full text is assembled. Pass streaming=True so the wrapper does not auto-close on return:
# Streaming example: the wrapper is created with streaming=True so it does not
# auto-close the span on return; ctx.complete() records the output instead.
from uselemma_tracing import agent, TraceContext
import anthropic

client = anthropic.AsyncAnthropic()

async def run_agent(user_message: str, ctx: TraceContext):
    full_text = ""
    async with client.messages.stream(
        model="claude-haiku-4-5",
        max_tokens=512,
        messages=[{"role": "user", "content": user_message}],
    ) as stream:
        # Accumulate the streamed deltas into one string.
        async for text in stream.text_stream:
            full_text += text
    # Called once the stream is fully consumed.
    ctx.complete(full_text)  # store assembled text as output, close span
    return full_text

streamingAgent = agent("streaming-agent", run_agent, streaming=True)
res = await streamingAgent("Write a haiku about observability.")
What You’ll See in Lemma
| Span | Source | Contains |
|---|---|---|
| ai.agent.run | agent() wrapper | Full run input, output, timing, run ID |
| gen_ai.chat | OpenInference | Model name, messages, completion, token usage |
| tool.get-weather | @tool decorator | Tool input and return value |
Next Steps