Use uselemma-tracing with the OpenAI Python SDK to get a top-level run trace from `agent`, plus a child span for every `chat.completions.create` call — with prompts, completions, model, token usage, and timing.
How It Works
register_otel() sets up the OTel transport. instrument_openai() is a convenience wrapper around openinference-instrumentation-openai that patches the openai package so every API call emits a gen_ai.chat child span inside whatever span is currently active.
Getting Started
Install
pip install uselemma-tracing openai openinference-instrumentation-openai
Register at startup
from uselemma_tracing import register_otel, instrument_openai
register_otel()
instrument_openai() # patches the openai package
Set LEMMA_API_KEY and LEMMA_PROJECT_ID environment variables. Find them in your Lemma project settings.
Examples
Single-turn completion
from uselemma_tracing import agent, TraceContext
from openai import AsyncOpenAI
client = AsyncOpenAI()
async def run_agent(user_message: str, ctx: TraceContext) -> str:
response = await client.chat.completions.create(
model="gpt-4o-mini",
messages=[{"role": "user", "content": user_message}],
)
return response.choices[0].message.content or ""
supportAgent = agent("support-agent", run_agent)
res = await supportAgent("How do I reset my password?")
print(res.result)
print(res.run_id)
from uselemma_tracing import agent, tool, TraceContext
from openai import AsyncOpenAI
import json
client = AsyncOpenAI()
@tool("lookup-order")
async def lookup_order(order_id: str) -> dict:
# your database lookup here
return {"order_id": order_id, "status": "shipped", "estimated_delivery": "2026-04-10"}
TOOLS = [
{
"type": "function",
"function": {
"name": "lookup_order",
"description": "Look up an order by ID",
"parameters": {
"type": "object",
"properties": {"order_id": {"type": "string"}},
"required": ["order_id"],
},
},
}
]
async def run_agent(user_message: str, ctx: TraceContext) -> str:
messages = [{"role": "user", "content": user_message}]
response = await client.chat.completions.create(
model="gpt-4o-mini", messages=messages, tools=TOOLS
)
while response.choices[0].finish_reason == "tool_calls":
tool_calls = response.choices[0].message.tool_calls or []
messages.append(response.choices[0].message)
for call in tool_calls:
args = json.loads(call.function.arguments)
result = await lookup_order(args["order_id"]) # tool.lookup-order span
messages.append(
{"role": "tool", "tool_call_id": call.id, "content": json.dumps(result)}
)
response = await client.chat.completions.create(
model="gpt-4o-mini", messages=messages, tools=TOOLS
)
result = response.choices[0].message.content or ""
return result
orderAgent = agent("order-agent", run_agent)
res = await orderAgent("What's the status of order ORD-123?")
Using the @trace decorator for helper functions
# @trace adds a named child span around any helper function call.
from uselemma_tracing import agent, trace, TraceContext
from openai import AsyncOpenAI

client = AsyncOpenAI()


@trace("format-response")
def format_response(raw: str) -> str:
    """Normalize the model output; traced as a format-response span."""
    return raw.strip().capitalize()


async def run_agent(user_message: str, ctx: TraceContext) -> str:
    """Ask the model, then return the formatted reply."""
    response = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": user_message}],
    )
    raw = response.choices[0].message.content or ""
    return format_response(raw)  # adds a format-response child span


# PEP 8: snake_case for variables (was myAgent).
my_agent = agent("my-agent", run_agent)
What You’ll See in Lemma
| Span | Source | Contains |
|---|---|---|
| ai.agent.run | agent | Full run input, output, timing, run ID |
| gen_ai.chat | OpenInference | Model name, messages, completion, token usage |
| tool.lookup-order | @tool decorator | Tool input and return value |
| format-response | @trace decorator | Function input and return value |
Next Steps