If you’re building agents directly with an LLM provider’s SDK — or with a framework Lemma doesn’t have a native integration for — you can still capture full traces using wrapAgent and registerOTel from @uselemma/tracing (or the Python equivalents).
What Gets Traced
wrapAgent always emits a top-level span with your agent’s name, inputs, outputs, and timing. However, LLM provider SDKs (openai, anthropic, etc.) have no built-in OTel support, so individual LLM calls won’t produce child spans on their own.
To get per-call visibility (token usage, model name, latency per LLM call), add a provider instrumentor. OpenInference provides instrumentors for both OpenAI and Anthropic that auto-patch the client to emit spans through your existing OTel provider.
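With both in place, a single agent run produces a small span tree. Roughly (an illustrative sketch; exact child-span names depend on the instrumentor):

my-agent              ← root span from wrapAgent: name, inputs, outputs, timing
├── LLM call #1       ← child span from the provider instrumentor: model, token usage, latency
└── LLM call #2       ← one child span per LLM call your agent makes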
Getting Started
Step 1: Install Dependencies
# OpenAI
npm install @uselemma/tracing openai @arizeai/openinference-instrumentation-openai
# Anthropic
npm install @uselemma/tracing @anthropic-ai/sdk @arizeai/openinference-instrumentation-anthropic
# OpenAI
pip install "uselemma-tracing[openai]" openai
# Anthropic
pip install "uselemma-tracing[anthropic]" anthropic
Step 2: Register and Instrument
Call this once at your application’s entry point, before importing your LLM provider’s SDK, so the instrumentor can patch the client before it is used.
// instrumentation.ts (Next.js)
export async function register() {
  if (process.env.NEXT_RUNTIME === 'nodejs') {
    const { registerOTel } = await import('@uselemma/tracing');
    registerOTel();

    // OpenAI
    const { OpenAIInstrumentation } = await import('@arizeai/openinference-instrumentation-openai');
    new OpenAIInstrumentation().instrument();

    // Anthropic
    const { AnthropicInstrumentation } = await import('@arizeai/openinference-instrumentation-anthropic');
    new AnthropicInstrumentation().instrument();
  }
}
For Node.js outside of Next.js, create a tracer.ts and import it first:
// tracer.ts
import { registerOTel } from '@uselemma/tracing';
import { OpenAIInstrumentation } from '@arizeai/openinference-instrumentation-openai';
import { AnthropicInstrumentation } from '@arizeai/openinference-instrumentation-anthropic';
registerOTel();
new OpenAIInstrumentation().instrument();
new AnthropicInstrumentation().instrument();
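A minimal entry point then looks something like this (index.ts is an assumed file name; the important part is that the tracer import comes before the provider SDK import, so the client is patched before it is used):

// index.ts
import './tracer'; // must be the first import
import OpenAI from 'openai';

const client = new OpenAI();
// ...rest of your application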
# OpenAI
from uselemma_tracing import instrument_openai
instrument_openai()
# Anthropic
from uselemma_tracing import instrument_anthropic
instrument_anthropic()
Set the LEMMA_API_KEY and LEMMA_PROJECT_ID environment variables in your application. You can find both values in your Lemma project settings.
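For local development you might keep them in a .env file (placeholder values shown):

LEMMA_API_KEY=<your-api-key>
LEMMA_PROJECT_ID=<your-project-id>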
Tracing an Agent
Wrap your agent function with wrapAgent. The top-level span captures inputs, outputs, and timing. The provider instrumentor adds child spans for each LLM call with token counts and model metadata.
OpenAI
import OpenAI from 'openai';
import { wrapAgent } from '@uselemma/tracing';

const client = new OpenAI();

export const callAgent = async (userMessage: string) => {
  const wrappedFn = wrapAgent(
    'my-agent',
    async ({ onComplete }, input) => {
      const response = await client.chat.completions.create({
        model: 'gpt-4o',
        messages: [{ role: 'user', content: input.userMessage }],
      });
      const text = response.choices[0].message.content ?? '';
      onComplete(text);
      return text;
    }
  );
  const { result, runId } = await wrappedFn({ userMessage });
  return { result, runId };
};
from openai import AsyncOpenAI
from uselemma_tracing import TraceContext, wrap_agent

client = AsyncOpenAI()

async def run_agent(ctx: TraceContext, user_message: str):
    response = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": user_message}],
    )
    text = response.choices[0].message.content or ""
    ctx.on_complete(text)
    return text

async def call_agent(user_message: str):
    wrapped = wrap_agent("my-agent", run_agent)
    result, run_id, span = await wrapped(user_message)
    return result, run_id
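In either language, call the wrapper like any other async function. Keeping the returned runId (for example, logging it or returning it to your client) makes it easy to find the corresponding trace later. A minimal sketch using the TypeScript callAgent above:

// e.g. in a request handler (adapt to your framework)
const { result, runId } = await callAgent('Summarize my last three orders');
console.log(`agent run ${runId}:`, result);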
Anthropic
import Anthropic from '@anthropic-ai/sdk';
import { wrapAgent } from '@uselemma/tracing';

const client = new Anthropic();

export const callAgent = async (userMessage: string) => {
  const wrappedFn = wrapAgent(
    'my-agent',
    async ({ onComplete }, input) => {
      const response = await client.messages.create({
        model: 'claude-sonnet-4-5',
        max_tokens: 1024,
        messages: [{ role: 'user', content: input.userMessage }],
      });
      const text = response.content[0].type === 'text' ? response.content[0].text : '';
      onComplete(text);
      return text;
    }
  );
  const { result, runId } = await wrappedFn({ userMessage });
  return { result, runId };
};
import anthropic
from uselemma_tracing import TraceContext, wrap_agent

client = anthropic.AsyncAnthropic()

async def run_agent(ctx: TraceContext, user_message: str):
    response = await client.messages.create(
        model="claude-sonnet-4-5",
        max_tokens=1024,
        messages=[{"role": "user", "content": user_message}],
    )
    text = response.content[0].text
    ctx.on_complete(text)
    return text

async def call_agent(user_message: str):
    wrapped = wrap_agent("my-agent", run_agent)
    result, run_id, span = await wrapped(user_message)
    return result, run_id
Streaming
For streaming responses, set autoEndRoot: true so the RunBatchSpanProcessor automatically ends the root span when all direct child spans have finished. Call onComplete to record the output once the stream finishes, and recordError if something goes wrong.
import OpenAI from 'openai';
import { wrapAgent } from '@uselemma/tracing';

const client = new OpenAI();

export const callAgent = async (userMessage: string) => {
  const wrappedFn = wrapAgent(
    'my-agent',
    async ({ onComplete, recordError }, input) => {
      try {
        const stream = await client.chat.completions.create({
          model: 'gpt-4o',
          messages: [{ role: 'user', content: input.userMessage }],
          stream: true,
        });
        let fullText = '';
        for await (const chunk of stream) {
          fullText += chunk.choices[0]?.delta?.content ?? '';
          // yield chunk to your client here
        }
        onComplete({ text: fullText });
        return fullText;
      } catch (err) {
        recordError(err);
        throw err;
      }
    },
    { autoEndRoot: true }
  );
  const { result, runId } = await wrappedFn({ userMessage });
  return { result, runId };
};
from openai import AsyncOpenAI
from uselemma_tracing import TraceContext, wrap_agent

client = AsyncOpenAI()

async def run_agent(ctx: TraceContext, user_message: str):
    try:
        stream = await client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": user_message}],
            stream=True,
        )
        full_text = ""
        async for chunk in stream:
            full_text += chunk.choices[0].delta.content or ""
            # yield chunk to your client here
        ctx.on_complete(full_text)
        return full_text
    except Exception as err:
        ctx.record_error(err)
        raise

async def call_agent(user_message: str):
    wrapped = wrap_agent(
        "my-agent",
        run_agent,
        auto_end_root=True,
    )
    result, run_id, span = await wrapped(user_message)
    return result, run_id
Adding Custom Child Spans
For multi-step agents (retrieval, reranking, multiple LLM calls), you can create child spans for individual operations. They automatically nest under the wrapAgent span in the dashboard.
import { trace } from '@opentelemetry/api';
import { wrapAgent } from '@uselemma/tracing';

const tracer = trace.getTracer('my-app');

export const callAgent = async (userMessage: string) => {
  const wrappedFn = wrapAgent(
    'my-agent',
    async ({ onComplete }, input) => {
      const context = await tracer.startActiveSpan('retrieve-context', async (span) => {
        const docs = await vectorSearch(input.userMessage);
        span.setAttribute('docs.count', docs.length);
        span.end();
        return docs;
      });
      const response = await tracer.startActiveSpan('generate', async (span) => {
        const result = await callLLM(input.userMessage, context);
        span.end();
        return result;
      });
      onComplete(response);
      return response;
    }
  );
  const { result, runId } = await wrappedFn({ userMessage });
  return { result, runId };
};
from opentelemetry import trace
from uselemma_tracing import TraceContext, wrap_agent

tracer = trace.get_tracer("my-app")

async def run_agent(ctx: TraceContext, user_message: str):
    with tracer.start_as_current_span("retrieve-context") as span:
        docs = await vector_search(user_message)
        span.set_attribute("docs.count", len(docs))

    with tracer.start_as_current_span("generate"):
        response = await call_llm(user_message, docs)

    ctx.on_complete(response)
    return response

async def call_agent(user_message: str):
    wrapped = wrap_agent("my-agent", run_agent)
    result, run_id, span = await wrapped(user_message)
    return result, run_id
Next Steps