If you’re building agents directly with an LLM provider’s SDK — or with a framework Lemma doesn’t have a native integration for — you can still capture full traces using wrapAgent and registerOTel from @uselemma/tracing (or the Python equivalents).

What Gets Traced

wrapAgent always emits a top-level span with your agent’s name, inputs, outputs, and timing. However, LLM provider SDKs (openai, anthropic, etc.) have no built-in OTel support, so individual LLM calls won’t produce child spans on their own. To get per-call visibility (token usage, model name, latency per LLM call), add a provider instrumentor. OpenInference provides instrumentors for both OpenAI and Anthropic that auto-patch the client to emit spans through your existing OTel provider.
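For a single-call agent, the resulting trace is one root span from wrapAgent with one LLM child span from the instrumentor. Roughly sketched below (span names are illustrative; the exact child span name comes from the instrumentor):

// my-agent                      ← root span from wrapAgent: inputs, outputs, total latency
// └─ chat completion (gpt-4o)   ← child span from the OpenAI instrumentor: model, token usage, per-call latency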

Getting Started

Step 1: Install Dependencies

# OpenAI
npm install @uselemma/tracing openai @arizeai/openinference-instrumentation-openai

# Anthropic
npm install @uselemma/tracing @anthropic-ai/sdk @arizeai/openinference-instrumentation-anthropic

Step 2: Register and Instrument

Call this once at your application’s entry point, before your LLM provider’s SDK is imported.
// instrumentation.ts (Next.js)
export async function register() {
  if (process.env.NEXT_RUNTIME === 'nodejs') {
    const { registerOTel } = await import('@uselemma/tracing');
    registerOTel();

    // OpenAI
    const { OpenAIInstrumentation } = await import('@arizeai/openinference-instrumentation-openai');
    new OpenAIInstrumentation().instrument();

    // Anthropic
    const { AnthropicInstrumentation } = await import('@arizeai/openinference-instrumentation-anthropic');
    new AnthropicInstrumentation().instrument();
  }
}
For Node.js outside of Next.js, create a tracer.ts and import it first:
// tracer.ts
import { registerOTel } from '@uselemma/tracing';
import { OpenAIInstrumentation } from '@arizeai/openinference-instrumentation-openai';
import { AnthropicInstrumentation } from '@arizeai/openinference-instrumentation-anthropic';

registerOTel();
new OpenAIInstrumentation().instrument();
new AnthropicInstrumentation().instrument();
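Whatever entry file boots your app should then import the tracer module before anything that constructs an LLM client, so the instrumentors can patch the SDK before it loads. A minimal sketch (the file and agent module names are illustrative; callAgent is defined in the examples below):

// index.ts — import the tracer before any module that loads the LLM SDK
import './tracer';

import { callAgent } from './agent'; // hypothetical module that creates the OpenAI or Anthropic client

async function main() {
  const { result, runId } = await callAgent('What is OpenTelemetry?');
  console.log({ runId, result });
}

main();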
Set the LEMMA_API_KEY and LEMMA_PROJECT_ID environment variables in your application. You can find both values in your Lemma project settings.
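For local development you can put these in a .env file (Next.js loads .env automatically; plain Node.js needs a loader such as dotenv) or export them in your shell. The values below are placeholders:

# .env
LEMMA_API_KEY=<your-api-key>
LEMMA_PROJECT_ID=<your-project-id>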

Tracing an Agent

Wrap your agent function with wrapAgent. The top-level span captures inputs, outputs, and timing. The provider instrumentor adds child spans for each LLM call with token counts and model metadata.

OpenAI

import OpenAI from 'openai';
import { wrapAgent } from '@uselemma/tracing';

const client = new OpenAI();

export const callAgent = async (userMessage: string) => {
  const wrappedFn = wrapAgent(
    'my-agent',
    async ({ onComplete }, input) => {
      const response = await client.chat.completions.create({
        model: 'gpt-4o',
        messages: [{ role: 'user', content: input.userMessage }],
      });

      const text = response.choices[0].message.content ?? '';
      onComplete(text);
      return text;
    }
  );

  const { result, runId } = await wrappedFn({ userMessage });
  return { result, runId };
};

Anthropic

import Anthropic from '@anthropic-ai/sdk';
import { wrapAgent } from '@uselemma/tracing';

const client = new Anthropic();

export const callAgent = async (userMessage: string) => {
  const wrappedFn = wrapAgent(
    'my-agent',
    async ({ onComplete }, input) => {
      const response = await client.messages.create({
        model: 'claude-sonnet-4-5',
        max_tokens: 1024,
        messages: [{ role: 'user', content: input.userMessage }],
      });

      const text = response.content[0].type === 'text' ? response.content[0].text : '';
      onComplete(text);
      return text;
    }
  );

  const { result, runId } = await wrappedFn({ userMessage });
  return { result, runId };
};

Streaming

For streaming responses, set autoEndRoot: true so the RunBatchSpanProcessor automatically ends the root span when all direct child spans have finished. Call onComplete to record the output once the stream finishes, and recordError if something goes wrong.
import OpenAI from 'openai';
import { wrapAgent } from '@uselemma/tracing';

const client = new OpenAI();

export const callAgent = async (userMessage: string) => {
  const wrappedFn = wrapAgent(
    'my-agent',
    async ({ onComplete, recordError }, input) => {
      try {
        const stream = await client.chat.completions.create({
          model: 'gpt-4o',
          messages: [{ role: 'user', content: input.userMessage }],
          stream: true,
        });

        let fullText = '';
        for await (const chunk of stream) {
          fullText += chunk.choices[0]?.delta?.content ?? '';
          // yield chunk to your client here
        }

        onComplete({ text: fullText });
        return fullText;
      } catch (err) {
        recordError(err);
        throw err;
      }
    },
    { autoEndRoot: true }
  );

  const { result, runId } = await wrappedFn({ userMessage });
  return { result, runId };
};

Adding Custom Child Spans

For multi-step agents (retrieval, reranking, multiple LLM calls), you can create child spans for individual operations. They automatically nest under the wrapAgent span in the dashboard.
import { trace } from '@opentelemetry/api';
import { wrapAgent } from '@uselemma/tracing';

const tracer = trace.getTracer('my-app');

export const callAgent = async (userMessage: string) => {
  const wrappedFn = wrapAgent(
    'my-agent',
    async ({ onComplete }, input) => {
      const context = await tracer.startActiveSpan('retrieve-context', async (span) => {
        const docs = await vectorSearch(input.userMessage);
        span.setAttribute('docs.count', docs.length);
        span.end();
        return docs;
      });

      const response = await tracer.startActiveSpan('generate', async (span) => {
        const result = await callLLM(input.userMessage, context);
        span.end();
        return result;
      });

      onComplete(response);
      return response;
    }
  );

  const { result, runId } = await wrappedFn({ userMessage });
  return { result, runId };
};

Next Steps