Initial implementation of opentelemetry-llm tool

This commit is contained in:
2025-07-14 16:27:29 -05:00
parent 63cf87a6c6
commit 1f201a093f
14 changed files with 3191 additions and 61 deletions

View File

@@ -0,0 +1,130 @@
import { z } from 'zod';
/**
 * Configuration schema for OpenTelemetry.
 *
 * Validates raw configuration input and supplies defaults; the parsed
 * output feeds the OpenTelemetryConfig class below.
 */
export const OpenTelemetryConfigSchema = z.object({
	// Service identity attached to every exported resource.
	serviceName: z.string().default('llm-observability-mcp'),
	serviceVersion: z.string().default('1.0.0'),
	environment: z.string().default('development'),
	// OTLP endpoints; leaving one unset disables that signal's exporter.
	metricsEndpoint: z.string().optional(),
	tracesEndpoint: z.string().optional(),
	logsEndpoint: z.string().optional(),
	// Authentication headers sent with every OTLP export request.
	headers: z.record(z.string()).optional(),
	// Export cadence and timeout, both in milliseconds.
	exportIntervalMillis: z.number().default(10000),
	exportTimeoutMillis: z.number().default(5000),
	// Trace sampling ratio in [0, 1]; 1.0 samples every trace.
	samplingRatio: z.number().min(0).max(1).default(1.0),
});
// TypeScript type inferred from the schema above.
export type OpenTelemetryConfigType = z.infer<typeof OpenTelemetryConfigSchema>;
/**
 * OpenTelemetry configuration class.
 *
 * Immutable snapshot of a validated OpenTelemetryConfigType. Construct it
 * directly from a parsed config object, or via fromEnv() to read the
 * standard OTEL_* environment variables.
 */
export class OpenTelemetryConfig {
	public readonly serviceName: string;
	public readonly serviceVersion: string;
	public readonly environment: string;
	public readonly metricsEndpoint?: string;
	public readonly tracesEndpoint?: string;
	public readonly logsEndpoint?: string;
	public readonly headers?: Record<string, string>;
	public readonly exportIntervalMillis: number;
	public readonly exportTimeoutMillis: number;
	public readonly samplingRatio: number;
	constructor(config: OpenTelemetryConfigType) {
		this.serviceName = config.serviceName;
		this.serviceVersion = config.serviceVersion;
		this.environment = config.environment;
		this.metricsEndpoint = config.metricsEndpoint;
		this.tracesEndpoint = config.tracesEndpoint;
		this.logsEndpoint = config.logsEndpoint;
		this.headers = config.headers;
		this.exportIntervalMillis = config.exportIntervalMillis;
		this.exportTimeoutMillis = config.exportTimeoutMillis;
		this.samplingRatio = config.samplingRatio;
	}
	/**
	 * Parse an integer environment variable, falling back to a default
	 * when the variable is unset or not parseable as a number.
	 */
	private static intFromEnv(
		raw: string | undefined,
		fallback: number,
	): number {
		const parsed = parseInt(raw ?? '', 10);
		return Number.isNaN(parsed) ? fallback : parsed;
	}
	/**
	 * Parse a float environment variable, falling back to a default when
	 * the variable is unset or not a finite number.
	 */
	private static floatFromEnv(
		raw: string | undefined,
		fallback: number,
	): number {
		const parsed = parseFloat(raw ?? '');
		return Number.isFinite(parsed) ? parsed : fallback;
	}
	/**
	 * Create configuration from environment variables.
	 *
	 * BUG FIX: malformed numeric env vars previously produced NaN, which
	 * either slipped through schema validation (interval/timeout) or made
	 * the samplingRatio min/max check throw at startup. Malformed values
	 * now fall back to the documented defaults instead.
	 */
	public static fromEnv(): OpenTelemetryConfig {
		const config = OpenTelemetryConfigSchema.parse({
			serviceName:
				process.env.OTEL_SERVICE_NAME || 'llm-observability-mcp',
			serviceVersion: process.env.OTEL_SERVICE_VERSION || '1.0.0',
			environment: process.env.OTEL_ENVIRONMENT || 'development',
			metricsEndpoint: process.env.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT,
			tracesEndpoint: process.env.OTEL_EXPORTER_OTLP_TRACES_ENDPOINT,
			logsEndpoint: process.env.OTEL_EXPORTER_OTLP_LOGS_ENDPOINT,
			headers: process.env.OTEL_EXPORTER_OTLP_HEADERS
				? parseHeaders(process.env.OTEL_EXPORTER_OTLP_HEADERS)
				: undefined,
			exportIntervalMillis: OpenTelemetryConfig.intFromEnv(
				process.env.OTEL_METRIC_EXPORT_INTERVAL,
				10000,
			),
			exportTimeoutMillis: OpenTelemetryConfig.intFromEnv(
				process.env.OTEL_METRIC_EXPORT_TIMEOUT,
				5000,
			),
			samplingRatio: OpenTelemetryConfig.floatFromEnv(
				process.env.OTEL_TRACES_SAMPLER_ARG,
				1.0,
			),
		});
		return new OpenTelemetryConfig(config);
	}
	/**
	 * Check if OpenTelemetry is enabled (at least one endpoint configured).
	 */
	public isEnabled(): boolean {
		return !!(
			this.metricsEndpoint ||
			this.tracesEndpoint ||
			this.logsEndpoint
		);
	}
	/**
	 * Get the default OTLP endpoint, preferring metrics over traces.
	 *
	 * Simplified from the original: the "both endpoints equal" branch was
	 * redundant — it always returned the same value as this expression.
	 */
	public getDefaultEndpoint(): string | undefined {
		return this.metricsEndpoint || this.tracesEndpoint;
	}
}
/**
 * Parse headers from an environment variable string.
 * Format: "key1=value1,key2=value2"
 *
 * BUG FIX: the value is everything after the FIRST '='. The previous
 * split('=') destructuring truncated values that themselves contain '='
 * (common for base64-padded credentials, e.g. "Authorization=Basic abc==").
 *
 * Pairs without a '=', with an empty key, or with an empty value are
 * skipped, matching the original behavior.
 */
function parseHeaders(headersString: string): Record<string, string> {
	const headers: Record<string, string> = {};
	for (const pair of headersString.split(',')) {
		const separatorIndex = pair.indexOf('=');
		if (separatorIndex === -1) {
			continue;
		}
		const key = pair.slice(0, separatorIndex).trim();
		const value = pair.slice(separatorIndex + 1).trim();
		if (key && value) {
			headers[key] = value;
		}
	}
	return headers;
}

View File

@@ -0,0 +1,74 @@
import { OpenTelemetryConfig } from '../config/opentelemetry-llm.config.js';
import { OpenTelemetryService } from '../services/opentelemetry-llm.service.js';
import { ControllerResponse } from '../types/common.types.js';
import { OpenTelemetryLlmInputSchemaType } from '../types/opentelemetry-llm.types.js';
import { Logger } from '../utils/logger.util.js';
/**
 * Controller for OpenTelemetry LLM observability.
 *
 * Thin layer between the MCP tool handler and the OpenTelemetryService:
 * it records the request via the service and formats a human-readable
 * Markdown summary for the MCP response.
 */
export class OpenTelemetryController {
	// Service that records metrics and traces for each LLM request.
	private service: OpenTelemetryService;
	constructor(service: OpenTelemetryService) {
		this.service = service;
	}
	/**
	 * Capture LLM observability data.
	 *
	 * Records metrics and a trace span for the given LLM request, then
	 * returns a Markdown report of what was captured.
	 *
	 * @param data - Validated LLM observability payload.
	 * @returns Controller response whose `content` is a Markdown summary.
	 * @throws Re-throws any error raised by the service layer (after logging).
	 */
	public async captureLlmObservability(
		data: OpenTelemetryLlmInputSchemaType,
	): Promise<ControllerResponse> {
		const logger = Logger.forContext(
			'opentelemetry.controller',
			'captureLlmObservability',
		);
		logger.debug('Capturing LLM observability data', data);
		try {
			// Record metrics + trace via the service (synchronous enqueue).
			this.service.recordLlmRequest(data);
			// NOTE(review): the report below says the data "has been sent" to
			// the collector, but OTLP export is batched/asynchronous — at this
			// point it has only been recorded locally. Confirm wording.
			const content = `## OpenTelemetry LLM Observability Captured
Successfully recorded LLM observability data:
- **Model**: ${data.model}
- **Provider**: ${data.provider}
- **User ID**: ${data.userId}
- **Operation**: ${data.operationName || 'generate'}
- **Input Tokens**: ${data.inputTokens || 'N/A'}
- **Output Tokens**: ${data.outputTokens || 'N/A'}
- **Total Tokens**: ${(data.inputTokens || 0) + (data.outputTokens || 0)}
- **Latency**: ${data.latency ? `${data.latency}s` : 'N/A'}
- **Status**: ${data.error ? 'Error' : 'Success'}
${data.error ? `- **Error**: ${data.error}` : ''}
### Metrics Recorded
- ✅ Request counter incremented
- ✅ Token usage recorded
- ✅ Latency histogram updated
- ✅ Distributed trace created
### Trace Details
- **Trace ID**: ${data.traceId || 'auto-generated'}
- **Span Name**: ${data.operationName || 'llm.generate'}
- **Attributes**: model, provider, user_id, tokens, latency, error details
The data has been sent to your configured OpenTelemetry collector and is now available in your observability platform (Jaeger, New Relic, Grafana, etc.).`;
			return { content };
		} catch (error) {
			logger.error('Error capturing LLM observability', error);
			throw error;
		}
	}
}
// Export singleton instance
const openTelemetryService = OpenTelemetryService.getInstance();
const openTelemetryController = new OpenTelemetryController(
openTelemetryService,
);
export default openTelemetryController;

View File

@@ -7,6 +7,7 @@ import { config } from '../utils/config.util';
import { PACKAGE_NAME, VERSION } from '../utils/constants.util';
import posthogLlmResources from '../resources/posthog-llm.resource.js';
import posthogLlmTools from '../tools/posthog-llm.tool.js';
import openTelemetryTools from '../tools/opentelemetry-llm.tool.js';
export function createServer() {
const serverLogger = Logger.forContext('utils/server.util.ts', 'getServer');
@@ -29,6 +30,7 @@ export function createServer() {
serverLogger.info('Registering MCP tools and resources...');
posthogLlmTools.registerTools(server);
posthogLlmResources.registerResources(server);
openTelemetryTools.registerTools(server);
serverLogger.debug('All tools and resources registered');
return server;

View File

@@ -0,0 +1,294 @@
import {
MeterProvider,
PeriodicExportingMetricReader,
} from '@opentelemetry/sdk-metrics';
import { NodeSDK } from '@opentelemetry/sdk-node';
import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-node';
import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-http';
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';
import { resourceFromAttributes } from '@opentelemetry/resources';
import {
ATTR_SERVICE_NAME,
ATTR_SERVICE_VERSION,
SEMRESATTRS_DEPLOYMENT_ENVIRONMENT,
} from '@opentelemetry/semantic-conventions';
import { Logger } from '../utils/logger.util.js';
import { OpenTelemetryConfig } from '../config/opentelemetry-llm.config.js';
import { OpenTelemetryLlmInputSchemaType } from '../types/opentelemetry-llm.types.js';
import {
Meter,
Counter,
Histogram,
UpDownCounter,
metrics,
} from '@opentelemetry/api';
import { trace, Tracer, SpanStatusCode } from '@opentelemetry/api';
/**
 * OpenTelemetry service for LLM observability.
 *
 * Singleton that owns the OpenTelemetry SDK lifecycle and exposes helpers
 * to record LLM request metrics and distributed trace spans. Create it via
 * getInstance(config) and call initialize() before recording so that data
 * is exported to the configured OTLP endpoints.
 */
export class OpenTelemetryService {
	private static instance: OpenTelemetryService;
	private sdk: NodeSDK | null = null;
	private meterProvider: MeterProvider | null = null;
	private tracer: Tracer;
	private meter: Meter;
	private config: OpenTelemetryConfig;
	// Metric instruments. Bound to the global meter at construction time and
	// re-created in initialize() once the exporting MeterProvider exists.
	private requestCounter: Counter;
	private tokenCounter: Counter;
	private latencyHistogram: Histogram;
	private activeRequests: UpDownCounter;
	private constructor(config: OpenTelemetryConfig) {
		this.config = config;
		this.tracer = trace.getTracer('llm-observability-mcp');
		// No MeterProvider exists yet at construction time, so start with the
		// global meter; initialize() re-binds to the exporting provider.
		this.meter = metrics.getMeter('llm-observability-mcp');
		const instruments = this.createInstruments();
		this.requestCounter = instruments.requestCounter;
		this.tokenCounter = instruments.tokenCounter;
		this.latencyHistogram = instruments.latencyHistogram;
		this.activeRequests = instruments.activeRequests;
	}
	/**
	 * Get singleton instance.
	 *
	 * @param config - Required on the very first call; ignored afterwards.
	 * @throws Error when called without a config before the instance exists.
	 */
	public static getInstance(
		config?: OpenTelemetryConfig,
	): OpenTelemetryService {
		if (!OpenTelemetryService.instance) {
			if (!config) {
				throw new Error(
					'OpenTelemetryService requires configuration on first initialization',
				);
			}
			OpenTelemetryService.instance = new OpenTelemetryService(config);
		}
		return OpenTelemetryService.instance;
	}
	/**
	 * Create all metric instruments from the current meter.
	 *
	 * Called once at construction time and again from initialize() after
	 * the dedicated exporting MeterProvider has been set up.
	 */
	private createInstruments(): {
		requestCounter: Counter;
		tokenCounter: Counter;
		latencyHistogram: Histogram;
		activeRequests: UpDownCounter;
	} {
		return {
			requestCounter: this.meter.createCounter('llm.requests.total', {
				description: 'Total number of LLM requests',
			}),
			tokenCounter: this.meter.createCounter('llm.tokens.total', {
				description: 'Total number of tokens processed',
			}),
			latencyHistogram: this.meter.createHistogram(
				'llm.latency.duration',
				{
					description: 'Duration of LLM requests in milliseconds',
					unit: 'ms',
				},
			),
			activeRequests: this.meter.createUpDownCounter(
				'llm.requests.active',
				{
					description: 'Number of active LLM requests',
				},
			),
		};
	}
	/**
	 * Initialize the OpenTelemetry SDK (idempotent).
	 *
	 * Builds OTLP exporters for whichever endpoints are configured and
	 * starts the NodeSDK. Safe to call repeatedly; only the first call
	 * does any work.
	 *
	 * @throws Re-throws any SDK/exporter setup error (after logging).
	 */
	public initialize(): void {
		const logger = Logger.forContext('opentelemetry.service', 'initialize');
		if (this.sdk) {
			logger.warn('OpenTelemetry SDK already initialized');
			return;
		}
		try {
			const resource = resourceFromAttributes({
				[ATTR_SERVICE_NAME]: this.config.serviceName,
				[ATTR_SERVICE_VERSION]: this.config.serviceVersion,
				[SEMRESATTRS_DEPLOYMENT_ENVIRONMENT]: this.config.environment,
			});
			// Configure exporters only for endpoints that were provided.
			const metricExporter = this.config.metricsEndpoint
				? new OTLPMetricExporter({
						url: this.config.metricsEndpoint,
						headers: this.config.headers,
					})
				: undefined;
			const traceExporter = this.config.tracesEndpoint
				? new OTLPTraceExporter({
						url: this.config.tracesEndpoint,
						headers: this.config.headers,
					})
				: undefined;
			// Initialize the dedicated meter provider for metric export.
			if (metricExporter) {
				this.meterProvider = new MeterProvider({
					resource,
					readers: [
						new PeriodicExportingMetricReader({
							exporter: metricExporter,
							exportIntervalMillis:
								this.config.exportIntervalMillis,
							exportTimeoutMillis:
								this.config.exportTimeoutMillis,
						}),
					],
				});
				// BUG FIX: the instruments were created in the constructor
				// from the global (no-op) meter, before this provider
				// existed — so the exporting provider never saw a single
				// measurement. Re-bind the meter to the new provider and
				// re-create every instrument against it.
				this.meter = this.meterProvider.getMeter(
					'llm-observability-mcp',
				);
				const instruments = this.createInstruments();
				this.requestCounter = instruments.requestCounter;
				this.tokenCounter = instruments.tokenCounter;
				this.latencyHistogram = instruments.latencyHistogram;
				this.activeRequests = instruments.activeRequests;
			}
			// Initialize the SDK for trace export; metrics are handled by
			// the dedicated MeterProvider above.
			this.sdk = new NodeSDK({
				resource,
				spanProcessor: traceExporter
					? new BatchSpanProcessor(traceExporter)
					: undefined,
			});
			this.sdk.start();
			logger.info('OpenTelemetry SDK initialized successfully');
		} catch (error) {
			logger.error('Failed to initialize OpenTelemetry SDK', error);
			throw error;
		}
	}
	/**
	 * Record LLM request metrics and create a trace span.
	 *
	 * Increments the request counter, records token usage (total, input,
	 * output) and latency when present, then emits one span via
	 * createLlmTrace().
	 */
	public recordLlmRequest(data: OpenTelemetryLlmInputSchemaType): void {
		// Common attribute set shared by every metric emitted below.
		const labels = {
			model: data.model,
			provider: data.provider,
			userId: data.userId,
			operationName: data.operationName || 'generate',
			status: data.error ? 'error' : 'success',
			errorType: data.errorType,
			mcpToolsUsed: data.mcpToolsUsed?.join(',') || '',
		};
		this.requestCounter.add(1, labels);
		if (data.inputTokens || data.outputTokens) {
			const totalTokens =
				(data.inputTokens || 0) + (data.outputTokens || 0);
			this.tokenCounter.add(totalTokens, {
				...labels,
				type: 'total',
			});
			if (data.inputTokens) {
				this.tokenCounter.add(data.inputTokens, {
					...labels,
					type: 'input',
				});
			}
			if (data.outputTokens) {
				this.tokenCounter.add(data.outputTokens, {
					...labels,
					type: 'output',
				});
			}
		}
		if (data.latency) {
			// Latency arrives in seconds; the histogram's unit is ms.
			this.latencyHistogram.record(data.latency * 1000, labels);
		}
		// Create the distributed trace span for this request.
		this.createLlmTrace(data);
	}
	/**
	 * Create and immediately end a span describing one LLM request.
	 *
	 * Undefined optional fields are passed through as undefined attribute
	 * values. Span status is ERROR (with the error message) when data.error
	 * is set, OK otherwise.
	 */
	private createLlmTrace(data: OpenTelemetryLlmInputSchemaType): void {
		const spanName = data.operationName || 'llm.generate';
		const span = this.tracer.startSpan(spanName, {
			attributes: {
				'llm.model': data.model,
				'llm.provider': data.provider,
				'llm.user_id': data.userId,
				'llm.operation': data.operationName || 'generate',
				'llm.input_tokens': data.inputTokens,
				'llm.output_tokens': data.outputTokens,
				'llm.total_tokens':
					(data.inputTokens || 0) + (data.outputTokens || 0),
				'llm.latency_ms': data.latency
					? data.latency * 1000
					: undefined,
				'llm.http_status': data.httpStatus,
				'llm.base_url': data.baseUrl,
				'llm.error': data.error,
				'llm.error_type': data.errorType,
				// Serialize structured inputs/outputs so they fit in a
				// string attribute.
				'llm.input':
					typeof data.input === 'string'
						? data.input
						: JSON.stringify(data.input),
				'llm.output':
					typeof data.outputChoices === 'string'
						? data.outputChoices
						: JSON.stringify(data.outputChoices),
				'llm.mcp_tools_used': data.mcpToolsUsed?.join(','),
			},
		});
		// Caller-supplied trace id is recorded as an attribute (the span
		// keeps its own auto-generated trace context).
		if (data.traceId) {
			span.setAttribute('trace.id', data.traceId);
		}
		if (data.error) {
			span.setStatus({
				code: SpanStatusCode.ERROR,
				message: data.error,
			});
		} else {
			span.setStatus({ code: SpanStatusCode.OK });
		}
		span.end();
	}
	/**
	 * Increment the active-request gauge.
	 */
	public incrementActiveRequests(): void {
		this.activeRequests.add(1);
	}
	/**
	 * Decrement the active-request gauge.
	 */
	public decrementActiveRequests(): void {
		this.activeRequests.add(-1);
	}
	/**
	 * Shutdown OpenTelemetry, flushing pending telemetry.
	 *
	 * BUG FIX: also shuts down the dedicated MeterProvider. It is created
	 * outside the NodeSDK in initialize(), so sdk.shutdown() alone would
	 * never flush or stop the periodic metric reader.
	 */
	public async shutdown(): Promise<void> {
		const logger = Logger.forContext('opentelemetry.service', 'shutdown');
		if (this.meterProvider) {
			try {
				await this.meterProvider.shutdown();
			} catch (error) {
				logger.error('Error shutting down MeterProvider', error);
			}
		}
		if (this.sdk) {
			try {
				await this.sdk.shutdown();
				logger.info('OpenTelemetry SDK shutdown successfully');
			} catch (error) {
				logger.error('Error shutting down OpenTelemetry SDK', error);
			}
		}
	}
}

View File

@@ -0,0 +1,91 @@
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { Logger } from '../utils/logger.util.js';
import { formatErrorForMcpTool } from '../utils/error.util.js';
import { OpenTelemetryService } from '../services/opentelemetry-llm.service.js';
import { OpenTelemetryController } from '../controllers/opentelemetry-llm.controller.js';
import { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
import {
OpenTelemetryLlmInputSchema,
OpenTelemetryLlmInputSchemaType,
} from '../types/opentelemetry-llm.types.js';
import { OpenTelemetryConfig } from '../config/opentelemetry-llm.config.js';
/**
* @function captureOpenTelemetryLlmObservability
* @description MCP Tool handler to capture LLM observability events using OpenTelemetry.
* It records metrics, traces, and spans for LLM requests that can be sent to any OpenTelemetry-compatible backend.
* @param {OpenTelemetryLlmInputSchemaType} args - Arguments provided to the tool.
* @returns {Promise<CallToolResult>} Formatted response for the MCP.
* @throws {McpError} Formatted error if the controller or service layer encounters an issue.
*/
async function captureOpenTelemetryLlmObservability(
args: OpenTelemetryLlmInputSchemaType,
): Promise<CallToolResult> {
const methodLogger = Logger.forContext(
'opentelemetry.tool',
'captureOpenTelemetryLlmObservability',
);
methodLogger.debug('Capture LLM Observability with OpenTelemetry...', args);
try {
// Parse and validate arguments
const validatedArgs = OpenTelemetryLlmInputSchema.parse(args);
// Ensure OpenTelemetry is configured
const config = OpenTelemetryConfig.fromEnv();
const service = OpenTelemetryService.getInstance(config);
// Initialize if not already done
service.initialize();
// Create controller with the service
const controller = new OpenTelemetryController(service);
// Pass validated args to the controller
const result = await controller.captureLlmObservability(validatedArgs);
methodLogger.debug('Got response from controller', result);
// Format the response for the MCP tool
return {
content: [
{
type: 'text' as const,
text: result.content,
},
],
};
} catch (error) {
methodLogger.error(
'Error tracking LLM generation with OpenTelemetry',
error,
);
return formatErrorForMcpTool(error);
}
}
/**
* @function registerTools
* @description Registers the OpenTelemetry LLM observability tool with the MCP server.
*
* @param {McpServer} server - The MCP server instance.
*/
function registerTools(server: McpServer) {
const methodLogger = Logger.forContext(
'opentelemetry.tool',
'registerTools',
);
methodLogger.debug('Registering OpenTelemetry LLM observability tools...');
server.tool(
'capture_llm_observability_opentelemetry',
`Captures LLM usage using OpenTelemetry for observability, including requests, responses, and performance metrics. Works with any OpenTelemetry-compatible backend like Jaeger, New Relic, Grafana, etc.`,
OpenTelemetryLlmInputSchema.shape,
captureOpenTelemetryLlmObservability,
);
methodLogger.debug(
'Successfully registered capture_llm_observability_opentelemetry tool.',
);
}
export default { registerTools };

View File

@@ -6,7 +6,7 @@ import { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
import {
GetToolInputSchema,
GetToolInputSchemaType,
} from './posthog-llm.types.js';
} from '../types/posthog-llm.types.js';
/**
* @function capturePosthogLlmObservability

View File

@@ -0,0 +1,168 @@
import { z } from 'zod';
/**
 * Zod schema for the OpenTelemetry LLM observability tool arguments.
 *
 * Required fields identify who made which request to which provider;
 * everything else is optional enrichment (tokens, latency, errors, etc.).
 */
export const OpenTelemetryLlmInputSchema = z.object({
	// Required fields
	userId: z.string().describe('The distinct ID of the user'),
	model: z.string().describe('The model used (e.g., gpt-4, claude-3, etc.)'),
	provider: z
		.string()
		.describe('The LLM provider (e.g., openai, anthropic, etc.)'),
	// Optional fields
	traceId: z.string().optional().describe('The trace ID to group AI events'),
	// z.any() because inputs may be message arrays, plain prompts, etc.
	input: z
		.any()
		.optional()
		.describe('The input to the LLM (messages, prompt, etc.)'),
	outputChoices: z.any().optional().describe('The output from the LLM'),
	inputTokens: z
		.number()
		.optional()
		.describe('The number of tokens in the input'),
	outputTokens: z
		.number()
		.optional()
		.describe('The number of tokens in the output'),
	// NOTE: latency is expressed in SECONDS; the service converts to ms.
	latency: z
		.number()
		.optional()
		.describe('The latency of the LLM call in seconds'),
	httpStatus: z
		.number()
		.optional()
		.describe('The HTTP status code of the LLM call'),
	baseUrl: z.string().optional().describe('The base URL of the LLM API'),
	// OpenTelemetry specific fields
	operationName: z
		.string()
		.optional()
		.describe('The name of the operation being performed'),
	error: z
		.string()
		.optional()
		.describe('Error message if the request failed'),
	errorType: z
		.string()
		.optional()
		.describe('Type of error (e.g., rate_limit, timeout, etc.)'),
	mcpToolsUsed: z
		.array(z.string())
		.optional()
		.describe('List of MCP tools used during the request'),
});
/**
 * TypeScript type inferred from the OpenTelemetryLlmInputSchema Zod schema.
 */
export type OpenTelemetryLlmInputSchemaType = z.infer<
	typeof OpenTelemetryLlmInputSchema
>;
/**
 * Configuration for a single OpenTelemetry OTLP exporter.
 */
export interface OpenTelemetryExporterConfig {
	url: string;
	headers?: Record<string, string>;
	timeout?: number;
}
/**
 * Supported OpenTelemetry backends.
 */
export type OpenTelemetryBackend =
	| 'jaeger'
	| 'newrelic'
	| 'grafana'
	| 'datadog'
	| 'honeycomb'
	| 'lightstep'
	| 'custom';
/**
 * Pre-configured settings for popular OpenTelemetry backends.
 *
 * Credentials are read from the environment at module load time; a missing
 * variable yields an empty header value rather than an error.
 *
 * Uses `satisfies` instead of a widening type annotation so that every key
 * is still checked against OpenTelemetryBackend while callers keep the
 * precise literal URL/header types.
 *
 * NOTE(review): the datadog URL points at Datadog's metrics series API
 * (/api/v2/series), not an OTLP trace endpoint — confirm the intended
 * ingest URL before relying on this preset.
 */
export const OpenTelemetryBackendConfigs = {
	jaeger: {
		url: 'http://localhost:4318/v1/traces',
		headers: {},
	},
	newrelic: {
		url: 'https://otlp.nr-data.net:4318/v1/traces',
		headers: {
			'api-key': process.env.NEW_RELIC_LICENSE_KEY || '',
		},
	},
	grafana: {
		url: 'https://otlp-gateway-prod-us-central-0.grafana.net/otlp/v1/traces',
		headers: {
			Authorization:
				process.env.GRAFANA_INSTANCE_ID && process.env.GRAFANA_API_KEY
					? `Basic ${Buffer.from(
							`${process.env.GRAFANA_INSTANCE_ID}:${process.env.GRAFANA_API_KEY}`,
						).toString('base64')}`
					: '',
		},
	},
	datadog: {
		url: 'https://api.datadoghq.com/api/v2/series',
		headers: {
			'DD-API-KEY': process.env.DD_API_KEY || '',
		},
	},
	honeycomb: {
		url: 'https://api.honeycomb.io/v1/traces',
		headers: {
			'x-honeycomb-team': process.env.HONEYCOMB_API_KEY || '',
		},
	},
	lightstep: {
		url: 'https://ingest.lightstep.com:443/api/v2/otel/trace',
		headers: {
			'lightstep-access-token': process.env.LIGHTSTEP_ACCESS_TOKEN || '',
		},
	},
	custom: {
		url: process.env.OTEL_EXPORTER_OTLP_ENDPOINT || 'http://localhost:4318',
		headers: {},
	},
} satisfies Record<OpenTelemetryBackend, Partial<OpenTelemetryExporterConfig>>;
/**
 * Metric names used by the OpenTelemetry service.
 *
 * NOTE(review): the service currently hard-codes these same strings when
 * creating its instruments instead of referencing this constant — keep the
 * two in sync (or refactor the service to import these).
 */
export const OpenTelemetryMetrics = {
	REQUESTS_TOTAL: 'llm.requests.total',
	TOKENS_TOTAL: 'llm.tokens.total',
	LATENCY_DURATION: 'llm.latency.duration',
	REQUESTS_ACTIVE: 'llm.requests.active',
} as const;
/**
 * Trace attribute names used by the OpenTelemetry service.
 *
 * Mirrors the attribute keys set in the service's createLlmTrace(); the
 * `as const` keeps each value as a literal string type.
 */
export const OpenTelemetryAttributes = {
	LLM_MODEL: 'llm.model',
	LLM_PROVIDER: 'llm.provider',
	LLM_USER_ID: 'llm.user_id',
	LLM_OPERATION: 'llm.operation',
	LLM_INPUT_TOKENS: 'llm.input_tokens',
	LLM_OUTPUT_TOKENS: 'llm.output_tokens',
	LLM_TOTAL_TOKENS: 'llm.total_tokens',
	LLM_LATENCY_MS: 'llm.latency_ms',
	LLM_HTTP_STATUS: 'llm.http_status',
	LLM_BASE_URL: 'llm.base_url',
	LLM_ERROR: 'llm.error',
	LLM_ERROR_TYPE: 'llm.error_type',
	LLM_INPUT: 'llm.input',
	LLM_OUTPUT: 'llm.output',
	LLM_MCP_TOOLS_USED: 'llm.mcp_tools_used',
	TRACE_ID: 'trace.id',
} as const;