Initial implementation of opentelemetry-llm tool

This commit is contained in:
2025-07-14 16:27:29 -05:00
parent 63cf87a6c6
commit 1f201a093f
14 changed files with 3191 additions and 61 deletions

View File

@@ -1,9 +1,52 @@
# PostHog Configuration (existing)
POSTHOG_API_KEY=your_posthog_api_key_here
POSTHOG_HOST=https://app.posthog.com
# OpenTelemetry Configuration
# ==========================
# Service Configuration
OTEL_SERVICE_NAME=llm-observability-mcp
OTEL_SERVICE_VERSION=1.0.0
OTEL_ENVIRONMENT=development
# OTLP Endpoints
# Uncomment and configure based on your backend
# Jaeger (local development)
# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
# New Relic
# OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:4318
# OTEL_EXPORTER_OTLP_HEADERS=api-key=YOUR_NEW_RELIC_LICENSE_KEY
# Grafana Cloud
# OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-us-central-0.grafana.net/otlp
# OTEL_EXPORTER_OTLP_HEADERS=Authorization=Basic YOUR_BASE64_ENCODED_CREDENTIALS
# Datadog
# OTEL_EXPORTER_OTLP_ENDPOINT=https://api.datadoghq.com/api/v2/series
# OTEL_EXPORTER_OTLP_HEADERS=DD-API-KEY=YOUR_DD_API_KEY
# Honeycomb
# OTEL_EXPORTER_OTLP_ENDPOINT=https://api.honeycomb.io/v1/traces
# OTEL_EXPORTER_OTLP_HEADERS=x-honeycomb-team=YOUR_API_KEY
# Lightstep
# OTEL_EXPORTER_OTLP_ENDPOINT=https://ingest.lightstep.com:443/api/v2/otel/trace
# OTEL_EXPORTER_OTLP_HEADERS=lightstep-access-token=YOUR_ACCESS_TOKEN
# Separate endpoints (optional)
# OTEL_EXPORTER_OTLP_METRICS_ENDPOINT=http://localhost:4318/v1/metrics
# OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=http://localhost:4318/v1/traces
# OTEL_EXPORTER_OTLP_LOGS_ENDPOINT=http://localhost:4318/v1/logs
# Export Configuration
OTEL_METRIC_EXPORT_INTERVAL=10000
OTEL_METRIC_EXPORT_TIMEOUT=5000
# Sampling Configuration
OTEL_TRACES_SAMPLER_ARG=1.0
# Debug Mode
DEBUG=false
# Transport mode
TRANSPORT_MODE=<http | stdio>
# PostHog LLM Observability configuration
POSTHOG_API_KEY=<YourPostHogAPIKeyGoesHere>
POSTHOG_HOST=https://us.i.posthog.com

301
OPENTELEMETRY.md Normal file
View File

@@ -0,0 +1,301 @@
# OpenTelemetry LLM Observability Integration
This MCP server now includes comprehensive OpenTelemetry support for LLM observability, compatible with any OpenTelemetry backend including Jaeger, New Relic, Grafana, Datadog, Honeycomb, and more.
## Features
- **Universal Compatibility**: Works with any OpenTelemetry-compatible backend
- **Comprehensive Metrics**: Request counts, token usage, latency, error rates
- **Distributed Tracing**: Full request lifecycle tracking with spans
- **Flexible Configuration**: Environment-based configuration for different backends
- **Zero-Code Integration**: Drop-in replacement for existing observability tools
## Quick Start
### 1. Install Dependencies
The OpenTelemetry dependencies are already included in the package.json:
```bash
npm install
```
### 2. Configure Your Backend
#### Jaeger (Local Development)
```bash
# Start Jaeger locally
docker run -d --name jaeger \
-e COLLECTOR_OTLP_ENABLED=true \
-p 16686:16686 \
-p 4317:4317 \
-p 4318:4318 \
jaegertracing/all-in-one:latest
# Configure the MCP server
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
export OTEL_SERVICE_NAME=llm-observability-mcp
```
#### New Relic
```bash
export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:4318
export OTEL_EXPORTER_OTLP_HEADERS="api-key=YOUR_NEW_RELIC_LICENSE_KEY"
export OTEL_SERVICE_NAME=llm-observability-mcp
```
#### Grafana Cloud
```bash
export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-us-central-0.grafana.net/otlp
export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Basic $(echo -n YOUR_INSTANCE_ID:YOUR_API_KEY | base64)"
export OTEL_SERVICE_NAME=llm-observability-mcp
```
#### Datadog
```bash
export OTEL_EXPORTER_OTLP_ENDPOINT=https://api.datadoghq.com/api/v2/series
export OTEL_EXPORTER_OTLP_HEADERS="DD-API-KEY=YOUR_DD_API_KEY"
export OTEL_SERVICE_NAME=llm-observability-mcp
```
### 3. Start the MCP Server
```bash
# Start with stdio transport
npm run mcp:stdio
# Start with HTTP transport
npm run mcp:http
```
## Usage
### Using the OpenTelemetry Tool
The MCP server provides a new tool: `capture_llm_observability_opentelemetry`
#### Required Parameters
- `userId`: The distinct ID of the user
- `model`: The model used (e.g., "gpt-4", "claude-3")
- `provider`: The LLM provider (e.g., "openai", "anthropic")
#### Optional Parameters
- `traceId`: Trace ID for grouping related events
- `input`: The input to the LLM (messages, prompt, etc.)
- `outputChoices`: The output from the LLM
- `inputTokens`: Number of tokens in the input
- `outputTokens`: Number of tokens in the output
- `latency`: Latency of the LLM call in seconds
- `httpStatus`: HTTP status code of the LLM call
- `baseUrl`: Base URL of the LLM API
- `operationName`: Name of the operation being performed
- `error`: Error message if the request failed
- `errorType`: Type of error (e.g., "rate_limit", "timeout")
- `mcpToolsUsed`: List of MCP tools used during the request
### Example Usage
```json
{
"userId": "user-123",
"model": "gpt-4",
"provider": "openai",
"inputTokens": 150,
"outputTokens": 75,
"latency": 2.5,
"httpStatus": 200,
"operationName": "chat-completion",
"traceId": "trace-abc123"
}
```
## Configuration Reference
### Environment Variables
| Variable | Description | Default |
|----------|-------------|---------|
| `OTEL_SERVICE_NAME` | Service name for OpenTelemetry | `llm-observability-mcp` |
| `OTEL_SERVICE_VERSION` | Service version | `1.0.0` |
| `OTEL_ENVIRONMENT` | Environment name | `development` |
| `OTEL_EXPORTER_OTLP_ENDPOINT` | Default OTLP endpoint | - |
| `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` | Metrics endpoint | - |
| `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` | Traces endpoint | - |
| `OTEL_EXPORTER_OTLP_LOGS_ENDPOINT` | Logs endpoint | - |
| `OTEL_EXPORTER_OTLP_HEADERS` | Headers for authentication (format: "key1=value1,key2=value2") | - |
| `OTEL_METRIC_EXPORT_INTERVAL` | Metrics export interval in ms | `10000` |
| `OTEL_METRIC_EXPORT_TIMEOUT` | Metrics export timeout in ms | `5000` |
| `OTEL_TRACES_SAMPLER_ARG` | Sampling ratio (0.0-1.0) | `1.0` |
### Backend-Specific Configuration
#### New Relic
```bash
export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:4318
export OTEL_EXPORTER_OTLP_HEADERS="api-key=YOUR_LICENSE_KEY"
```
#### Jaeger
```bash
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
```
#### Grafana Cloud
```bash
export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-us-central-0.grafana.net/otlp
export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Basic $(echo -n YOUR_INSTANCE_ID:YOUR_API_KEY | base64)"
```
#### Honeycomb
```bash
export OTEL_EXPORTER_OTLP_ENDPOINT=https://api.honeycomb.io/v1/traces
export OTEL_EXPORTER_OTLP_HEADERS="x-honeycomb-team=YOUR_API_KEY"
```
#### Lightstep
```bash
export OTEL_EXPORTER_OTLP_ENDPOINT=https://ingest.lightstep.com:443/api/v2/otel/trace
export OTEL_EXPORTER_OTLP_HEADERS="lightstep-access-token=YOUR_ACCESS_TOKEN"
```
## Metrics Collected
### Counters
- `llm.requests.total`: Total number of LLM requests
- `llm.tokens.total`: Total tokens used (input + output)
### Histograms
- `llm.latency.duration`: Request latency in milliseconds
### Gauges
- `llm.requests.active`: Number of active requests
### Trace Attributes
- `llm.model`: The model used
- `llm.provider`: The provider name
- `llm.user_id`: The user ID
- `llm.operation`: The operation name
- `llm.input_tokens`: Input token count
- `llm.output_tokens`: Output token count
- `llm.total_tokens`: Total token count
- `llm.latency_ms`: Latency in milliseconds
- `llm.http_status`: HTTP status code
- `llm.base_url`: API base URL
- `llm.error`: Error message (if any)
- `llm.error_type`: Error type classification
- `llm.input`: Input content (optional)
- `llm.output`: Output content (optional)
- `llm.mcp_tools_used`: MCP tools used
## Testing with Jaeger
### 1. Start Jaeger
```bash
docker run -d --name jaeger \
-e COLLECTOR_OTLP_ENABLED=true \
-p 16686:16686 \
-p 4317:4317 \
-p 4318:4318 \
jaegertracing/all-in-one:latest
```
### 2. Configure MCP Server
```bash
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
export OTEL_SERVICE_NAME=llm-observability-mcp
npm run mcp:stdio
```
### 3. View Traces
Open <http://localhost:16686> to view traces in Jaeger UI.
## Migration from PostHog
The OpenTelemetry tool is designed to be a drop-in replacement for the PostHog tool. Both tools can coexist, allowing for gradual migration:
1. **PostHog Tool**: `capture_llm_observability`
2. **OpenTelemetry Tool**: `capture_llm_observability_opentelemetry`
Both tools accept the same parameters, making migration straightforward.
## Troubleshooting
### Common Issues
#### No Data in Backend
1. Verify endpoint URLs are correct
2. Check authentication headers
3. Ensure network connectivity
4. Check server logs for errors
#### High Resource Usage
1. Adjust sampling ratio: `OTEL_TRACES_SAMPLER_ARG=0.1`
2. Increase export intervals: `OTEL_METRIC_EXPORT_INTERVAL=30000`
#### Missing Traces
1. Verify OpenTelemetry is enabled (check for endpoint configuration)
2. Check for initialization errors in logs
3. Ensure proper service name configuration
### Debug Mode
Enable debug logging:
```bash
export DEBUG=true
npm run mcp:stdio
```
## Advanced Configuration
### Custom Headers
```bash
export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Bearer token,Custom-Header=value"
```
### Multiple Backends
Configure different endpoints for metrics and traces:
```bash
export OTEL_EXPORTER_OTLP_METRICS_ENDPOINT=https://metrics.example.com
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=https://traces.example.com
```
### Sampling Configuration
```bash
# Sample 10% of traces
export OTEL_TRACES_SAMPLER_ARG=0.1
```
## Support
For issues or questions:
1. Check the troubleshooting section above
2. Review server logs with `DEBUG=true`
3. Verify OpenTelemetry configuration
4. Test with Jaeger locally first

151
README.md
View File

@@ -1,21 +1,24 @@
# LLM Observability MCP Server
[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
A Model Context Protocol (MCP) server that provides a tool to capture LLM Observability events and send them to PostHog. A Model Context Protocol (MCP) server that provides comprehensive LLM observability tools supporting both PostHog and OpenTelemetry backends.
## Overview ## Overview
This project is an MCP server designed to track and observe Large Language Model (LLM) interactions using [PostHog's LLM Observability](https://posthog.com/docs/llm-observability) features. It allows you to capture detailed information about LLM requests, responses, performance, and costs, providing valuable insights into your AI-powered applications. This project is an MCP server designed to track and observe Large Language Model (LLM) interactions using both [PostHog's LLM Observability](https://posthog.com/docs/llm-observability) and **OpenTelemetry** for universal observability across any backend that supports OpenTelemetry (Jaeger, New Relic, Grafana, Datadog, Honeycomb, etc.).
The server can be run as a local process communicating over `stdio` or as a remote `http` server, making it compatible with any MCP client, such as AI-powered IDEs (e.g., VS Code with an MCP extension, Cursor) or custom applications. The server can be run as a local process communicating over `stdio` or as a remote `http` server, making it compatible with any MCP client, such as AI-powered IDEs (e.g., VS Code with an MCP extension, Cursor) or custom applications.
## Features ## Features
- **Capture LLM Metrics**: Log key details of LLM interactions, including model, provider, latency, token counts, and more. - **Dual Backend Support**: Choose between PostHog or OpenTelemetry (or use both)
- **Flexible Transport**: Run as a local `stdio` process for tight IDE integration or as a standalone `http` server for remote access. - **Universal OpenTelemetry**: Works with any OpenTelemetry-compatible backend
- **Dynamic Configuration**: Configure the server easily using environment variables. - **Comprehensive Metrics**: Request counts, token usage, latency, error rates
- **Easy Integration**: Connect to MCP-compatible IDEs or use the programmatic client for use in any TypeScript/JavaScript application. - **Distributed Tracing**: Full request lifecycle tracking with spans
- **Flexible Transport**: Run as local `stdio` process or standalone `http` server
- **Dynamic Configuration**: Environment-based configuration for different backends
- **Zero-Code Integration**: Drop-in replacement for existing observability tools
## Installation for Development ## Installation for Development
@@ -46,10 +49,29 @@ Follow these steps to set up the server for local development.
The server is configured via environment variables. The server is configured via environment variables.
### PostHog Configuration
| Variable | Description | Default | Example |
| ----------------- | --------------------------------------------------------------------------- | --------- | ------------------------------------- |
| `POSTHOG_API_KEY` | Your PostHog Project API Key (required for PostHog tool) | - | `phc_...` |
| `POSTHOG_HOST` | The URL of your PostHog instance | - | `https://us.i.posthog.com` |
### OpenTelemetry Configuration
| Variable | Description | Default | Example |
| ------------------------------- | --------------------------------------------------------------------------- | -------------------------- | ------------------------------------- |
| `OTEL_EXPORTER_OTLP_ENDPOINT` | OpenTelemetry collector endpoint | - | `http://localhost:4318` |
| `OTEL_EXPORTER_OTLP_HEADERS` | Headers for authentication (comma-separated key=value pairs) | - | `api-key=YOUR_KEY` |
| `OTEL_SERVICE_NAME` | Service name for traces and metrics | `llm-observability-mcp` | `my-llm-app` |
| `OTEL_SERVICE_VERSION` | Service version | `1.0.0` | `2.1.0` |
| `OTEL_ENVIRONMENT` | Environment name | `development` | `production` |
| `OTEL_TRACES_SAMPLER_ARG` | Sampling ratio (0.0-1.0) | `1.0` | `0.1` |
| `OTEL_METRIC_EXPORT_INTERVAL` | Metrics export interval in milliseconds | `10000` | `30000` |
### General Configuration
| Variable | Description | Default | Example | | Variable | Description | Default | Example |
| ----------------- | --------------------------------------------------------------------------- | --------- | ------------------------------------- | | ----------------- | --------------------------------------------------------------------------- | --------- | ------------------------------------- |
| `POSTHOG_API_KEY` | **Required.** Your PostHog Project API Key. | - | `phc_...` |
| `POSTHOG_HOST` | **Required.** The URL of your PostHog instance. | - | `https://us.i.posthog.com` |
| `TRANSPORT_MODE` | The transport protocol to use. Can be `http` or `stdio`. | `http` | `stdio` | | `TRANSPORT_MODE` | The transport protocol to use. Can be `http` or `stdio`. | `http` | `stdio` |
| `DEBUG` | Set to `true` to enable detailed debug logging. | `false` | `true` | | `DEBUG` | Set to `true` to enable detailed debug logging. | `false` | `true` |
@@ -177,25 +199,98 @@ async function main() {
main().catch(console.error); main().catch(console.error);
``` ```
## Tool Reference: `capture_llm_observability` ## Available Tools
This is the core tool provided by the server. It captures LLM usage in PostHog for observability, including requests, responses, and performance metrics. ### PostHog Tool: `capture_llm_observability`
### Parameters Captures LLM usage in PostHog for observability, including requests, responses, and performance metrics.
| Parameter | Type | Required | Description | ### OpenTelemetry Tool: `capture_llm_observability_opentelemetry`
| --------------- | ------------------- | -------- | ----------------------------------------------- |
| `userId` | `string` | Yes | The distinct ID of the user. | Captures LLM usage using OpenTelemetry for universal observability across any OpenTelemetry-compatible backend.
| `model` | `string` | Yes | The model used (e.g., `gpt-4`, `claude-3`). |
| `provider` | `string` | Yes | The LLM provider (e.g., `openai`, `anthropic`). | ### Parameters Comparison
| `traceId` | `string` | No | The trace ID to group related AI events. |
| `input` | `any` | No | The input to the LLM (e.g., messages, prompt). | | Parameter | Type | Required | Description | PostHog | OpenTelemetry |
| `outputChoices` | `any` | No | The output choices from the LLM. | | --------------- | ------------------- | -------- | ----------------------------------------------- | ------- | ------------- |
| `inputTokens` | `number` | No | The number of tokens in the input. | | `userId` | `string` | Yes | The distinct ID of the user. | ✅ | ✅ |
| `outputTokens` | `number` | No | The number of tokens in the output. | | `model` | `string` | Yes | The model used (e.g., `gpt-4`, `claude-3`). | ✅ | ✅ |
| `latency` | `number` | No | The latency of the LLM call in seconds. | | `provider` | `string` | Yes | The LLM provider (e.g., `openai`, `anthropic`). | ✅ | ✅ |
| `httpStatus` | `number` | No | The HTTP status code of the LLM API call. | | `traceId` | `string` | No | The trace ID to group related AI events. | ✅ | ✅ |
| `baseUrl` | `string` | No | The base URL of the LLM API. | | `input` | `any` | No | The input to the LLM (e.g., messages, prompt). | ✅ | ✅ |
| `outputChoices` | `any` | No | The output choices from the LLM. | ✅ | ✅ |
| `inputTokens` | `number` | No | The number of tokens in the input. | ✅ | ✅ |
| `outputTokens` | `number` | No | The number of tokens in the output. | ✅ | ✅ |
| `latency` | `number` | No | The latency of the LLM call in seconds. | ✅ | ✅ |
| `httpStatus` | `number` | No | The HTTP status code of the LLM API call. | ✅ | ✅ |
| `baseUrl` | `string` | No | The base URL of the LLM API. | ✅ | ✅ |
| `operationName` | `string` | No | The name of the operation being performed. | ❌ | ✅ |
| `error` | `string` | No | Error message if the request failed. | ❌ | ✅ |
| `errorType` | `string` | No | Type of error (e.g., rate_limit, timeout). | ❌ | ✅ |
| `mcpToolsUsed` | `string[]` | No | List of MCP tools used during the request. | ❌ | ✅ |
## Quick Start with OpenTelemetry
### 1. Choose Your Backend
**For local testing with Jaeger:**
```bash
# Start Jaeger with OTLP support
docker run -d --name jaeger \
-e COLLECTOR_OTLP_ENABLED=true \
-p 16686:16686 \
-p 4318:4318 \
jaegertracing/all-in-one:latest
```
**For New Relic:**
```bash
export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:4318
export OTEL_EXPORTER_OTLP_HEADERS="api-key=YOUR_LICENSE_KEY"
```
### 2. Configure Environment
```bash
# Copy example configuration
cp .env.example .env
# Edit .env with your backend settings
# For Jaeger:
echo "OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318" >> .env
echo "OTEL_SERVICE_NAME=llm-observability-mcp" >> .env
```
### 3. Start the Server
```bash
npm run mcp:http
# or
npm run mcp:stdio
```
### 4. Test the Integration
```bash
# Test with curl
curl -X POST http://localhost:3000/mcp \
-H "Content-Type: application/json" \
-d '{
"tool": "capture_llm_observability_opentelemetry",
"arguments": {
"userId": "test-user",
"model": "gpt-4",
"provider": "openai",
"inputTokens": 100,
"outputTokens": 50,
"latency": 1.5,
"httpStatus": 200,
"operationName": "test-completion"
}
}'
```
## Development ## Development
@@ -203,6 +298,12 @@ This is the core tool provided by the server. It captures LLM usage in PostHog f
- **Run tests**: `npm test` - **Run tests**: `npm test`
- **Lint and format**: `npm run lint` and `npm run format` - **Lint and format**: `npm run lint` and `npm run format`
## Documentation
- [OpenTelemetry Setup Guide](OPENTELEMETRY.md) - Complete OpenTelemetry configuration
- [Usage Examples](examples/opentelemetry-usage.md) - Practical examples for different backends
- [Environment Configuration](.env.example) - All available configuration options
## License ## License
[MIT License](https://opensource.org/licenses/MIT) [MIT License](https://opensource.org/licenses/MIT)

View File

@@ -0,0 +1,380 @@
# OpenTelemetry LLM Observability Examples
This document provides practical examples for using the OpenTelemetry LLM observability tool with various backends.
## Example 1: Basic Jaeger Setup
### 1. Start Jaeger
```bash
# Start Jaeger with OTLP support
docker run -d --name jaeger \
-e COLLECTOR_OTLP_ENABLED=true \
-p 16686:16686 \
-p 4317:4317 \
-p 4318:4318 \
jaegertracing/all-in-one:latest
```
### 2. Configure Environment
```bash
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
export OTEL_SERVICE_NAME=llm-observability-mcp
export OTEL_ENVIRONMENT=development
```
### 3. Start MCP Server
```bash
npm run mcp:stdio
```
### 4. Test with Claude Desktop
Add to your Claude Desktop configuration:
```json
{
"mcpServers": {
"llm-observability": {
"command": "node",
"args": ["/path/to/llm-observability-mcp/dist/index.js"],
"env": {
"OTEL_EXPORTER_OTLP_ENDPOINT": "http://localhost:4318",
"OTEL_SERVICE_NAME": "llm-observability-mcp",
"OTEL_ENVIRONMENT": "development"
}
}
}
}
```
### 5. View Traces
Open <http://localhost:16686> to see your traces.
## Example 2: New Relic Integration
### 1. Get Your License Key
From New Relic: Account Settings > API Keys > License Key
### 2. Configure Environment
```bash
export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:4318
export OTEL_EXPORTER_OTLP_HEADERS="api-key=YOUR_LICENSE_KEY"
export OTEL_SERVICE_NAME=llm-observability-mcp
export OTEL_ENVIRONMENT=production
```
### 3. Usage Example
```json
{
"tool": "capture_llm_observability_opentelemetry",
"arguments": {
"userId": "user-12345",
"model": "gpt-4",
"provider": "openai",
"inputTokens": 150,
"outputTokens": 75,
"latency": 2.3,
"httpStatus": 200,
"operationName": "chat-completion",
"traceId": "trace-abc123",
"input": "What is the weather like today?",
"outputChoices": ["The weather is sunny and 75°F today."]
}
}
```
## Example 3: Grafana Cloud
### 1. Get Your Credentials
From Grafana Cloud: Connections > Data Sources > OpenTelemetry
### 2. Configure Environment
```bash
export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-us-central-0.grafana.net/otlp
export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Basic $(echo -n YOUR_INSTANCE_ID:YOUR_API_KEY | base64)"
export OTEL_SERVICE_NAME=llm-observability-mcp
```
### 3. Docker Compose Setup
```yaml
# docker-compose.yml
version: '3.8'
services:
llm-observability:
build: .
environment:
- OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-us-central-0.grafana.net/otlp
- OTEL_EXPORTER_OTLP_HEADERS=Authorization=Basic YOUR_BASE64_ENCODED_CREDENTIALS
- OTEL_SERVICE_NAME=llm-observability-mcp
ports:
- "3000:3000"
```
## Example 4: Honeycomb
### 1. Get Your API Key
From Honeycomb: Account Settings > API Keys
### 2. Configure Environment
```bash
export OTEL_EXPORTER_OTLP_ENDPOINT=https://api.honeycomb.io/v1/traces
export OTEL_EXPORTER_OTLP_HEADERS="x-honeycomb-team=YOUR_API_KEY"
export OTEL_SERVICE_NAME=llm-observability-mcp
export OTEL_ENVIRONMENT=production
```
## Example 5: Datadog
### 1. Get Your API Key
From Datadog: Organization Settings > API Keys
### 2. Configure Environment
```bash
export OTEL_EXPORTER_OTLP_ENDPOINT=https://api.datadoghq.com/api/v2/series
export OTEL_EXPORTER_OTLP_HEADERS="DD-API-KEY=YOUR_API_KEY"
export OTEL_SERVICE_NAME=llm-observability-mcp
```
## Example 6: Production Configuration
### Environment Variables
```bash
# Service Configuration
export OTEL_SERVICE_NAME=llm-observability-mcp
export OTEL_SERVICE_VERSION=1.2.3
export OTEL_ENVIRONMENT=production
# Sampling (10% of traces)
export OTEL_TRACES_SAMPLER_ARG=0.1
# Export Configuration
export OTEL_METRIC_EXPORT_INTERVAL=30000
export OTEL_METRIC_EXPORT_TIMEOUT=10000
# Backend Configuration
export OTEL_EXPORTER_OTLP_ENDPOINT=https://your-backend.com:4318
export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Bearer your-token,Custom-Header=value"
```
### Kubernetes Deployment
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-observability-mcp
spec:
replicas: 3
selector:
matchLabels:
app: llm-observability-mcp
template:
metadata:
labels:
app: llm-observability-mcp
spec:
containers:
- name: llm-observability-mcp
image: llm-observability-mcp:latest
ports:
- containerPort: 3000
env:
- name: OTEL_SERVICE_NAME
value: "llm-observability-mcp"
- name: OTEL_SERVICE_VERSION
value: "1.2.3"
- name: OTEL_ENVIRONMENT
value: "production"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "https://your-backend.com:4318"
- name: OTEL_EXPORTER_OTLP_HEADERS
valueFrom:
secretKeyRef:
name: otel-credentials
key: headers
```
## Example 7: Error Handling and Monitoring
### Error Tracking
```json
{
"tool": "capture_llm_observability_opentelemetry",
"arguments": {
"userId": "user-12345",
"model": "gpt-4",
"provider": "openai",
"httpStatus": 429,
"error": "Rate limit exceeded",
"errorType": "rate_limit",
"latency": 0.1,
"operationName": "chat-completion"
}
}
```
### Multi-Tool Usage Tracking
```json
{
"tool": "capture_llm_observability_opentelemetry",
"arguments": {
"userId": "user-12345",
"model": "gpt-4",
"provider": "openai",
"inputTokens": 500,
"outputTokens": 200,
"latency": 5.2,
"httpStatus": 200,
"operationName": "complex-workflow",
"mcpToolsUsed": ["file_read", "web_search", "code_execution"],
"traceId": "complex-workflow-123"
}
}
```
## Example 8: Testing Script
### Test Script
```bash
#!/bin/bash
# test-opentelemetry.sh
# Start Jaeger
echo "Starting Jaeger..."
docker run -d --name jaeger-test \
-e COLLECTOR_OTLP_ENABLED=true \
-p 16686:16686 \
-p 4318:4318 \
jaegertracing/all-in-one:latest
# Wait for Jaeger to start
sleep 5
# Configure environment
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
export OTEL_SERVICE_NAME=llm-observability-test
export OTEL_ENVIRONMENT=test
# Start MCP server in background
echo "Starting MCP server..."
npm run mcp:stdio &
# Wait for server to start
sleep 3
# Test the tool
echo "Testing OpenTelemetry tool..."
curl -X POST http://localhost:3000/mcp \
-H "Content-Type: application/json" \
-d '{
"tool": "capture_llm_observability_opentelemetry",
"arguments": {
"userId": "test-user",
"model": "gpt-4",
"provider": "openai",
"inputTokens": 100,
"outputTokens": 50,
"latency": 1.5,
"httpStatus": 200,
"operationName": "test-completion"
}
}'
echo "Test complete. View traces at http://localhost:16686"
```
## Example 9: Integration with Existing Tools
### Gradual Migration from PostHog
You can use both tools simultaneously during migration:
```json
// PostHog (existing)
{
"tool": "capture_llm_observability",
"arguments": {
"userId": "user-123",
"model": "gpt-4",
"provider": "openai"
}
}
// OpenTelemetry (new)
{
"tool": "capture_llm_observability_opentelemetry",
"arguments": {
"userId": "user-123",
"model": "gpt-4",
"provider": "openai"
}
}
```
## Troubleshooting Examples
### Debug Mode
```bash
export DEBUG=true
npm run mcp:stdio
```
### Check Configuration
```bash
# Test connectivity
curl -X POST http://localhost:4318/v1/traces \
-H "Content-Type: application/json" \
-d '{"resourceSpans":[]}'
```
### Verify Environment
```bash
# Check environment variables
env | grep OTEL
```
## Performance Tuning
### High-Volume Configuration
```bash
# Reduce sampling for high-volume
export OTEL_TRACES_SAMPLER_ARG=0.01
# Increase export intervals
export OTEL_METRIC_EXPORT_INTERVAL=60000
export OTEL_METRIC_EXPORT_TIMEOUT=30000
```
### Resource Optimization
```bash
# Disable metrics if only traces needed
unset OTEL_EXPORTER_OTLP_METRICS_ENDPOINT
# Disable logs if not needed
unset OTEL_EXPORTER_OTLP_LOGS_ENDPOINT
```
These examples should help you get started with OpenTelemetry LLM observability across different backends and use cases.

1587
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
{ {
"name": "@sfiorini/llm-observability-mcp", "name": "@sfiorini/llm-observability-mcp",
"version": "0.1.0", "version": "0.1.0",
"description": "A Model Context Protocol (MCP) server that provides a tool to capture LLM Observability events and send them to PostHog.", "description": "A Model Context Protocol (MCP) server that provides comprehensive LLM observability tools supporting both PostHog and OpenTelemetry backends.",
"main": "dist/index.js", "main": "dist/index.js",
"types": "dist/index.d.ts", "types": "dist/index.d.ts",
"type": "commonjs", "type": "commonjs",
@@ -76,6 +76,15 @@
}, },
"dependencies": { "dependencies": {
"@modelcontextprotocol/sdk": "^1.15.1", "@modelcontextprotocol/sdk": "^1.15.1",
"@opentelemetry/api": "^1.9.0",
"@opentelemetry/exporter-otlp-grpc": "^0.26.0",
"@opentelemetry/exporter-otlp-http": "^0.26.0",
"@opentelemetry/instrumentation": "^0.203.0",
"@opentelemetry/resources": "^2.0.1",
"@opentelemetry/sdk-metrics": "^2.0.1",
"@opentelemetry/sdk-node": "^0.203.0",
"@opentelemetry/sdk-trace-node": "^2.0.1",
"@opentelemetry/semantic-conventions": "^1.36.0",
"commander": "^14.0.0", "commander": "^14.0.0",
"cors": "^2.8.5", "cors": "^2.8.5",
"dotenv": "^17.2.0", "dotenv": "^17.2.0",
@@ -113,4 +122,4 @@
".ts" ".ts"
] ]
} }
} }

View File

@@ -0,0 +1,130 @@
import { z } from 'zod';
/**
 * Zod schema describing every OpenTelemetry configuration option accepted
 * by this server. Defaults mirror the values documented in OPENTELEMETRY.md
 * and .env.example, so parsing an empty object yields a valid (but disabled)
 * configuration.
 */
export const OpenTelemetryConfigSchema = z.object({
  // Service identity — reported as OTel resource attributes on every export
  serviceName: z.string().default('llm-observability-mcp'),
  serviceVersion: z.string().default('1.0.0'),
  environment: z.string().default('development'),
  // Per-signal OTLP endpoints; leaving all three unset disables OpenTelemetry
  // (see OpenTelemetryConfig.isEnabled)
  metricsEndpoint: z.string().optional(),
  tracesEndpoint: z.string().optional(),
  logsEndpoint: z.string().optional(),
  // Authentication headers attached to each OTLP export request
  // (parsed from the "key1=value1,key2=value2" env-var format)
  headers: z.record(z.string()).optional(),
  // Metric export cadence and timeout, in milliseconds
  exportIntervalMillis: z.number().default(10000),
  exportTimeoutMillis: z.number().default(5000),
  // Fraction of traces to sample: 0.0 = none, 1.0 = all
  samplingRatio: z.number().min(0).max(1).default(1.0),
});

/** TypeScript type inferred from the schema — shape of a validated config. */
export type OpenTelemetryConfigType = z.infer<typeof OpenTelemetryConfigSchema>;
/**
* OpenTelemetry configuration class
*/
/**
 * Immutable holder for validated OpenTelemetry settings.
 *
 * Instances are usually created via {@link OpenTelemetryConfig.fromEnv},
 * which reads the standard OTEL_* environment variables and validates them
 * against {@link OpenTelemetryConfigSchema}.
 */
export class OpenTelemetryConfig {
  public readonly serviceName: string;
  public readonly serviceVersion: string;
  public readonly environment: string;
  public readonly metricsEndpoint?: string;
  public readonly tracesEndpoint?: string;
  public readonly logsEndpoint?: string;
  public readonly headers?: Record<string, string>;
  public readonly exportIntervalMillis: number;
  public readonly exportTimeoutMillis: number;
  public readonly samplingRatio: number;

  /**
   * @param config A configuration object already validated by
   *   {@link OpenTelemetryConfigSchema} (zod strips unknown keys, so the
   *   object carries exactly the fields declared above).
   */
  constructor(config: OpenTelemetryConfigType) {
    this.serviceName = config.serviceName;
    this.serviceVersion = config.serviceVersion;
    this.environment = config.environment;
    this.metricsEndpoint = config.metricsEndpoint;
    this.tracesEndpoint = config.tracesEndpoint;
    this.logsEndpoint = config.logsEndpoint;
    this.headers = config.headers;
    this.exportIntervalMillis = config.exportIntervalMillis;
    this.exportTimeoutMillis = config.exportTimeoutMillis;
    this.samplingRatio = config.samplingRatio;
  }

  /**
   * Build a configuration from environment variables.
   *
   * Uses `||` (not `??`) deliberately so that an empty-string env var falls
   * back to the default, matching the documented behavior.
   *
   * NOTE(review): only the per-signal endpoint variables
   * (OTEL_EXPORTER_OTLP_{METRICS,TRACES,LOGS}_ENDPOINT) are read here; the
   * generic OTEL_EXPORTER_OTLP_ENDPOINT the docs mention does not appear to
   * be consumed in this block — confirm it is handled elsewhere.
   *
   * @throws ZodError if a value fails schema validation (e.g. sampling
   *   ratio outside [0, 1]).
   */
  public static fromEnv(): OpenTelemetryConfig {
    const env = process.env;
    const rawHeaders = env.OTEL_EXPORTER_OTLP_HEADERS;
    const parsed = OpenTelemetryConfigSchema.parse({
      serviceName: env.OTEL_SERVICE_NAME || 'llm-observability-mcp',
      serviceVersion: env.OTEL_SERVICE_VERSION || '1.0.0',
      environment: env.OTEL_ENVIRONMENT || 'development',
      metricsEndpoint: env.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT,
      tracesEndpoint: env.OTEL_EXPORTER_OTLP_TRACES_ENDPOINT,
      logsEndpoint: env.OTEL_EXPORTER_OTLP_LOGS_ENDPOINT,
      headers: rawHeaders ? parseHeaders(rawHeaders) : undefined,
      exportIntervalMillis: parseInt(env.OTEL_METRIC_EXPORT_INTERVAL || '10000', 10),
      exportTimeoutMillis: parseInt(env.OTEL_METRIC_EXPORT_TIMEOUT || '5000', 10),
      samplingRatio: parseFloat(env.OTEL_TRACES_SAMPLER_ARG || '1.0'),
    });
    return new OpenTelemetryConfig(parsed);
  }

  /**
   * OpenTelemetry is considered enabled when at least one per-signal
   * endpoint has been configured.
   */
  public isEnabled(): boolean {
    return [this.metricsEndpoint, this.tracesEndpoint, this.logsEndpoint].some(Boolean);
  }

  /**
   * Pick a single OTLP endpoint when one can stand in for all signals:
   * prefers the metrics endpoint, falling back to the traces endpoint.
   */
  public getDefaultEndpoint(): string | undefined {
    const { metricsEndpoint, tracesEndpoint } = this;
    if (metricsEndpoint && tracesEndpoint === metricsEndpoint) {
      return metricsEndpoint;
    }
    return metricsEndpoint || tracesEndpoint;
  }
}
/**
 * Parse headers from environment variable string
 * Format: "key1=value1,key2=value2"
 *
 * Splits each pair on the FIRST '=' only, so values that themselves contain
 * '=' survive intact — e.g. Base64 credentials in
 * "Authorization=Basic abc==" (the previous split('=') truncated these).
 */
function parseHeaders(headersString: string): Record<string, string> {
	const headers: Record<string, string> = {};
	for (const pair of headersString.split(',')) {
		const separatorIndex = pair.indexOf('=');
		// Skip pairs with no '=' or an empty key.
		if (separatorIndex <= 0) {
			continue;
		}
		const key = pair.slice(0, separatorIndex).trim();
		const value = pair.slice(separatorIndex + 1).trim();
		if (key && value) {
			headers[key] = value;
		}
	}
	return headers;
}

View File

@@ -0,0 +1,74 @@
import { Logger } from '../utils/logger.util.js';
import { OpenTelemetryConfig } from '../config/opentelemetry-llm.config.js';
import { OpenTelemetryService } from '../services/opentelemetry-llm.service.js';
import { OpenTelemetryLlmInputSchemaType } from '../types/opentelemetry-llm.types.js';
import { ControllerResponse } from '../types/common.types.js';
/**
* Controller for OpenTelemetry LLM observability
*/
export class OpenTelemetryController {
private service: OpenTelemetryService;
constructor(service: OpenTelemetryService) {
this.service = service;
}
/**
* Capture LLM observability data
*/
public async captureLlmObservability(
data: OpenTelemetryLlmInputSchemaType,
): Promise<ControllerResponse> {
const logger = Logger.forContext(
'opentelemetry.controller',
'captureLlmObservability',
);
logger.debug('Capturing LLM observability data', data);
try {
// Record the LLM request
this.service.recordLlmRequest(data);
const content = `## OpenTelemetry LLM Observability Captured
Successfully recorded LLM observability data:
- **Model**: ${data.model}
- **Provider**: ${data.provider}
- **User ID**: ${data.userId}
- **Operation**: ${data.operationName || 'generate'}
- **Input Tokens**: ${data.inputTokens || 'N/A'}
- **Output Tokens**: ${data.outputTokens || 'N/A'}
- **Total Tokens**: ${(data.inputTokens || 0) + (data.outputTokens || 0)}
- **Latency**: ${data.latency ? `${data.latency}s` : 'N/A'}
- **Status**: ${data.error ? 'Error' : 'Success'}
${data.error ? `- **Error**: ${data.error}` : ''}
### Metrics Recorded
- ✅ Request counter incremented
- ✅ Token usage recorded
- ✅ Latency histogram updated
- ✅ Distributed trace created
### Trace Details
- **Trace ID**: ${data.traceId || 'auto-generated'}
- **Span Name**: ${data.operationName || 'llm.generate'}
- **Attributes**: model, provider, user_id, tokens, latency, error details
The data has been sent to your configured OpenTelemetry collector and is now available in your observability platform (Jaeger, New Relic, Grafana, etc.).`;
return { content };
} catch (error) {
logger.error('Error capturing LLM observability', error);
throw error;
}
}
}
// Export singleton instance
const openTelemetryService = OpenTelemetryService.getInstance();
const openTelemetryController = new OpenTelemetryController(
openTelemetryService,
);
export default openTelemetryController;

View File

@@ -7,6 +7,7 @@ import { config } from '../utils/config.util';
import { PACKAGE_NAME, VERSION } from '../utils/constants.util'; import { PACKAGE_NAME, VERSION } from '../utils/constants.util';
import posthogLlmResources from '../resources/posthog-llm.resource.js'; import posthogLlmResources from '../resources/posthog-llm.resource.js';
import posthogLlmTools from '../tools/posthog-llm.tool.js'; import posthogLlmTools from '../tools/posthog-llm.tool.js';
import openTelemetryTools from '../tools/opentelemetry-llm.tool.js';
export function createServer() { export function createServer() {
const serverLogger = Logger.forContext('utils/server.util.ts', 'getServer'); const serverLogger = Logger.forContext('utils/server.util.ts', 'getServer');
@@ -29,6 +30,7 @@ export function createServer() {
serverLogger.info('Registering MCP tools and resources...'); serverLogger.info('Registering MCP tools and resources...');
posthogLlmTools.registerTools(server); posthogLlmTools.registerTools(server);
posthogLlmResources.registerResources(server); posthogLlmResources.registerResources(server);
openTelemetryTools.registerTools(server);
serverLogger.debug('All tools and resources registered'); serverLogger.debug('All tools and resources registered');
return server; return server;

View File

@@ -0,0 +1,294 @@
import {
MeterProvider,
PeriodicExportingMetricReader,
} from '@opentelemetry/sdk-metrics';
import { NodeSDK } from '@opentelemetry/sdk-node';
import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-node';
import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-http';
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';
import { resourceFromAttributes } from '@opentelemetry/resources';
import {
ATTR_SERVICE_NAME,
ATTR_SERVICE_VERSION,
SEMRESATTRS_DEPLOYMENT_ENVIRONMENT,
} from '@opentelemetry/semantic-conventions';
import { Logger } from '../utils/logger.util.js';
import { OpenTelemetryConfig } from '../config/opentelemetry-llm.config.js';
import { OpenTelemetryLlmInputSchemaType } from '../types/opentelemetry-llm.types.js';
import {
Meter,
Counter,
Histogram,
UpDownCounter,
metrics,
} from '@opentelemetry/api';
import { trace, Tracer, SpanStatusCode } from '@opentelemetry/api';
/**
 * OpenTelemetry service for LLM observability
 *
 * Singleton that owns the NodeSDK (traces) and MeterProvider (metrics) and
 * exposes helpers to record LLM request metrics and spans.
 */
export class OpenTelemetryService {
	private static instance: OpenTelemetryService;
	private sdk: NodeSDK | null = null;
	private meterProvider: MeterProvider | null = null;
	private tracer: Tracer;
	private meter: Meter;
	private config: OpenTelemetryConfig;
	// Metric instruments. Definite-assignment: populated by
	// createInstruments(), called from the constructor and again from
	// initialize() once a real MeterProvider exists.
	private requestCounter!: Counter;
	private tokenCounter!: Counter;
	private latencyHistogram!: Histogram;
	private activeRequests!: UpDownCounter;
	private constructor(config: OpenTelemetryConfig) {
		this.config = config;
		this.tracer = trace.getTracer('llm-observability-mcp');
		// At construction time no MeterProvider has been built yet, so this
		// resolves to the global (proxy/no-op) meter; initialize() rebinds
		// the instruments to the configured provider.
		this.meter = metrics.getMeter('llm-observability-mcp');
		this.createInstruments();
	}
	/**
	 * (Re-)create the metric instruments on the current meter.
	 *
	 * Bug fix: previously the instruments were created exactly once in the
	 * constructor, when meterProvider was still null, so they were bound to
	 * the global no-op meter forever and recorded metrics were never
	 * exported. initialize() now calls this again with the real meter.
	 */
	private createInstruments(): void {
		this.requestCounter = this.meter.createCounter('llm.requests.total', {
			description: 'Total number of LLM requests',
		});
		this.tokenCounter = this.meter.createCounter('llm.tokens.total', {
			description: 'Total number of tokens processed',
		});
		this.latencyHistogram = this.meter.createHistogram(
			'llm.latency.duration',
			{
				description: 'Duration of LLM requests in milliseconds',
				unit: 'ms',
			},
		);
		this.activeRequests = this.meter.createUpDownCounter(
			'llm.requests.active',
			{
				description: 'Number of active LLM requests',
			},
		);
	}
	/**
	 * Get singleton instance.
	 *
	 * @param config Required on the very first call; ignored afterwards.
	 * @throws Error when called without a config before any instance exists.
	 */
	public static getInstance(
		config?: OpenTelemetryConfig,
	): OpenTelemetryService {
		if (!OpenTelemetryService.instance) {
			if (!config) {
				throw new Error(
					'OpenTelemetryService requires configuration on first initialization',
				);
			}
			OpenTelemetryService.instance = new OpenTelemetryService(config);
		}
		return OpenTelemetryService.instance;
	}
	/**
	 * Initialize OpenTelemetry SDK (idempotent: a second call is a no-op).
	 * Builds the OTLP exporters from the configured endpoints, wires up the
	 * MeterProvider, rebinds the metric instruments, and starts the NodeSDK.
	 */
	public initialize(): void {
		const logger = Logger.forContext('opentelemetry.service', 'initialize');
		if (this.sdk) {
			logger.warn('OpenTelemetry SDK already initialized');
			return;
		}
		try {
			const resource = resourceFromAttributes({
				[ATTR_SERVICE_NAME]: this.config.serviceName,
				[ATTR_SERVICE_VERSION]: this.config.serviceVersion,
				[SEMRESATTRS_DEPLOYMENT_ENVIRONMENT]: this.config.environment,
			});
			// Configure metric exporter (only when an endpoint is set).
			const metricExporter = this.config.metricsEndpoint
				? new OTLPMetricExporter({
						url: this.config.metricsEndpoint,
						headers: this.config.headers,
					})
				: undefined;
			// Configure trace exporter (only when an endpoint is set).
			const traceExporter = this.config.tracesEndpoint
				? new OTLPTraceExporter({
						url: this.config.tracesEndpoint,
						headers: this.config.headers,
					})
				: undefined;
			// Initialize meter provider and rebind the instruments to it so
			// recorded metrics are actually exported (see createInstruments).
			if (metricExporter) {
				this.meterProvider = new MeterProvider({
					resource,
					readers: [
						new PeriodicExportingMetricReader({
							exporter: metricExporter,
							exportIntervalMillis:
								this.config.exportIntervalMillis,
							exportTimeoutMillis:
								this.config.exportTimeoutMillis,
						}),
					],
				});
				this.meter =
					this.meterProvider.getMeter('llm-observability-mcp');
				this.createInstruments();
			}
			// Initialize SDK (handles trace export/batching).
			this.sdk = new NodeSDK({
				resource,
				spanProcessor: traceExporter
					? new BatchSpanProcessor(traceExporter)
					: undefined,
			});
			this.sdk.start();
			logger.info('OpenTelemetry SDK initialized successfully');
		} catch (error) {
			logger.error('Failed to initialize OpenTelemetry SDK', error);
			throw error;
		}
	}
	/**
	 * Record LLM request metrics and traces.
	 * Increments the request counter, records per-type token counts and
	 * latency (converted from seconds to ms), then emits a span.
	 */
	public recordLlmRequest(data: OpenTelemetryLlmInputSchemaType): void {
		const labels: Record<string, string> = {
			model: data.model,
			provider: data.provider,
			userId: data.userId,
			operationName: data.operationName || 'generate',
			status: data.error ? 'error' : 'success',
			mcpToolsUsed: data.mcpToolsUsed?.join(',') || '',
		};
		// Only attach errorType when present — undefined attribute values
		// are invalid and get dropped (with a warning) by the SDK.
		if (data.errorType) {
			labels.errorType = data.errorType;
		}
		// Record metrics
		this.requestCounter.add(1, labels);
		if (data.inputTokens || data.outputTokens) {
			const totalTokens =
				(data.inputTokens || 0) + (data.outputTokens || 0);
			this.tokenCounter.add(totalTokens, {
				...labels,
				type: 'total',
			});
			if (data.inputTokens) {
				this.tokenCounter.add(data.inputTokens, {
					...labels,
					type: 'input',
				});
			}
			if (data.outputTokens) {
				this.tokenCounter.add(data.outputTokens, {
					...labels,
					type: 'output',
				});
			}
		}
		if (data.latency) {
			// Schema declares latency in seconds; histogram unit is ms.
			this.latencyHistogram.record(data.latency * 1000, labels);
		}
		// Create trace
		this.createLlmTrace(data);
	}
	/**
	 * Create distributed trace for LLM request.
	 * The span is started and ended immediately (duration ≈ 0); the real
	 * request latency is carried in the llm.latency_ms attribute instead.
	 */
	private createLlmTrace(data: OpenTelemetryLlmInputSchemaType): void {
		const spanName = data.operationName || 'llm.generate';
		const rawAttributes: Record<string, string | number | undefined> = {
			'llm.model': data.model,
			'llm.provider': data.provider,
			'llm.user_id': data.userId,
			'llm.operation': data.operationName || 'generate',
			'llm.input_tokens': data.inputTokens,
			'llm.output_tokens': data.outputTokens,
			'llm.total_tokens':
				(data.inputTokens || 0) + (data.outputTokens || 0),
			'llm.latency_ms': data.latency ? data.latency * 1000 : undefined,
			'llm.http_status': data.httpStatus,
			'llm.base_url': data.baseUrl,
			'llm.error': data.error,
			'llm.error_type': data.errorType,
			'llm.input':
				typeof data.input === 'string'
					? data.input
					: JSON.stringify(data.input),
			'llm.output':
				typeof data.outputChoices === 'string'
					? data.outputChoices
					: JSON.stringify(data.outputChoices),
			'llm.mcp_tools_used': data.mcpToolsUsed?.join(','),
		};
		// Strip undefined values: the OTel API rejects them with warnings.
		const attributes = Object.fromEntries(
			Object.entries(rawAttributes).filter(
				([, value]) => value !== undefined,
			),
		);
		const span = this.tracer.startSpan(spanName, { attributes });
		if (data.traceId) {
			span.setAttribute('trace.id', data.traceId);
		}
		if (data.error) {
			span.setStatus({
				code: SpanStatusCode.ERROR,
				message: data.error,
			});
		} else {
			span.setStatus({ code: SpanStatusCode.OK });
		}
		span.end();
	}
	/**
	 * Record active request count
	 */
	public incrementActiveRequests(): void {
		this.activeRequests.add(1);
	}
	/**
	 * Decrement active request count
	 */
	public decrementActiveRequests(): void {
		this.activeRequests.add(-1);
	}
	/**
	 * Shutdown OpenTelemetry SDK.
	 * Flushes the MeterProvider first so buffered metrics are not lost,
	 * then shuts down the NodeSDK. Errors are logged, never rethrown.
	 */
	public async shutdown(): Promise<void> {
		const logger = Logger.forContext('opentelemetry.service', 'shutdown');
		if (this.meterProvider) {
			try {
				await this.meterProvider.shutdown();
			} catch (error) {
				logger.error('Error shutting down MeterProvider', error);
			}
		}
		if (this.sdk) {
			try {
				await this.sdk.shutdown();
				logger.info('OpenTelemetry SDK shutdown successfully');
			} catch (error) {
				logger.error('Error shutting down OpenTelemetry SDK', error);
			}
		}
	}
}

View File

@@ -0,0 +1,91 @@
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { Logger } from '../utils/logger.util.js';
import { formatErrorForMcpTool } from '../utils/error.util.js';
import { OpenTelemetryService } from '../services/opentelemetry-llm.service.js';
import { OpenTelemetryController } from '../controllers/opentelemetry-llm.controller.js';
import { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
import {
OpenTelemetryLlmInputSchema,
OpenTelemetryLlmInputSchemaType,
} from '../types/opentelemetry-llm.types.js';
import { OpenTelemetryConfig } from '../config/opentelemetry-llm.config.js';
/**
* @function captureOpenTelemetryLlmObservability
* @description MCP Tool handler to capture LLM observability events using OpenTelemetry.
* It records metrics, traces, and spans for LLM requests that can be sent to any OpenTelemetry-compatible backend.
* @param {OpenTelemetryLlmInputSchemaType} args - Arguments provided to the tool.
* @returns {Promise<CallToolResult>} Formatted response for the MCP.
* @throws {McpError} Formatted error if the controller or service layer encounters an issue.
*/
async function captureOpenTelemetryLlmObservability(
args: OpenTelemetryLlmInputSchemaType,
): Promise<CallToolResult> {
const methodLogger = Logger.forContext(
'opentelemetry.tool',
'captureOpenTelemetryLlmObservability',
);
methodLogger.debug('Capture LLM Observability with OpenTelemetry...', args);
try {
// Parse and validate arguments
const validatedArgs = OpenTelemetryLlmInputSchema.parse(args);
// Ensure OpenTelemetry is configured
const config = OpenTelemetryConfig.fromEnv();
const service = OpenTelemetryService.getInstance(config);
// Initialize if not already done
service.initialize();
// Create controller with the service
const controller = new OpenTelemetryController(service);
// Pass validated args to the controller
const result = await controller.captureLlmObservability(validatedArgs);
methodLogger.debug('Got response from controller', result);
// Format the response for the MCP tool
return {
content: [
{
type: 'text' as const,
text: result.content,
},
],
};
} catch (error) {
methodLogger.error(
'Error tracking LLM generation with OpenTelemetry',
error,
);
return formatErrorForMcpTool(error);
}
}
/**
* @function registerTools
* @description Registers the OpenTelemetry LLM observability tool with the MCP server.
*
* @param {McpServer} server - The MCP server instance.
*/
function registerTools(server: McpServer) {
const methodLogger = Logger.forContext(
'opentelemetry.tool',
'registerTools',
);
methodLogger.debug('Registering OpenTelemetry LLM observability tools...');
server.tool(
'capture_llm_observability_opentelemetry',
`Captures LLM usage using OpenTelemetry for observability, including requests, responses, and performance metrics. Works with any OpenTelemetry-compatible backend like Jaeger, New Relic, Grafana, etc.`,
OpenTelemetryLlmInputSchema.shape,
captureOpenTelemetryLlmObservability,
);
methodLogger.debug(
'Successfully registered capture_llm_observability_opentelemetry tool.',
);
}
export default { registerTools };

View File

@@ -6,7 +6,7 @@ import { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
import { import {
GetToolInputSchema, GetToolInputSchema,
GetToolInputSchemaType, GetToolInputSchemaType,
} from './posthog-llm.types.js'; } from '../types/posthog-llm.types.js';
/** /**
* @function capturePosthogLlmObservability * @function capturePosthogLlmObservability

View File

@@ -0,0 +1,168 @@
import { z } from 'zod';
/**
 * Zod schema for the OpenTelemetry LLM observability tool arguments.
 * Field names and .describe() texts form the tool's public contract.
 */
export const OpenTelemetryLlmInputSchema = z.object({
	// --- Required fields ---
	userId: z.string().describe('The distinct ID of the user'),
	model: z.string().describe('The model used (e.g., gpt-4, claude-3, etc.)'),
	provider: z
		.string()
		.describe('The LLM provider (e.g., openai, anthropic, etc.)'),
	// --- Optional request/response fields ---
	traceId: z.string().optional().describe('The trace ID to group AI events'),
	input: z
		.any()
		.optional()
		.describe('The input to the LLM (messages, prompt, etc.)'),
	outputChoices: z.any().optional().describe('The output from the LLM'),
	inputTokens: z
		.number()
		.optional()
		.describe('The number of tokens in the input'),
	outputTokens: z
		.number()
		.optional()
		.describe('The number of tokens in the output'),
	latency: z
		.number()
		.optional()
		.describe('The latency of the LLM call in seconds'),
	httpStatus: z
		.number()
		.optional()
		.describe('The HTTP status code of the LLM call'),
	baseUrl: z.string().optional().describe('The base URL of the LLM API'),
	// --- OpenTelemetry-specific fields ---
	operationName: z
		.string()
		.optional()
		.describe('The name of the operation being performed'),
	error: z
		.string()
		.optional()
		.describe('Error message if the request failed'),
	errorType: z
		.string()
		.optional()
		.describe('Type of error (e.g., rate_limit, timeout, etc.)'),
	mcpToolsUsed: z
		.array(z.string())
		.optional()
		.describe('List of MCP tools used during the request'),
});
/**
 * TypeScript type inferred from the OpenTelemetryLlmInputSchema Zod schema
 */
export type OpenTelemetryLlmInputSchemaType = z.infer<
	typeof OpenTelemetryLlmInputSchema
>;
/**
 * Configuration for OpenTelemetry exporters
 */
export interface OpenTelemetryExporterConfig {
	// OTLP endpoint URL the exporter sends to.
	url: string;
	// Extra HTTP headers (e.g. auth tokens) sent with each export request.
	headers?: Record<string, string>;
	// Export timeout — presumably milliseconds; not read anywhere in this
	// file, so confirm the unit at the point of use.
	timeout?: number;
}
/**
 * Supported OpenTelemetry backends
 * Keys into OpenTelemetryBackendConfigs below; 'custom' falls back to
 * OTEL_EXPORTER_OTLP_ENDPOINT.
 */
export type OpenTelemetryBackend =
	| 'jaeger'
	| 'newrelic'
	| 'grafana'
	| 'datadog'
	| 'honeycomb'
	| 'lightstep'
	| 'custom';
/**
 * Pre-configured settings for popular OpenTelemetry backends
 *
 * NOTE(review): the credential environment variables (NEW_RELIC_LICENSE_KEY,
 * DD_API_KEY, etc.) are read once at module load; values set later in the
 * process lifetime are not picked up.
 */
export const OpenTelemetryBackendConfigs: Record<
	OpenTelemetryBackend,
	Partial<OpenTelemetryExporterConfig>
> = {
	// Local Jaeger all-in-one with OTLP/HTTP receiver on the default port.
	jaeger: {
		url: 'http://localhost:4318/v1/traces',
		headers: {},
	},
	newrelic: {
		url: 'https://otlp.nr-data.net:4318/v1/traces',
		headers: {
			'api-key': process.env.NEW_RELIC_LICENSE_KEY || '',
		},
	},
	grafana: {
		url: 'https://otlp-gateway-prod-us-central-0.grafana.net/otlp/v1/traces',
		headers: {
			// Basic auth from instance ID + API key; empty string when either
			// variable is missing (the request will then be unauthenticated).
			Authorization:
				process.env.GRAFANA_INSTANCE_ID && process.env.GRAFANA_API_KEY
					? `Basic ${Buffer.from(
							`${process.env.GRAFANA_INSTANCE_ID}:${process.env.GRAFANA_API_KEY}`,
						).toString('base64')}`
					: '',
		},
	},
	datadog: {
		// NOTE(review): this looks like Datadog's metrics series API rather
		// than an OTLP trace endpoint — confirm before wiring it to a trace
		// exporter.
		url: 'https://api.datadoghq.com/api/v2/series',
		headers: {
			'DD-API-KEY': process.env.DD_API_KEY || '',
		},
	},
	honeycomb: {
		url: 'https://api.honeycomb.io/v1/traces',
		headers: {
			'x-honeycomb-team': process.env.HONEYCOMB_API_KEY || '',
		},
	},
	lightstep: {
		url: 'https://ingest.lightstep.com:443/api/v2/otel/trace',
		headers: {
			'lightstep-access-token': process.env.LIGHTSTEP_ACCESS_TOKEN || '',
		},
	},
	// Generic OTLP collector; defaults to a local collector when the
	// endpoint variable is unset.
	custom: {
		url: process.env.OTEL_EXPORTER_OTLP_ENDPOINT || 'http://localhost:4318',
		headers: {},
	},
};
/**
 * Metric names used by the OpenTelemetry service
 *
 * NOTE(review): the service currently hard-codes these same string literals
 * when creating its instruments instead of importing these constants —
 * consider unifying so the names cannot drift.
 */
export const OpenTelemetryMetrics = {
	REQUESTS_TOTAL: 'llm.requests.total',
	TOKENS_TOTAL: 'llm.tokens.total',
	LATENCY_DURATION: 'llm.latency.duration',
	REQUESTS_ACTIVE: 'llm.requests.active',
} as const;
/**
 * Trace attribute names used by the OpenTelemetry service
 *
 * NOTE(review): as with the metric names above, the service sets these
 * attributes via inline string literals rather than these constants.
 */
export const OpenTelemetryAttributes = {
	LLM_MODEL: 'llm.model',
	LLM_PROVIDER: 'llm.provider',
	LLM_USER_ID: 'llm.user_id',
	LLM_OPERATION: 'llm.operation',
	LLM_INPUT_TOKENS: 'llm.input_tokens',
	LLM_OUTPUT_TOKENS: 'llm.output_tokens',
	LLM_TOTAL_TOKENS: 'llm.total_tokens',
	LLM_LATENCY_MS: 'llm.latency_ms',
	LLM_HTTP_STATUS: 'llm.http_status',
	LLM_BASE_URL: 'llm.base_url',
	LLM_ERROR: 'llm.error',
	LLM_ERROR_TYPE: 'llm.error_type',
	LLM_INPUT: 'llm.input',
	LLM_OUTPUT: 'llm.output',
	LLM_MCP_TOOLS_USED: 'llm.mcp_tools_used',
	TRACE_ID: 'trace.id',
} as const;