Initial implementation of opentelemetry-llm tool
This commit is contained in:
59
.env.example
59
.env.example
@@ -1,9 +1,52 @@
|
|||||||
# Enable debug logging
|
# PostHog Configuration (existing)
|
||||||
|
POSTHOG_API_KEY=your_posthog_api_key_here
|
||||||
|
POSTHOG_HOST=https://app.posthog.com
|
||||||
|
|
||||||
|
# OpenTelemetry Configuration
|
||||||
|
# ==========================
|
||||||
|
|
||||||
|
# Service Configuration
|
||||||
|
OTEL_SERVICE_NAME=llm-observability-mcp
|
||||||
|
OTEL_SERVICE_VERSION=1.0.0
|
||||||
|
OTEL_ENVIRONMENT=development
|
||||||
|
|
||||||
|
# OTLP Endpoints
|
||||||
|
# Uncomment and configure based on your backend
|
||||||
|
|
||||||
|
# Jaeger (local development)
|
||||||
|
# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
|
||||||
|
|
||||||
|
# New Relic
|
||||||
|
# OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:4318
|
||||||
|
# OTEL_EXPORTER_OTLP_HEADERS=api-key=YOUR_NEW_RELIC_LICENSE_KEY
|
||||||
|
|
||||||
|
# Grafana Cloud
|
||||||
|
# OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-us-central-0.grafana.net/otlp
|
||||||
|
# OTEL_EXPORTER_OTLP_HEADERS=Authorization=Basic YOUR_BASE64_ENCODED_CREDENTIALS
|
||||||
|
|
||||||
|
# Datadog
|
||||||
|
# NOTE: Datadog does not accept OTLP at its REST API; send OTLP to a Datadog Agent (or OTel Collector) with OTLP ingest enabled
# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
|
||||||
|
# OTEL_EXPORTER_OTLP_HEADERS=DD-API-KEY=YOUR_DD_API_KEY
|
||||||
|
|
||||||
|
# Honeycomb
|
||||||
|
# OTEL_EXPORTER_OTLP_ENDPOINT=https://api.honeycomb.io/v1/traces
|
||||||
|
# OTEL_EXPORTER_OTLP_HEADERS=x-honeycomb-team=YOUR_API_KEY
|
||||||
|
|
||||||
|
# Lightstep
|
||||||
|
# OTEL_EXPORTER_OTLP_ENDPOINT=https://ingest.lightstep.com:443/api/v2/otel/trace
|
||||||
|
# OTEL_EXPORTER_OTLP_HEADERS=lightstep-access-token=YOUR_ACCESS_TOKEN
|
||||||
|
|
||||||
|
# Separate endpoints (optional)
|
||||||
|
# OTEL_EXPORTER_OTLP_METRICS_ENDPOINT=http://localhost:4318/v1/metrics
|
||||||
|
# OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=http://localhost:4318/v1/traces
|
||||||
|
# OTEL_EXPORTER_OTLP_LOGS_ENDPOINT=http://localhost:4318/v1/logs
|
||||||
|
|
||||||
|
# Export Configuration
|
||||||
|
OTEL_METRIC_EXPORT_INTERVAL=10000
|
||||||
|
OTEL_METRIC_EXPORT_TIMEOUT=5000
|
||||||
|
|
||||||
|
# Sampling Configuration
|
||||||
|
OTEL_TRACES_SAMPLER_ARG=1.0
|
||||||
|
|
||||||
|
# Debug Mode
|
||||||
DEBUG=false
|
DEBUG=false
|
||||||
|
|
||||||
# Transport mode
|
|
||||||
TRANSPORT_MODE=<http | stdio>
|
|
||||||
|
|
||||||
# PostHog LLM Observability configuration
|
|
||||||
POSTHOG_API_KEY=<YourPostHogAPIKeyGoesHere>
|
|
||||||
POSTHOG_HOST=https://us.i.posthog.com
|
|
||||||
|
|||||||
301
OPENTELEMETRY.md
Normal file
301
OPENTELEMETRY.md
Normal file
@@ -0,0 +1,301 @@
|
|||||||
|
# OpenTelemetry LLM Observability Integration
|
||||||
|
|
||||||
|
This MCP server now includes comprehensive OpenTelemetry support for LLM observability, compatible with any OpenTelemetry backend including Jaeger, New Relic, Grafana, Datadog, Honeycomb, and more.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- **Universal Compatibility**: Works with any OpenTelemetry-compatible backend
|
||||||
|
- **Comprehensive Metrics**: Request counts, token usage, latency, error rates
|
||||||
|
- **Distributed Tracing**: Full request lifecycle tracking with spans
|
||||||
|
- **Flexible Configuration**: Environment-based configuration for different backends
|
||||||
|
- **Zero-Code Integration**: Drop-in replacement for existing observability tools
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
### 1. Install Dependencies
|
||||||
|
|
||||||
|
The OpenTelemetry dependencies are already included in the package.json:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm install
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Configure Your Backend
|
||||||
|
|
||||||
|
#### Jaeger (Local Development)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start Jaeger locally
|
||||||
|
docker run -d --name jaeger \
|
||||||
|
-e COLLECTOR_OTLP_ENABLED=true \
|
||||||
|
-p 16686:16686 \
|
||||||
|
-p 4317:4317 \
|
||||||
|
-p 4318:4318 \
|
||||||
|
jaegertracing/all-in-one:latest
|
||||||
|
|
||||||
|
# Configure the MCP server
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
|
||||||
|
export OTEL_SERVICE_NAME=llm-observability-mcp
|
||||||
|
```
|
||||||
|
|
||||||
|
#### New Relic
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:4318
|
||||||
|
export OTEL_EXPORTER_OTLP_HEADERS="api-key=YOUR_NEW_RELIC_LICENSE_KEY"
|
||||||
|
export OTEL_SERVICE_NAME=llm-observability-mcp
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Grafana Cloud
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-us-central-0.grafana.net/otlp
|
||||||
|
export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Basic $(echo -n YOUR_INSTANCE_ID:YOUR_API_KEY | base64)"
|
||||||
|
export OTEL_SERVICE_NAME=llm-observability-mcp
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Datadog
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Datadog ingests OTLP via a local Datadog Agent (or OTel Collector) with OTLP ingest enabled,
# not via its REST API endpoints:
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
|
||||||
|
export OTEL_EXPORTER_OTLP_HEADERS="DD-API-KEY=YOUR_DD_API_KEY"
|
||||||
|
export OTEL_SERVICE_NAME=llm-observability-mcp
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Start the MCP Server
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start with stdio transport
|
||||||
|
npm run mcp:stdio
|
||||||
|
|
||||||
|
# Start with HTTP transport
|
||||||
|
npm run mcp:http
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### Using the OpenTelemetry Tool
|
||||||
|
|
||||||
|
The MCP server provides a new tool: `capture_llm_observability_opentelemetry`
|
||||||
|
|
||||||
|
#### Required Parameters
|
||||||
|
|
||||||
|
- `userId`: The distinct ID of the user
|
||||||
|
- `model`: The model used (e.g., "gpt-4", "claude-3")
|
||||||
|
- `provider`: The LLM provider (e.g., "openai", "anthropic")
|
||||||
|
|
||||||
|
#### Optional Parameters
|
||||||
|
|
||||||
|
- `traceId`: Trace ID for grouping related events
|
||||||
|
- `input`: The input to the LLM (messages, prompt, etc.)
|
||||||
|
- `outputChoices`: The output from the LLM
|
||||||
|
- `inputTokens`: Number of tokens in the input
|
||||||
|
- `outputTokens`: Number of tokens in the output
|
||||||
|
- `latency`: Latency of the LLM call in seconds
|
||||||
|
- `httpStatus`: HTTP status code of the LLM call
|
||||||
|
- `baseUrl`: Base URL of the LLM API
|
||||||
|
- `operationName`: Name of the operation being performed
|
||||||
|
- `error`: Error message if the request failed
|
||||||
|
- `errorType`: Type of error (e.g., "rate_limit", "timeout")
|
||||||
|
- `mcpToolsUsed`: List of MCP tools used during the request
|
||||||
|
|
||||||
|
### Example Usage
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"userId": "user-123",
|
||||||
|
"model": "gpt-4",
|
||||||
|
"provider": "openai",
|
||||||
|
"inputTokens": 150,
|
||||||
|
"outputTokens": 75,
|
||||||
|
"latency": 2.5,
|
||||||
|
"httpStatus": 200,
|
||||||
|
"operationName": "chat-completion",
|
||||||
|
"traceId": "trace-abc123"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration Reference
|
||||||
|
|
||||||
|
### Environment Variables
|
||||||
|
|
||||||
|
| Variable | Description | Default |
|
||||||
|
|----------|-------------|---------|
|
||||||
|
| `OTEL_SERVICE_NAME` | Service name for OpenTelemetry | `llm-observability-mcp` |
|
||||||
|
| `OTEL_SERVICE_VERSION` | Service version | `1.0.0` |
|
||||||
|
| `OTEL_ENVIRONMENT` | Environment name | `development` |
|
||||||
|
| `OTEL_EXPORTER_OTLP_ENDPOINT` | Default OTLP endpoint | - |
|
||||||
|
| `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` | Metrics endpoint | - |
|
||||||
|
| `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` | Traces endpoint | - |
|
||||||
|
| `OTEL_EXPORTER_OTLP_LOGS_ENDPOINT` | Logs endpoint | - |
|
||||||
|
| `OTEL_EXPORTER_OTLP_HEADERS` | Headers for authentication (format: "key1=value1,key2=value2") | - |
|
||||||
|
| `OTEL_METRIC_EXPORT_INTERVAL` | Metrics export interval in ms | `10000` |
|
||||||
|
| `OTEL_METRIC_EXPORT_TIMEOUT` | Metrics export timeout in ms | `5000` |
|
||||||
|
| `OTEL_TRACES_SAMPLER_ARG` | Sampling ratio (0.0-1.0) | `1.0` |
|
||||||
|
|
||||||
|
### Backend-Specific Configuration
|
||||||
|
|
||||||
|
#### New Relic
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:4318
|
||||||
|
export OTEL_EXPORTER_OTLP_HEADERS="api-key=YOUR_LICENSE_KEY"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Jaeger
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Grafana Cloud
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-us-central-0.grafana.net/otlp
|
||||||
|
export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Basic $(echo -n YOUR_INSTANCE_ID:YOUR_API_KEY | base64)"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Honeycomb
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT=https://api.honeycomb.io/v1/traces
|
||||||
|
export OTEL_EXPORTER_OTLP_HEADERS="x-honeycomb-team=YOUR_API_KEY"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Lightstep
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT=https://ingest.lightstep.com:443/api/v2/otel/trace
|
||||||
|
export OTEL_EXPORTER_OTLP_HEADERS="lightstep-access-token=YOUR_ACCESS_TOKEN"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Metrics Collected
|
||||||
|
|
||||||
|
### Counters
|
||||||
|
|
||||||
|
- `llm.requests.total`: Total number of LLM requests
|
||||||
|
- `llm.tokens.total`: Total tokens used (input + output)
|
||||||
|
|
||||||
|
### Histograms
|
||||||
|
|
||||||
|
- `llm.latency.duration`: Request latency in milliseconds
|
||||||
|
|
||||||
|
### Gauges
|
||||||
|
|
||||||
|
- `llm.requests.active`: Number of active requests
|
||||||
|
|
||||||
|
### Trace Attributes
|
||||||
|
|
||||||
|
- `llm.model`: The model used
|
||||||
|
- `llm.provider`: The provider name
|
||||||
|
- `llm.user_id`: The user ID
|
||||||
|
- `llm.operation`: The operation name
|
||||||
|
- `llm.input_tokens`: Input token count
|
||||||
|
- `llm.output_tokens`: Output token count
|
||||||
|
- `llm.total_tokens`: Total token count
|
||||||
|
- `llm.latency_ms`: Latency in milliseconds
|
||||||
|
- `llm.http_status`: HTTP status code
|
||||||
|
- `llm.base_url`: API base URL
|
||||||
|
- `llm.error`: Error message (if any)
|
||||||
|
- `llm.error_type`: Error type classification
|
||||||
|
- `llm.input`: Input content (optional)
|
||||||
|
- `llm.output`: Output content (optional)
|
||||||
|
- `llm.mcp_tools_used`: MCP tools used
|
||||||
|
|
||||||
|
## Testing with Jaeger
|
||||||
|
|
||||||
|
### 1. Start Jaeger
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run -d --name jaeger \
|
||||||
|
-e COLLECTOR_OTLP_ENABLED=true \
|
||||||
|
-p 16686:16686 \
|
||||||
|
-p 4317:4317 \
|
||||||
|
-p 4318:4318 \
|
||||||
|
jaegertracing/all-in-one:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Configure MCP Server
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
|
||||||
|
export OTEL_SERVICE_NAME=llm-observability-mcp
|
||||||
|
npm run mcp:stdio
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. View Traces
|
||||||
|
|
||||||
|
Open <http://localhost:16686> to view traces in Jaeger UI.
|
||||||
|
|
||||||
|
## Migration from PostHog
|
||||||
|
|
||||||
|
The OpenTelemetry tool is designed to be a drop-in replacement for the PostHog tool. Both tools can coexist, allowing for gradual migration:
|
||||||
|
|
||||||
|
1. **PostHog Tool**: `capture_llm_observability`
|
||||||
|
2. **OpenTelemetry Tool**: `capture_llm_observability_opentelemetry`
|
||||||
|
|
||||||
|
Both tools accept the same parameters, making migration straightforward.
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Common Issues
|
||||||
|
|
||||||
|
#### No Data in Backend
|
||||||
|
|
||||||
|
1. Verify endpoint URLs are correct
|
||||||
|
2. Check authentication headers
|
||||||
|
3. Ensure network connectivity
|
||||||
|
4. Check server logs for errors
|
||||||
|
|
||||||
|
#### High Resource Usage
|
||||||
|
|
||||||
|
1. Adjust sampling ratio: `OTEL_TRACES_SAMPLER_ARG=0.1`
|
||||||
|
2. Increase export intervals: `OTEL_METRIC_EXPORT_INTERVAL=30000`
|
||||||
|
|
||||||
|
#### Missing Traces
|
||||||
|
|
||||||
|
1. Verify OpenTelemetry is enabled (check for endpoint configuration)
|
||||||
|
2. Check for initialization errors in logs
|
||||||
|
3. Ensure proper service name configuration
|
||||||
|
|
||||||
|
### Debug Mode
|
||||||
|
|
||||||
|
Enable debug logging:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export DEBUG=true
|
||||||
|
npm run mcp:stdio
|
||||||
|
```
|
||||||
|
|
||||||
|
## Advanced Configuration
|
||||||
|
|
||||||
|
### Custom Headers
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Bearer token,Custom-Header=value"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Multiple Backends
|
||||||
|
|
||||||
|
Configure different endpoints for metrics and traces:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OTEL_EXPORTER_OTLP_METRICS_ENDPOINT=https://metrics.example.com
|
||||||
|
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=https://traces.example.com
|
||||||
|
```
|
||||||
|
|
||||||
|
### Sampling Configuration
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Sample 10% of traces
|
||||||
|
export OTEL_TRACES_SAMPLER_ARG=0.1
|
||||||
|
```
|
||||||
|
|
||||||
|
## Support
|
||||||
|
|
||||||
|
For issues or questions:
|
||||||
|
|
||||||
|
1. Check the troubleshooting section above
|
||||||
|
2. Review server logs with `DEBUG=true`
|
||||||
|
3. Verify OpenTelemetry configuration
|
||||||
|
4. Test with Jaeger locally first
|
||||||
151
README.md
151
README.md
@@ -1,21 +1,24 @@
|
|||||||
# LLM Observability MCP for PostHog
|
# LLM Observability MCP Server
|
||||||
|
|
||||||
[](https://opensource.org/licenses/MIT)
|
[](https://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
A Model Context Protocol (MCP) server that provides a tool to capture LLM Observability events and send them to PostHog.
|
A Model Context Protocol (MCP) server that provides comprehensive LLM observability tools supporting both PostHog and OpenTelemetry backends.
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
This project is an MCP server designed to track and observe Large Language Model (LLM) interactions using [PostHog's LLM Observability](https://posthog.com/docs/llm-observability) features. It allows you to capture detailed information about LLM requests, responses, performance, and costs, providing valuable insights into your AI-powered applications.
|
This project is an MCP server designed to track and observe Large Language Model (LLM) interactions using both [PostHog's LLM Observability](https://posthog.com/docs/llm-observability) and **OpenTelemetry** for universal observability across any backend that supports OpenTelemetry (Jaeger, New Relic, Grafana, Datadog, Honeycomb, etc.).
|
||||||
|
|
||||||
The server can be run as a local process communicating over `stdio` or as a remote `http` server, making it compatible with any MCP client, such as AI-powered IDEs (e.g., VS Code with an MCP extension, Cursor) or custom applications.
|
The server can be run as a local process communicating over `stdio` or as a remote `http` server, making it compatible with any MCP client, such as AI-powered IDEs (e.g., VS Code with an MCP extension, Cursor) or custom applications.
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- **Capture LLM Metrics**: Log key details of LLM interactions, including model, provider, latency, token counts, and more.
|
- **Dual Backend Support**: Choose between PostHog or OpenTelemetry (or use both)
|
||||||
- **Flexible Transport**: Run as a local `stdio` process for tight IDE integration or as a standalone `http` server for remote access.
|
- **Universal OpenTelemetry**: Works with any OpenTelemetry-compatible backend
|
||||||
- **Dynamic Configuration**: Configure the server easily using environment variables.
|
- **Comprehensive Metrics**: Request counts, token usage, latency, error rates
|
||||||
- **Easy Integration**: Connect to MCP-compatible IDEs or use the programmatic client for use in any TypeScript/JavaScript application.
|
- **Distributed Tracing**: Full request lifecycle tracking with spans
|
||||||
|
- **Flexible Transport**: Run as local `stdio` process or standalone `http` server
|
||||||
|
- **Dynamic Configuration**: Environment-based configuration for different backends
|
||||||
|
- **Zero-Code Integration**: Drop-in replacement for existing observability tools
|
||||||
|
|
||||||
## Installation for Development
|
## Installation for Development
|
||||||
|
|
||||||
@@ -46,10 +49,29 @@ Follow these steps to set up the server for local development.
|
|||||||
|
|
||||||
The server is configured via environment variables.
|
The server is configured via environment variables.
|
||||||
|
|
||||||
|
### PostHog Configuration
|
||||||
|
|
||||||
|
| Variable | Description | Default | Example |
|
||||||
|
| ----------------- | --------------------------------------------------------------------------- | --------- | ------------------------------------- |
|
||||||
|
| `POSTHOG_API_KEY` | Your PostHog Project API Key (required for PostHog tool) | - | `phc_...` |
|
||||||
|
| `POSTHOG_HOST` | The URL of your PostHog instance | - | `https://us.i.posthog.com` |
|
||||||
|
|
||||||
|
### OpenTelemetry Configuration
|
||||||
|
|
||||||
|
| Variable | Description | Default | Example |
|
||||||
|
| ------------------------------- | --------------------------------------------------------------------------- | -------------------------- | ------------------------------------- |
|
||||||
|
| `OTEL_EXPORTER_OTLP_ENDPOINT` | OpenTelemetry collector endpoint | - | `http://localhost:4318` |
|
||||||
|
| `OTEL_EXPORTER_OTLP_HEADERS` | Headers for authentication (comma-separated key=value pairs) | - | `api-key=YOUR_KEY` |
|
||||||
|
| `OTEL_SERVICE_NAME` | Service name for traces and metrics | `llm-observability-mcp` | `my-llm-app` |
|
||||||
|
| `OTEL_SERVICE_VERSION` | Service version | `1.0.0` | `2.1.0` |
|
||||||
|
| `OTEL_ENVIRONMENT` | Environment name | `development` | `production` |
|
||||||
|
| `OTEL_TRACES_SAMPLER_ARG` | Sampling ratio (0.0-1.0) | `1.0` | `0.1` |
|
||||||
|
| `OTEL_METRIC_EXPORT_INTERVAL` | Metrics export interval in milliseconds | `10000` | `30000` |
|
||||||
|
|
||||||
|
### General Configuration
|
||||||
|
|
||||||
| Variable | Description | Default | Example |
|
| Variable | Description | Default | Example |
|
||||||
| ----------------- | --------------------------------------------------------------------------- | --------- | ------------------------------------- |
|
| ----------------- | --------------------------------------------------------------------------- | --------- | ------------------------------------- |
|
||||||
| `POSTHOG_API_KEY` | **Required.** Your PostHog Project API Key. | - | `phc_...` |
|
|
||||||
| `POSTHOG_HOST` | **Required.** The URL of your PostHog instance. | - | `https://us.i.posthog.com` |
|
|
||||||
| `TRANSPORT_MODE` | The transport protocol to use. Can be `http` or `stdio`. | `http` | `stdio` |
|
| `TRANSPORT_MODE` | The transport protocol to use. Can be `http` or `stdio`. | `http` | `stdio` |
|
||||||
| `DEBUG` | Set to `true` to enable detailed debug logging. | `false` | `true` |
|
| `DEBUG` | Set to `true` to enable detailed debug logging. | `false` | `true` |
|
||||||
|
|
||||||
@@ -177,25 +199,98 @@ async function main() {
|
|||||||
main().catch(console.error);
|
main().catch(console.error);
|
||||||
```
|
```
|
||||||
|
|
||||||
## Tool Reference: `capture_llm_observability`
|
## Available Tools
|
||||||
|
|
||||||
This is the core tool provided by the server. It captures LLM usage in PostHog for observability, including requests, responses, and performance metrics.
|
### PostHog Tool: `capture_llm_observability`
|
||||||
|
|
||||||
### Parameters
|
Captures LLM usage in PostHog for observability, including requests, responses, and performance metrics.
|
||||||
|
|
||||||
| Parameter | Type | Required | Description |
|
### OpenTelemetry Tool: `capture_llm_observability_opentelemetry`
|
||||||
| --------------- | ------------------- | -------- | ----------------------------------------------- |
|
|
||||||
| `userId` | `string` | Yes | The distinct ID of the user. |
|
Captures LLM usage using OpenTelemetry for universal observability across any OpenTelemetry-compatible backend.
|
||||||
| `model` | `string` | Yes | The model used (e.g., `gpt-4`, `claude-3`). |
|
|
||||||
| `provider` | `string` | Yes | The LLM provider (e.g., `openai`, `anthropic`). |
|
### Parameters Comparison
|
||||||
| `traceId` | `string` | No | The trace ID to group related AI events. |
|
|
||||||
| `input` | `any` | No | The input to the LLM (e.g., messages, prompt). |
|
| Parameter | Type | Required | Description | PostHog | OpenTelemetry |
|
||||||
| `outputChoices` | `any` | No | The output choices from the LLM. |
|
| --------------- | ------------------- | -------- | ----------------------------------------------- | ------- | ------------- |
|
||||||
| `inputTokens` | `number` | No | The number of tokens in the input. |
|
| `userId` | `string` | Yes | The distinct ID of the user. | ✅ | ✅ |
|
||||||
| `outputTokens` | `number` | No | The number of tokens in the output. |
|
| `model` | `string` | Yes | The model used (e.g., `gpt-4`, `claude-3`). | ✅ | ✅ |
|
||||||
| `latency` | `number` | No | The latency of the LLM call in seconds. |
|
| `provider` | `string` | Yes | The LLM provider (e.g., `openai`, `anthropic`). | ✅ | ✅ |
|
||||||
| `httpStatus` | `number` | No | The HTTP status code of the LLM API call. |
|
| `traceId` | `string` | No | The trace ID to group related AI events. | ✅ | ✅ |
|
||||||
| `baseUrl` | `string` | No | The base URL of the LLM API. |
|
| `input` | `any` | No | The input to the LLM (e.g., messages, prompt). | ✅ | ✅ |
|
||||||
|
| `outputChoices` | `any` | No | The output choices from the LLM. | ✅ | ✅ |
|
||||||
|
| `inputTokens` | `number` | No | The number of tokens in the input. | ✅ | ✅ |
|
||||||
|
| `outputTokens` | `number` | No | The number of tokens in the output. | ✅ | ✅ |
|
||||||
|
| `latency` | `number` | No | The latency of the LLM call in seconds. | ✅ | ✅ |
|
||||||
|
| `httpStatus` | `number` | No | The HTTP status code of the LLM API call. | ✅ | ✅ |
|
||||||
|
| `baseUrl` | `string` | No | The base URL of the LLM API. | ✅ | ✅ |
|
||||||
|
| `operationName` | `string` | No | The name of the operation being performed. | ❌ | ✅ |
|
||||||
|
| `error` | `string` | No | Error message if the request failed. | ❌ | ✅ |
|
||||||
|
| `errorType` | `string` | No | Type of error (e.g., rate_limit, timeout). | ❌ | ✅ |
|
||||||
|
| `mcpToolsUsed` | `string[]` | No | List of MCP tools used during the request. | ❌ | ✅ |
|
||||||
|
|
||||||
|
## Quick Start with OpenTelemetry
|
||||||
|
|
||||||
|
### 1. Choose Your Backend
|
||||||
|
|
||||||
|
**For local testing with Jaeger:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start Jaeger with OTLP support
|
||||||
|
docker run -d --name jaeger \
|
||||||
|
-e COLLECTOR_OTLP_ENABLED=true \
|
||||||
|
-p 16686:16686 \
|
||||||
|
-p 4318:4318 \
|
||||||
|
jaegertracing/all-in-one:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
**For New Relic:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:4318
|
||||||
|
export OTEL_EXPORTER_OTLP_HEADERS="api-key=YOUR_LICENSE_KEY"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Configure Environment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Copy example configuration
|
||||||
|
cp .env.example .env
|
||||||
|
|
||||||
|
# Edit .env with your backend settings
|
||||||
|
# For Jaeger:
|
||||||
|
echo "OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318" >> .env
|
||||||
|
echo "OTEL_SERVICE_NAME=llm-observability-mcp" >> .env
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Start the Server
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm run mcp:http
|
||||||
|
# or
|
||||||
|
npm run mcp:stdio
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Test the Integration
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Test with curl
|
||||||
|
curl -X POST http://localhost:3000/mcp \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"tool": "capture_llm_observability_opentelemetry",
|
||||||
|
"arguments": {
|
||||||
|
"userId": "test-user",
|
||||||
|
"model": "gpt-4",
|
||||||
|
"provider": "openai",
|
||||||
|
"inputTokens": 100,
|
||||||
|
"outputTokens": 50,
|
||||||
|
"latency": 1.5,
|
||||||
|
"httpStatus": 200,
|
||||||
|
"operationName": "test-completion"
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
||||||
@@ -203,6 +298,12 @@ This is the core tool provided by the server. It captures LLM usage in PostHog f
|
|||||||
- **Run tests**: `npm test`
|
- **Run tests**: `npm test`
|
||||||
- **Lint and format**: `npm run lint` and `npm run format`
|
- **Lint and format**: `npm run lint` and `npm run format`
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
- [OpenTelemetry Setup Guide](OPENTELEMETRY.md) - Complete OpenTelemetry configuration
|
||||||
|
- [Usage Examples](examples/opentelemetry-usage.md) - Practical examples for different backends
|
||||||
|
- [Environment Configuration](.env.example) - All available configuration options
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
[MIT License](https://opensource.org/licenses/MIT)
|
[MIT License](https://opensource.org/licenses/MIT)
|
||||||
|
|||||||
380
examples/opentelemetry-usage.md
Normal file
380
examples/opentelemetry-usage.md
Normal file
@@ -0,0 +1,380 @@
|
|||||||
|
# OpenTelemetry LLM Observability Examples
|
||||||
|
|
||||||
|
This document provides practical examples for using the OpenTelemetry LLM observability tool with various backends.
|
||||||
|
|
||||||
|
## Example 1: Basic Jaeger Setup
|
||||||
|
|
||||||
|
### 1. Start Jaeger
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start Jaeger with OTLP support
|
||||||
|
docker run -d --name jaeger \
|
||||||
|
-e COLLECTOR_OTLP_ENABLED=true \
|
||||||
|
-p 16686:16686 \
|
||||||
|
-p 4317:4317 \
|
||||||
|
-p 4318:4318 \
|
||||||
|
jaegertracing/all-in-one:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Configure Environment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
|
||||||
|
export OTEL_SERVICE_NAME=llm-observability-mcp
|
||||||
|
export OTEL_ENVIRONMENT=development
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Start MCP Server
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm run mcp:stdio
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Test with Claude Desktop
|
||||||
|
|
||||||
|
Add to your Claude Desktop configuration:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"mcpServers": {
|
||||||
|
"llm-observability": {
|
||||||
|
"command": "node",
|
||||||
|
"args": ["/path/to/llm-observability-mcp/dist/index.js"],
|
||||||
|
"env": {
|
||||||
|
"OTEL_EXPORTER_OTLP_ENDPOINT": "http://localhost:4318",
|
||||||
|
"OTEL_SERVICE_NAME": "llm-observability-mcp",
|
||||||
|
"OTEL_ENVIRONMENT": "development"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. View Traces
|
||||||
|
|
||||||
|
Open <http://localhost:16686> to see your traces.
|
||||||
|
|
||||||
|
## Example 2: New Relic Integration
|
||||||
|
|
||||||
|
### 1. Get Your License Key
|
||||||
|
|
||||||
|
From New Relic: Account Settings > API Keys > License Key
|
||||||
|
|
||||||
|
### 2. Configure Environment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:4318
|
||||||
|
export OTEL_EXPORTER_OTLP_HEADERS="api-key=YOUR_LICENSE_KEY"
|
||||||
|
export OTEL_SERVICE_NAME=llm-observability-mcp
|
||||||
|
export OTEL_ENVIRONMENT=production
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Usage Example
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"tool": "capture_llm_observability_opentelemetry",
|
||||||
|
"arguments": {
|
||||||
|
"userId": "user-12345",
|
||||||
|
"model": "gpt-4",
|
||||||
|
"provider": "openai",
|
||||||
|
"inputTokens": 150,
|
||||||
|
"outputTokens": 75,
|
||||||
|
"latency": 2.3,
|
||||||
|
"httpStatus": 200,
|
||||||
|
"operationName": "chat-completion",
|
||||||
|
"traceId": "trace-abc123",
|
||||||
|
"input": "What is the weather like today?",
|
||||||
|
"outputChoices": ["The weather is sunny and 75°F today."]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Example 3: Grafana Cloud
|
||||||
|
|
||||||
|
### 1. Get Your Credentials
|
||||||
|
|
||||||
|
From Grafana Cloud: Connections > Data Sources > OpenTelemetry
|
||||||
|
|
||||||
|
### 2. Configure Environment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-us-central-0.grafana.net/otlp
|
||||||
|
export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Basic $(echo -n YOUR_INSTANCE_ID:YOUR_API_KEY | base64)"
|
||||||
|
export OTEL_SERVICE_NAME=llm-observability-mcp
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Docker Compose Setup
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.yml
|
||||||
|
version: '3.8'
|
||||||
|
services:
|
||||||
|
llm-observability:
|
||||||
|
build: .
|
||||||
|
environment:
|
||||||
|
- OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-us-central-0.grafana.net/otlp
|
||||||
|
- OTEL_EXPORTER_OTLP_HEADERS=Authorization=Basic YOUR_BASE64_ENCODED_CREDENTIALS
|
||||||
|
- OTEL_SERVICE_NAME=llm-observability-mcp
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Example 4: Honeycomb
|
||||||
|
|
||||||
|
### 1. Get Your API Key
|
||||||
|
|
||||||
|
From Honeycomb: Account Settings > API Keys
|
||||||
|
|
||||||
|
### 2. Configure Environment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT=https://api.honeycomb.io/v1/traces
|
||||||
|
export OTEL_EXPORTER_OTLP_HEADERS="x-honeycomb-team=YOUR_API_KEY"
|
||||||
|
export OTEL_SERVICE_NAME=llm-observability-mcp
|
||||||
|
export OTEL_ENVIRONMENT=production
|
||||||
|
```
|
||||||
|
|
||||||
|
## Example 5: Datadog
|
||||||
|
|
||||||
|
### 1. Get Your API Key
|
||||||
|
|
||||||
|
From Datadog: Organization Settings > API Keys
|
||||||
|
|
||||||
|
### 2. Configure Environment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Point at a Datadog Agent (or OTel Collector) with OTLP ingest enabled;
# Datadog's REST API does not accept OTLP directly:
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
|
||||||
|
export OTEL_EXPORTER_OTLP_HEADERS="DD-API-KEY=YOUR_API_KEY"
|
||||||
|
export OTEL_SERVICE_NAME=llm-observability-mcp
|
||||||
|
```
|
||||||
|
|
||||||
|
## Example 6: Production Configuration
|
||||||
|
|
||||||
|
### Environment Variables
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Service Configuration
|
||||||
|
export OTEL_SERVICE_NAME=llm-observability-mcp
|
||||||
|
export OTEL_SERVICE_VERSION=1.2.3
|
||||||
|
export OTEL_ENVIRONMENT=production
|
||||||
|
|
||||||
|
# Sampling (10% of traces)
|
||||||
|
export OTEL_TRACES_SAMPLER_ARG=0.1
|
||||||
|
|
||||||
|
# Export Configuration
|
||||||
|
export OTEL_METRIC_EXPORT_INTERVAL=30000
|
||||||
|
export OTEL_METRIC_EXPORT_TIMEOUT=10000
|
||||||
|
|
||||||
|
# Backend Configuration
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT=https://your-backend.com:4318
|
||||||
|
export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Bearer your-token,Custom-Header=value"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Kubernetes Deployment
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: llm-observability-mcp
|
||||||
|
spec:
|
||||||
|
replicas: 3
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llm-observability-mcp
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: llm-observability-mcp
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: llm-observability-mcp
|
||||||
|
image: llm-observability-mcp:latest
|
||||||
|
ports:
|
||||||
|
- containerPort: 3000
|
||||||
|
env:
|
||||||
|
- name: OTEL_SERVICE_NAME
|
||||||
|
value: "llm-observability-mcp"
|
||||||
|
- name: OTEL_SERVICE_VERSION
|
||||||
|
value: "1.2.3"
|
||||||
|
- name: OTEL_ENVIRONMENT
|
||||||
|
value: "production"
|
||||||
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
|
value: "https://your-backend.com:4318"
|
||||||
|
- name: OTEL_EXPORTER_OTLP_HEADERS
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: otel-credentials
|
||||||
|
key: headers
|
||||||
|
```
|
||||||
|
|
||||||
|
## Example 7: Error Handling and Monitoring
|
||||||
|
|
||||||
|
### Error Tracking
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"tool": "capture_llm_observability_opentelemetry",
|
||||||
|
"arguments": {
|
||||||
|
"userId": "user-12345",
|
||||||
|
"model": "gpt-4",
|
||||||
|
"provider": "openai",
|
||||||
|
"httpStatus": 429,
|
||||||
|
"error": "Rate limit exceeded",
|
||||||
|
"errorType": "rate_limit",
|
||||||
|
"latency": 0.1,
|
||||||
|
"operationName": "chat-completion"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Multi-Tool Usage Tracking
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"tool": "capture_llm_observability_opentelemetry",
|
||||||
|
"arguments": {
|
||||||
|
"userId": "user-12345",
|
||||||
|
"model": "gpt-4",
|
||||||
|
"provider": "openai",
|
||||||
|
"inputTokens": 500,
|
||||||
|
"outputTokens": 200,
|
||||||
|
"latency": 5.2,
|
||||||
|
"httpStatus": 200,
|
||||||
|
"operationName": "complex-workflow",
|
||||||
|
"mcpToolsUsed": ["file_read", "web_search", "code_execution"],
|
||||||
|
"traceId": "complex-workflow-123"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Example 8: Testing Script
|
||||||
|
|
||||||
|
### Test Script
|
||||||
|
|
||||||
|
```bash
|
||||||
|
#!/bin/bash
|
||||||
|
# test-opentelemetry.sh
|
||||||
|
|
||||||
|
# Start Jaeger
|
||||||
|
echo "Starting Jaeger..."
|
||||||
|
docker run -d --name jaeger-test \
|
||||||
|
-e COLLECTOR_OTLP_ENABLED=true \
|
||||||
|
-p 16686:16686 \
|
||||||
|
-p 4318:4318 \
|
||||||
|
jaegertracing/all-in-one:latest
|
||||||
|
|
||||||
|
# Wait for Jaeger to start
|
||||||
|
sleep 5
|
||||||
|
|
||||||
|
# Configure environment
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
|
||||||
|
export OTEL_SERVICE_NAME=llm-observability-test
|
||||||
|
export OTEL_ENVIRONMENT=test
|
||||||
|
|
||||||
|
# Start MCP server in background
|
||||||
|
echo "Starting MCP server..."
|
||||||
|
npm run mcp:stdio &
|
||||||
|
|
||||||
|
# Wait for server to start
|
||||||
|
sleep 3
|
||||||
|
|
||||||
|
# Test the tool
|
||||||
|
echo "Testing OpenTelemetry tool..."
|
||||||
|
curl -X POST http://localhost:3000/mcp \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"tool": "capture_llm_observability_opentelemetry",
|
||||||
|
"arguments": {
|
||||||
|
"userId": "test-user",
|
||||||
|
"model": "gpt-4",
|
||||||
|
"provider": "openai",
|
||||||
|
"inputTokens": 100,
|
||||||
|
"outputTokens": 50,
|
||||||
|
"latency": 1.5,
|
||||||
|
"httpStatus": 200,
|
||||||
|
"operationName": "test-completion"
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
|
||||||
|
echo "Test complete. View traces at http://localhost:16686"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Example 9: Integration with Existing Tools
|
||||||
|
|
||||||
|
### Gradual Migration from PostHog
|
||||||
|
|
||||||
|
You can use both tools simultaneously during migration:
|
||||||
|
|
||||||
|
```json
|
||||||
|
// PostHog (existing)
|
||||||
|
{
|
||||||
|
"tool": "capture_llm_observability",
|
||||||
|
"arguments": {
|
||||||
|
"userId": "user-123",
|
||||||
|
"model": "gpt-4",
|
||||||
|
"provider": "openai"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// OpenTelemetry (new)
|
||||||
|
{
|
||||||
|
"tool": "capture_llm_observability_opentelemetry",
|
||||||
|
"arguments": {
|
||||||
|
"userId": "user-123",
|
||||||
|
"model": "gpt-4",
|
||||||
|
"provider": "openai"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting Examples
|
||||||
|
|
||||||
|
### Debug Mode
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export DEBUG=true
|
||||||
|
npm run mcp:stdio
|
||||||
|
```
|
||||||
|
|
||||||
|
### Check Configuration
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Test connectivity
|
||||||
|
curl -X POST http://localhost:4318/v1/traces \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"resourceSpans":[]}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Verify Environment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check environment variables
|
||||||
|
env | grep OTEL
|
||||||
|
```
|
||||||
|
|
||||||
|
## Performance Tuning
|
||||||
|
|
||||||
|
### High-Volume Configuration
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Reduce sampling for high-volume
|
||||||
|
export OTEL_TRACES_SAMPLER_ARG=0.01
|
||||||
|
|
||||||
|
# Increase export intervals
|
||||||
|
export OTEL_METRIC_EXPORT_INTERVAL=60000
|
||||||
|
export OTEL_METRIC_EXPORT_TIMEOUT=30000
|
||||||
|
```
|
||||||
|
|
||||||
|
### Resource Optimization
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Disable metrics if only traces needed
|
||||||
|
unset OTEL_EXPORTER_OTLP_METRICS_ENDPOINT
|
||||||
|
|
||||||
|
# Disable logs if not needed
|
||||||
|
unset OTEL_EXPORTER_OTLP_LOGS_ENDPOINT
|
||||||
|
```
|
||||||
|
|
||||||
|
These examples should help you get started with OpenTelemetry LLM observability across different backends and use cases.
|
||||||
1587
package-lock.json
generated
1587
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
13
package.json
13
package.json
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "@sfiorini/llm-observability-mcp",
|
"name": "@sfiorini/llm-observability-mcp",
|
||||||
"version": "0.1.0",
|
"version": "0.1.0",
|
||||||
"description": "A Model Context Protocol (MCP) server that provides a tool to capture LLM Observability events and send them to PostHog.",
|
"description": "A Model Context Protocol (MCP) server that provides comprehensive LLM observability tools supporting both PostHog and OpenTelemetry backends.",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
"type": "commonjs",
|
"type": "commonjs",
|
||||||
@@ -76,6 +76,15 @@
|
|||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@modelcontextprotocol/sdk": "^1.15.1",
|
"@modelcontextprotocol/sdk": "^1.15.1",
|
||||||
|
"@opentelemetry/api": "^1.9.0",
|
||||||
|
"@opentelemetry/exporter-otlp-grpc": "^0.26.0",
|
||||||
|
"@opentelemetry/exporter-otlp-http": "^0.26.0",
|
||||||
|
"@opentelemetry/instrumentation": "^0.203.0",
|
||||||
|
"@opentelemetry/resources": "^2.0.1",
|
||||||
|
"@opentelemetry/sdk-metrics": "^2.0.1",
|
||||||
|
"@opentelemetry/sdk-node": "^0.203.0",
|
||||||
|
"@opentelemetry/sdk-trace-node": "^2.0.1",
|
||||||
|
"@opentelemetry/semantic-conventions": "^1.36.0",
|
||||||
"commander": "^14.0.0",
|
"commander": "^14.0.0",
|
||||||
"cors": "^2.8.5",
|
"cors": "^2.8.5",
|
||||||
"dotenv": "^17.2.0",
|
"dotenv": "^17.2.0",
|
||||||
@@ -113,4 +122,4 @@
|
|||||||
".ts"
|
".ts"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
130
src/config/opentelemetry-llm.config.ts
Normal file
130
src/config/opentelemetry-llm.config.ts
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
import { z } from 'zod';
|
||||||
|
|
||||||
|
/**
 * Configuration schema for OpenTelemetry.
 *
 * Mirrors the OTEL_* environment variables (see OpenTelemetryConfig.fromEnv):
 * service identity, per-signal OTLP endpoints, auth headers, metric export
 * cadence, and trace sampling ratio.
 */
export const OpenTelemetryConfigSchema = z.object({
	// Service configuration (reported as OTel resource attributes)
	serviceName: z.string().default('llm-observability-mcp'),
	serviceVersion: z.string().default('1.0.0'),
	environment: z.string().default('development'),

	// OTLP endpoints — all optional; a signal is only exported when its
	// endpoint is configured (see OpenTelemetryService.initialize)
	metricsEndpoint: z.string().optional(),
	tracesEndpoint: z.string().optional(),
	logsEndpoint: z.string().optional(),

	// Authentication headers sent with every OTLP export request
	headers: z.record(z.string()).optional(),

	// Export configuration (milliseconds) for the periodic metric reader
	exportIntervalMillis: z.number().default(10000),
	exportTimeoutMillis: z.number().default(5000),

	// Sampling configuration: fraction of traces to keep, in [0, 1]
	samplingRatio: z.number().min(0).max(1).default(1.0),
});

// Inferred TypeScript type of a parsed configuration object.
export type OpenTelemetryConfigType = z.infer<typeof OpenTelemetryConfigSchema>;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* OpenTelemetry configuration class
|
||||||
|
*/
|
||||||
|
export class OpenTelemetryConfig {
|
||||||
|
public readonly serviceName: string;
|
||||||
|
public readonly serviceVersion: string;
|
||||||
|
public readonly environment: string;
|
||||||
|
public readonly metricsEndpoint?: string;
|
||||||
|
public readonly tracesEndpoint?: string;
|
||||||
|
public readonly logsEndpoint?: string;
|
||||||
|
public readonly headers?: Record<string, string>;
|
||||||
|
public readonly exportIntervalMillis: number;
|
||||||
|
public readonly exportTimeoutMillis: number;
|
||||||
|
public readonly samplingRatio: number;
|
||||||
|
|
||||||
|
constructor(config: OpenTelemetryConfigType) {
|
||||||
|
this.serviceName = config.serviceName;
|
||||||
|
this.serviceVersion = config.serviceVersion;
|
||||||
|
this.environment = config.environment;
|
||||||
|
this.metricsEndpoint = config.metricsEndpoint;
|
||||||
|
this.tracesEndpoint = config.tracesEndpoint;
|
||||||
|
this.logsEndpoint = config.logsEndpoint;
|
||||||
|
this.headers = config.headers;
|
||||||
|
this.exportIntervalMillis = config.exportIntervalMillis;
|
||||||
|
this.exportTimeoutMillis = config.exportTimeoutMillis;
|
||||||
|
this.samplingRatio = config.samplingRatio;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create configuration from environment variables
|
||||||
|
*/
|
||||||
|
public static fromEnv(): OpenTelemetryConfig {
|
||||||
|
const config = OpenTelemetryConfigSchema.parse({
|
||||||
|
serviceName:
|
||||||
|
process.env.OTEL_SERVICE_NAME || 'llm-observability-mcp',
|
||||||
|
serviceVersion: process.env.OTEL_SERVICE_VERSION || '1.0.0',
|
||||||
|
environment: process.env.OTEL_ENVIRONMENT || 'development',
|
||||||
|
metricsEndpoint: process.env.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT,
|
||||||
|
tracesEndpoint: process.env.OTEL_EXPORTER_OTLP_TRACES_ENDPOINT,
|
||||||
|
logsEndpoint: process.env.OTEL_EXPORTER_OTLP_LOGS_ENDPOINT,
|
||||||
|
headers: process.env.OTEL_EXPORTER_OTLP_HEADERS
|
||||||
|
? parseHeaders(process.env.OTEL_EXPORTER_OTLP_HEADERS)
|
||||||
|
: undefined,
|
||||||
|
exportIntervalMillis: parseInt(
|
||||||
|
process.env.OTEL_METRIC_EXPORT_INTERVAL || '10000',
|
||||||
|
10,
|
||||||
|
),
|
||||||
|
exportTimeoutMillis: parseInt(
|
||||||
|
process.env.OTEL_METRIC_EXPORT_TIMEOUT || '5000',
|
||||||
|
10,
|
||||||
|
),
|
||||||
|
samplingRatio: parseFloat(
|
||||||
|
process.env.OTEL_TRACES_SAMPLER_ARG || '1.0',
|
||||||
|
),
|
||||||
|
});
|
||||||
|
|
||||||
|
return new OpenTelemetryConfig(config);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if OpenTelemetry is enabled
|
||||||
|
*/
|
||||||
|
public isEnabled(): boolean {
|
||||||
|
return !!(
|
||||||
|
this.metricsEndpoint ||
|
||||||
|
this.tracesEndpoint ||
|
||||||
|
this.logsEndpoint
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get combined OTLP endpoint if only one is provided
|
||||||
|
*/
|
||||||
|
public getDefaultEndpoint(): string | undefined {
|
||||||
|
if (
|
||||||
|
this.metricsEndpoint &&
|
||||||
|
this.tracesEndpoint === this.metricsEndpoint
|
||||||
|
) {
|
||||||
|
return this.metricsEndpoint;
|
||||||
|
}
|
||||||
|
return this.metricsEndpoint || this.tracesEndpoint;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse headers from environment variable string
|
||||||
|
* Format: "key1=value1,key2=value2"
|
||||||
|
*/
|
||||||
|
function parseHeaders(headersString: string): Record<string, string> {
|
||||||
|
const headers: Record<string, string> = {};
|
||||||
|
|
||||||
|
const pairs = headersString.split(',');
|
||||||
|
for (const pair of pairs) {
|
||||||
|
const [key, value] = pair.split('=');
|
||||||
|
if (key && value) {
|
||||||
|
headers[key.trim()] = value.trim();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return headers;
|
||||||
|
}
|
||||||
74
src/controllers/opentelemetry-llm.controller.ts
Normal file
74
src/controllers/opentelemetry-llm.controller.ts
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
import { OpenTelemetryConfig } from '../config/opentelemetry-llm.config.js';
import { OpenTelemetryService } from '../services/opentelemetry-llm.service.js';
import { ControllerResponse } from '../types/common.types.js';
import { OpenTelemetryLlmInputSchemaType } from '../types/opentelemetry-llm.types.js';
import { Logger } from '../utils/logger.util.js';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Controller for OpenTelemetry LLM observability
|
||||||
|
*/
|
||||||
|
export class OpenTelemetryController {
|
||||||
|
private service: OpenTelemetryService;
|
||||||
|
|
||||||
|
constructor(service: OpenTelemetryService) {
|
||||||
|
this.service = service;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Capture LLM observability data
|
||||||
|
*/
|
||||||
|
public async captureLlmObservability(
|
||||||
|
data: OpenTelemetryLlmInputSchemaType,
|
||||||
|
): Promise<ControllerResponse> {
|
||||||
|
const logger = Logger.forContext(
|
||||||
|
'opentelemetry.controller',
|
||||||
|
'captureLlmObservability',
|
||||||
|
);
|
||||||
|
logger.debug('Capturing LLM observability data', data);
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Record the LLM request
|
||||||
|
this.service.recordLlmRequest(data);
|
||||||
|
|
||||||
|
const content = `## OpenTelemetry LLM Observability Captured
|
||||||
|
|
||||||
|
Successfully recorded LLM observability data:
|
||||||
|
|
||||||
|
- **Model**: ${data.model}
|
||||||
|
- **Provider**: ${data.provider}
|
||||||
|
- **User ID**: ${data.userId}
|
||||||
|
- **Operation**: ${data.operationName || 'generate'}
|
||||||
|
- **Input Tokens**: ${data.inputTokens || 'N/A'}
|
||||||
|
- **Output Tokens**: ${data.outputTokens || 'N/A'}
|
||||||
|
- **Total Tokens**: ${(data.inputTokens || 0) + (data.outputTokens || 0)}
|
||||||
|
- **Latency**: ${data.latency ? `${data.latency}s` : 'N/A'}
|
||||||
|
- **Status**: ${data.error ? 'Error' : 'Success'}
|
||||||
|
${data.error ? `- **Error**: ${data.error}` : ''}
|
||||||
|
|
||||||
|
### Metrics Recorded
|
||||||
|
- ✅ Request counter incremented
|
||||||
|
- ✅ Token usage recorded
|
||||||
|
- ✅ Latency histogram updated
|
||||||
|
- ✅ Distributed trace created
|
||||||
|
|
||||||
|
### Trace Details
|
||||||
|
- **Trace ID**: ${data.traceId || 'auto-generated'}
|
||||||
|
- **Span Name**: ${data.operationName || 'llm.generate'}
|
||||||
|
- **Attributes**: model, provider, user_id, tokens, latency, error details
|
||||||
|
|
||||||
|
The data has been sent to your configured OpenTelemetry collector and is now available in your observability platform (Jaeger, New Relic, Grafana, etc.).`;
|
||||||
|
|
||||||
|
return { content };
|
||||||
|
} catch (error) {
|
||||||
|
logger.error('Error capturing LLM observability', error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Export singleton instance
|
||||||
|
const openTelemetryService = OpenTelemetryService.getInstance();
|
||||||
|
const openTelemetryController = new OpenTelemetryController(
|
||||||
|
openTelemetryService,
|
||||||
|
);
|
||||||
|
|
||||||
|
export default openTelemetryController;
|
||||||
@@ -7,6 +7,7 @@ import { config } from '../utils/config.util';
|
|||||||
import { PACKAGE_NAME, VERSION } from '../utils/constants.util';
|
import { PACKAGE_NAME, VERSION } from '../utils/constants.util';
|
||||||
import posthogLlmResources from '../resources/posthog-llm.resource.js';
|
import posthogLlmResources from '../resources/posthog-llm.resource.js';
|
||||||
import posthogLlmTools from '../tools/posthog-llm.tool.js';
|
import posthogLlmTools from '../tools/posthog-llm.tool.js';
|
||||||
|
import openTelemetryTools from '../tools/opentelemetry-llm.tool.js';
|
||||||
|
|
||||||
export function createServer() {
|
export function createServer() {
|
||||||
const serverLogger = Logger.forContext('utils/server.util.ts', 'getServer');
|
const serverLogger = Logger.forContext('utils/server.util.ts', 'getServer');
|
||||||
@@ -29,6 +30,7 @@ export function createServer() {
|
|||||||
serverLogger.info('Registering MCP tools and resources...');
|
serverLogger.info('Registering MCP tools and resources...');
|
||||||
posthogLlmTools.registerTools(server);
|
posthogLlmTools.registerTools(server);
|
||||||
posthogLlmResources.registerResources(server);
|
posthogLlmResources.registerResources(server);
|
||||||
|
openTelemetryTools.registerTools(server);
|
||||||
serverLogger.debug('All tools and resources registered');
|
serverLogger.debug('All tools and resources registered');
|
||||||
|
|
||||||
return server;
|
return server;
|
||||||
|
|||||||
294
src/services/opentelemetry-llm.service.ts
Normal file
294
src/services/opentelemetry-llm.service.ts
Normal file
@@ -0,0 +1,294 @@
|
|||||||
|
import {
|
||||||
|
MeterProvider,
|
||||||
|
PeriodicExportingMetricReader,
|
||||||
|
} from '@opentelemetry/sdk-metrics';
|
||||||
|
import { NodeSDK } from '@opentelemetry/sdk-node';
|
||||||
|
import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-node';
|
||||||
|
import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-http';
|
||||||
|
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';
|
||||||
|
import { resourceFromAttributes } from '@opentelemetry/resources';
|
||||||
|
import {
|
||||||
|
ATTR_SERVICE_NAME,
|
||||||
|
ATTR_SERVICE_VERSION,
|
||||||
|
SEMRESATTRS_DEPLOYMENT_ENVIRONMENT,
|
||||||
|
} from '@opentelemetry/semantic-conventions';
|
||||||
|
import { Logger } from '../utils/logger.util.js';
|
||||||
|
import { OpenTelemetryConfig } from '../config/opentelemetry-llm.config.js';
|
||||||
|
import { OpenTelemetryLlmInputSchemaType } from '../types/opentelemetry-llm.types.js';
|
||||||
|
import {
|
||||||
|
Meter,
|
||||||
|
Counter,
|
||||||
|
Histogram,
|
||||||
|
UpDownCounter,
|
||||||
|
metrics,
|
||||||
|
} from '@opentelemetry/api';
|
||||||
|
import { trace, Tracer, SpanStatusCode } from '@opentelemetry/api';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* OpenTelemetry service for LLM observability
|
||||||
|
*/
|
||||||
|
export class OpenTelemetryService {
|
||||||
|
private static instance: OpenTelemetryService;
|
||||||
|
private sdk: NodeSDK | null = null;
|
||||||
|
private meterProvider: MeterProvider | null = null;
|
||||||
|
private tracer: Tracer;
|
||||||
|
private meter: Meter;
|
||||||
|
private config: OpenTelemetryConfig;
|
||||||
|
|
||||||
|
// Metrics
|
||||||
|
private requestCounter: Counter;
|
||||||
|
private tokenCounter: Counter;
|
||||||
|
private latencyHistogram: Histogram;
|
||||||
|
private activeRequests: UpDownCounter;
|
||||||
|
|
||||||
|
private constructor(config: OpenTelemetryConfig) {
|
||||||
|
this.config = config;
|
||||||
|
this.tracer = trace.getTracer('llm-observability-mcp');
|
||||||
|
this.meter = this.initializeMetrics();
|
||||||
|
|
||||||
|
// Initialize metrics
|
||||||
|
this.requestCounter = this.meter.createCounter('llm.requests.total', {
|
||||||
|
description: 'Total number of LLM requests',
|
||||||
|
});
|
||||||
|
|
||||||
|
this.tokenCounter = this.meter.createCounter('llm.tokens.total', {
|
||||||
|
description: 'Total number of tokens processed',
|
||||||
|
});
|
||||||
|
|
||||||
|
this.latencyHistogram = this.meter.createHistogram(
|
||||||
|
'llm.latency.duration',
|
||||||
|
{
|
||||||
|
description: 'Duration of LLM requests in milliseconds',
|
||||||
|
unit: 'ms',
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
this.activeRequests = this.meter.createUpDownCounter(
|
||||||
|
'llm.requests.active',
|
||||||
|
{
|
||||||
|
description: 'Number of active LLM requests',
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get singleton instance
|
||||||
|
*/
|
||||||
|
public static getInstance(
|
||||||
|
config?: OpenTelemetryConfig,
|
||||||
|
): OpenTelemetryService {
|
||||||
|
if (!OpenTelemetryService.instance) {
|
||||||
|
if (!config) {
|
||||||
|
throw new Error(
|
||||||
|
'OpenTelemetryService requires configuration on first initialization',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
OpenTelemetryService.instance = new OpenTelemetryService(config);
|
||||||
|
}
|
||||||
|
return OpenTelemetryService.instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize OpenTelemetry SDK
|
||||||
|
*/
|
||||||
|
public initialize(): void {
|
||||||
|
const logger = Logger.forContext('opentelemetry.service', 'initialize');
|
||||||
|
|
||||||
|
if (this.sdk) {
|
||||||
|
logger.warn('OpenTelemetry SDK already initialized');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const resource = resourceFromAttributes({
|
||||||
|
[ATTR_SERVICE_NAME]: this.config.serviceName,
|
||||||
|
[ATTR_SERVICE_VERSION]: this.config.serviceVersion,
|
||||||
|
[SEMRESATTRS_DEPLOYMENT_ENVIRONMENT]: this.config.environment,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Configure metric exporter
|
||||||
|
const metricExporter = this.config.metricsEndpoint
|
||||||
|
? new OTLPMetricExporter({
|
||||||
|
url: this.config.metricsEndpoint,
|
||||||
|
headers: this.config.headers,
|
||||||
|
})
|
||||||
|
: undefined;
|
||||||
|
|
||||||
|
// Configure trace exporter
|
||||||
|
const traceExporter = this.config.tracesEndpoint
|
||||||
|
? new OTLPTraceExporter({
|
||||||
|
url: this.config.tracesEndpoint,
|
||||||
|
headers: this.config.headers,
|
||||||
|
})
|
||||||
|
: undefined;
|
||||||
|
|
||||||
|
// Initialize meter provider
|
||||||
|
if (metricExporter) {
|
||||||
|
this.meterProvider = new MeterProvider({
|
||||||
|
resource,
|
||||||
|
readers: [
|
||||||
|
new PeriodicExportingMetricReader({
|
||||||
|
exporter: metricExporter,
|
||||||
|
exportIntervalMillis:
|
||||||
|
this.config.exportIntervalMillis,
|
||||||
|
exportTimeoutMillis:
|
||||||
|
this.config.exportTimeoutMillis,
|
||||||
|
}),
|
||||||
|
],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize SDK
|
||||||
|
this.sdk = new NodeSDK({
|
||||||
|
resource,
|
||||||
|
spanProcessor: traceExporter
|
||||||
|
? new BatchSpanProcessor(traceExporter)
|
||||||
|
: undefined,
|
||||||
|
});
|
||||||
|
|
||||||
|
this.sdk.start();
|
||||||
|
logger.info('OpenTelemetry SDK initialized successfully');
|
||||||
|
} catch (error) {
|
||||||
|
logger.error('Failed to initialize OpenTelemetry SDK', error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize metrics
|
||||||
|
*/
|
||||||
|
private initializeMetrics(): Meter {
|
||||||
|
if (this.meterProvider) {
|
||||||
|
return this.meterProvider.getMeter('llm-observability-mcp');
|
||||||
|
}
|
||||||
|
return metrics.getMeter('llm-observability-mcp');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Record LLM request metrics and traces
|
||||||
|
*/
|
||||||
|
public recordLlmRequest(data: OpenTelemetryLlmInputSchemaType): void {
|
||||||
|
const labels = {
|
||||||
|
model: data.model,
|
||||||
|
provider: data.provider,
|
||||||
|
userId: data.userId,
|
||||||
|
operationName: data.operationName || 'generate',
|
||||||
|
status: data.error ? 'error' : 'success',
|
||||||
|
errorType: data.errorType,
|
||||||
|
mcpToolsUsed: data.mcpToolsUsed?.join(',') || '',
|
||||||
|
};
|
||||||
|
|
||||||
|
// Record metrics
|
||||||
|
this.requestCounter.add(1, labels);
|
||||||
|
|
||||||
|
if (data.inputTokens || data.outputTokens) {
|
||||||
|
const totalTokens =
|
||||||
|
(data.inputTokens || 0) + (data.outputTokens || 0);
|
||||||
|
this.tokenCounter.add(totalTokens, {
|
||||||
|
...labels,
|
||||||
|
type: 'total',
|
||||||
|
});
|
||||||
|
|
||||||
|
if (data.inputTokens) {
|
||||||
|
this.tokenCounter.add(data.inputTokens, {
|
||||||
|
...labels,
|
||||||
|
type: 'input',
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (data.outputTokens) {
|
||||||
|
this.tokenCounter.add(data.outputTokens, {
|
||||||
|
...labels,
|
||||||
|
type: 'output',
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (data.latency) {
|
||||||
|
this.latencyHistogram.record(data.latency * 1000, labels);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create trace
|
||||||
|
this.createLlmTrace(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create distributed trace for LLM request
|
||||||
|
*/
|
||||||
|
private createLlmTrace(data: OpenTelemetryLlmInputSchemaType): void {
|
||||||
|
const spanName = data.operationName || 'llm.generate';
|
||||||
|
const span = this.tracer.startSpan(spanName, {
|
||||||
|
attributes: {
|
||||||
|
'llm.model': data.model,
|
||||||
|
'llm.provider': data.provider,
|
||||||
|
'llm.user_id': data.userId,
|
||||||
|
'llm.operation': data.operationName || 'generate',
|
||||||
|
'llm.input_tokens': data.inputTokens,
|
||||||
|
'llm.output_tokens': data.outputTokens,
|
||||||
|
'llm.total_tokens':
|
||||||
|
(data.inputTokens || 0) + (data.outputTokens || 0),
|
||||||
|
'llm.latency_ms': data.latency
|
||||||
|
? data.latency * 1000
|
||||||
|
: undefined,
|
||||||
|
'llm.http_status': data.httpStatus,
|
||||||
|
'llm.base_url': data.baseUrl,
|
||||||
|
'llm.error': data.error,
|
||||||
|
'llm.error_type': data.errorType,
|
||||||
|
'llm.input':
|
||||||
|
typeof data.input === 'string'
|
||||||
|
? data.input
|
||||||
|
: JSON.stringify(data.input),
|
||||||
|
'llm.output':
|
||||||
|
typeof data.outputChoices === 'string'
|
||||||
|
? data.outputChoices
|
||||||
|
: JSON.stringify(data.outputChoices),
|
||||||
|
'llm.mcp_tools_used': data.mcpToolsUsed?.join(','),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
if (data.traceId) {
|
||||||
|
span.setAttribute('trace.id', data.traceId);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (data.error) {
|
||||||
|
span.setStatus({
|
||||||
|
code: SpanStatusCode.ERROR,
|
||||||
|
message: data.error,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
span.setStatus({ code: SpanStatusCode.OK });
|
||||||
|
}
|
||||||
|
|
||||||
|
span.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Record active request count
|
||||||
|
*/
|
||||||
|
public incrementActiveRequests(): void {
|
||||||
|
this.activeRequests.add(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decrement active request count
|
||||||
|
*/
|
||||||
|
public decrementActiveRequests(): void {
|
||||||
|
this.activeRequests.add(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shutdown OpenTelemetry SDK
|
||||||
|
*/
|
||||||
|
public async shutdown(): Promise<void> {
|
||||||
|
const logger = Logger.forContext('opentelemetry.service', 'shutdown');
|
||||||
|
|
||||||
|
if (this.sdk) {
|
||||||
|
try {
|
||||||
|
await this.sdk.shutdown();
|
||||||
|
logger.info('OpenTelemetry SDK shutdown successfully');
|
||||||
|
} catch (error) {
|
||||||
|
logger.error('Error shutting down OpenTelemetry SDK', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
91
src/tools/opentelemetry-llm.tool.ts
Normal file
91
src/tools/opentelemetry-llm.tool.ts
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
||||||
|
import { Logger } from '../utils/logger.util.js';
|
||||||
|
import { formatErrorForMcpTool } from '../utils/error.util.js';
|
||||||
|
import { OpenTelemetryService } from '../services/opentelemetry-llm.service.js';
|
||||||
|
import { OpenTelemetryController } from '../controllers/opentelemetry-llm.controller.js';
|
||||||
|
import { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
|
||||||
|
import {
|
||||||
|
OpenTelemetryLlmInputSchema,
|
||||||
|
OpenTelemetryLlmInputSchemaType,
|
||||||
|
} from '../types/opentelemetry-llm.types.js';
|
||||||
|
import { OpenTelemetryConfig } from '../config/opentelemetry-llm.config.js';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @function captureOpenTelemetryLlmObservability
|
||||||
|
* @description MCP Tool handler to capture LLM observability events using OpenTelemetry.
|
||||||
|
* It records metrics, traces, and spans for LLM requests that can be sent to any OpenTelemetry-compatible backend.
|
||||||
|
* @param {OpenTelemetryLlmInputSchemaType} args - Arguments provided to the tool.
|
||||||
|
* @returns {Promise<CallToolResult>} Formatted response for the MCP.
|
||||||
|
* @throws {McpError} Formatted error if the controller or service layer encounters an issue.
|
||||||
|
*/
|
||||||
|
async function captureOpenTelemetryLlmObservability(
|
||||||
|
args: OpenTelemetryLlmInputSchemaType,
|
||||||
|
): Promise<CallToolResult> {
|
||||||
|
const methodLogger = Logger.forContext(
|
||||||
|
'opentelemetry.tool',
|
||||||
|
'captureOpenTelemetryLlmObservability',
|
||||||
|
);
|
||||||
|
methodLogger.debug('Capture LLM Observability with OpenTelemetry...', args);
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Parse and validate arguments
|
||||||
|
const validatedArgs = OpenTelemetryLlmInputSchema.parse(args);
|
||||||
|
|
||||||
|
// Ensure OpenTelemetry is configured
|
||||||
|
const config = OpenTelemetryConfig.fromEnv();
|
||||||
|
const service = OpenTelemetryService.getInstance(config);
|
||||||
|
|
||||||
|
// Initialize if not already done
|
||||||
|
service.initialize();
|
||||||
|
|
||||||
|
// Create controller with the service
|
||||||
|
const controller = new OpenTelemetryController(service);
|
||||||
|
|
||||||
|
// Pass validated args to the controller
|
||||||
|
const result = await controller.captureLlmObservability(validatedArgs);
|
||||||
|
methodLogger.debug('Got response from controller', result);
|
||||||
|
|
||||||
|
// Format the response for the MCP tool
|
||||||
|
return {
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'text' as const,
|
||||||
|
text: result.content,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
};
|
||||||
|
} catch (error) {
|
||||||
|
methodLogger.error(
|
||||||
|
'Error tracking LLM generation with OpenTelemetry',
|
||||||
|
error,
|
||||||
|
);
|
||||||
|
return formatErrorForMcpTool(error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @function registerTools
|
||||||
|
* @description Registers the OpenTelemetry LLM observability tool with the MCP server.
|
||||||
|
*
|
||||||
|
* @param {McpServer} server - The MCP server instance.
|
||||||
|
*/
|
||||||
|
function registerTools(server: McpServer) {
|
||||||
|
const methodLogger = Logger.forContext(
|
||||||
|
'opentelemetry.tool',
|
||||||
|
'registerTools',
|
||||||
|
);
|
||||||
|
methodLogger.debug('Registering OpenTelemetry LLM observability tools...');
|
||||||
|
|
||||||
|
server.tool(
|
||||||
|
'capture_llm_observability_opentelemetry',
|
||||||
|
`Captures LLM usage using OpenTelemetry for observability, including requests, responses, and performance metrics. Works with any OpenTelemetry-compatible backend like Jaeger, New Relic, Grafana, etc.`,
|
||||||
|
OpenTelemetryLlmInputSchema.shape,
|
||||||
|
captureOpenTelemetryLlmObservability,
|
||||||
|
);
|
||||||
|
|
||||||
|
methodLogger.debug(
|
||||||
|
'Successfully registered capture_llm_observability_opentelemetry tool.',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
export default { registerTools };
|
||||||
@@ -6,7 +6,7 @@ import { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
|
|||||||
import {
|
import {
|
||||||
GetToolInputSchema,
|
GetToolInputSchema,
|
||||||
GetToolInputSchemaType,
|
GetToolInputSchemaType,
|
||||||
} from './posthog-llm.types.js';
|
} from '../types/posthog-llm.types.js';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @function capturePosthogLlmObservability
|
* @function capturePosthogLlmObservability
|
||||||
|
|||||||
168
src/types/opentelemetry-llm.types.ts
Normal file
168
src/types/opentelemetry-llm.types.ts
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
import { z } from 'zod';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Zod schema for the OpenTelemetry LLM observability tool arguments
|
||||||
|
*/
|
||||||
|
export const OpenTelemetryLlmInputSchema = z.object({
|
||||||
|
// Required fields
|
||||||
|
userId: z.string().describe('The distinct ID of the user'),
|
||||||
|
model: z.string().describe('The model used (e.g., gpt-4, claude-3, etc.)'),
|
||||||
|
provider: z
|
||||||
|
.string()
|
||||||
|
.describe('The LLM provider (e.g., openai, anthropic, etc.)'),
|
||||||
|
|
||||||
|
// Optional fields
|
||||||
|
traceId: z.string().optional().describe('The trace ID to group AI events'),
|
||||||
|
input: z
|
||||||
|
.any()
|
||||||
|
.optional()
|
||||||
|
.describe('The input to the LLM (messages, prompt, etc.)'),
|
||||||
|
outputChoices: z.any().optional().describe('The output from the LLM'),
|
||||||
|
inputTokens: z
|
||||||
|
.number()
|
||||||
|
.optional()
|
||||||
|
.describe('The number of tokens in the input'),
|
||||||
|
outputTokens: z
|
||||||
|
.number()
|
||||||
|
.optional()
|
||||||
|
.describe('The number of tokens in the output'),
|
||||||
|
latency: z
|
||||||
|
.number()
|
||||||
|
.optional()
|
||||||
|
.describe('The latency of the LLM call in seconds'),
|
||||||
|
httpStatus: z
|
||||||
|
.number()
|
||||||
|
.optional()
|
||||||
|
.describe('The HTTP status code of the LLM call'),
|
||||||
|
baseUrl: z.string().optional().describe('The base URL of the LLM API'),
|
||||||
|
|
||||||
|
// OpenTelemetry specific fields
|
||||||
|
operationName: z
|
||||||
|
.string()
|
||||||
|
.optional()
|
||||||
|
.describe('The name of the operation being performed'),
|
||||||
|
error: z
|
||||||
|
.string()
|
||||||
|
.optional()
|
||||||
|
.describe('Error message if the request failed'),
|
||||||
|
errorType: z
|
||||||
|
.string()
|
||||||
|
.optional()
|
||||||
|
.describe('Type of error (e.g., rate_limit, timeout, etc.)'),
|
||||||
|
mcpToolsUsed: z
|
||||||
|
.array(z.string())
|
||||||
|
.optional()
|
||||||
|
.describe('List of MCP tools used during the request'),
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TypeScript type inferred from the OpenTelemetryLlmInputSchema Zod schema
|
||||||
|
*/
|
||||||
|
export type OpenTelemetryLlmInputSchemaType = z.infer<
|
||||||
|
typeof OpenTelemetryLlmInputSchema
|
||||||
|
>;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configuration for OpenTelemetry exporters
|
||||||
|
*/
|
||||||
|
export interface OpenTelemetryExporterConfig {
|
||||||
|
url: string;
|
||||||
|
headers?: Record<string, string>;
|
||||||
|
timeout?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Supported OpenTelemetry backends
|
||||||
|
*/
|
||||||
|
export type OpenTelemetryBackend =
|
||||||
|
| 'jaeger'
|
||||||
|
| 'newrelic'
|
||||||
|
| 'grafana'
|
||||||
|
| 'datadog'
|
||||||
|
| 'honeycomb'
|
||||||
|
| 'lightstep'
|
||||||
|
| 'custom';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Pre-configured settings for popular OpenTelemetry backends
|
||||||
|
*/
|
||||||
|
export const OpenTelemetryBackendConfigs: Record<
|
||||||
|
OpenTelemetryBackend,
|
||||||
|
Partial<OpenTelemetryExporterConfig>
|
||||||
|
> = {
|
||||||
|
jaeger: {
|
||||||
|
url: 'http://localhost:4318/v1/traces',
|
||||||
|
headers: {},
|
||||||
|
},
|
||||||
|
newrelic: {
|
||||||
|
url: 'https://otlp.nr-data.net:4318/v1/traces',
|
||||||
|
headers: {
|
||||||
|
'api-key': process.env.NEW_RELIC_LICENSE_KEY || '',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
grafana: {
|
||||||
|
url: 'https://otlp-gateway-prod-us-central-0.grafana.net/otlp/v1/traces',
|
||||||
|
headers: {
|
||||||
|
Authorization:
|
||||||
|
process.env.GRAFANA_INSTANCE_ID && process.env.GRAFANA_API_KEY
|
||||||
|
? `Basic ${Buffer.from(
|
||||||
|
`${process.env.GRAFANA_INSTANCE_ID}:${process.env.GRAFANA_API_KEY}`,
|
||||||
|
).toString('base64')}`
|
||||||
|
: '',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
datadog: {
|
||||||
|
url: 'https://api.datadoghq.com/api/v2/series',
|
||||||
|
headers: {
|
||||||
|
'DD-API-KEY': process.env.DD_API_KEY || '',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
honeycomb: {
|
||||||
|
url: 'https://api.honeycomb.io/v1/traces',
|
||||||
|
headers: {
|
||||||
|
'x-honeycomb-team': process.env.HONEYCOMB_API_KEY || '',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
lightstep: {
|
||||||
|
url: 'https://ingest.lightstep.com:443/api/v2/otel/trace',
|
||||||
|
headers: {
|
||||||
|
'lightstep-access-token': process.env.LIGHTSTEP_ACCESS_TOKEN || '',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
custom: {
|
||||||
|
url: process.env.OTEL_EXPORTER_OTLP_ENDPOINT || 'http://localhost:4318',
|
||||||
|
headers: {},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Metric names used by the OpenTelemetry service
|
||||||
|
*/
|
||||||
|
export const OpenTelemetryMetrics = {
|
||||||
|
REQUESTS_TOTAL: 'llm.requests.total',
|
||||||
|
TOKENS_TOTAL: 'llm.tokens.total',
|
||||||
|
LATENCY_DURATION: 'llm.latency.duration',
|
||||||
|
REQUESTS_ACTIVE: 'llm.requests.active',
|
||||||
|
} as const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Trace attribute names used by the OpenTelemetry service
|
||||||
|
*/
|
||||||
|
export const OpenTelemetryAttributes = {
|
||||||
|
LLM_MODEL: 'llm.model',
|
||||||
|
LLM_PROVIDER: 'llm.provider',
|
||||||
|
LLM_USER_ID: 'llm.user_id',
|
||||||
|
LLM_OPERATION: 'llm.operation',
|
||||||
|
LLM_INPUT_TOKENS: 'llm.input_tokens',
|
||||||
|
LLM_OUTPUT_TOKENS: 'llm.output_tokens',
|
||||||
|
LLM_TOTAL_TOKENS: 'llm.total_tokens',
|
||||||
|
LLM_LATENCY_MS: 'llm.latency_ms',
|
||||||
|
LLM_HTTP_STATUS: 'llm.http_status',
|
||||||
|
LLM_BASE_URL: 'llm.base_url',
|
||||||
|
LLM_ERROR: 'llm.error',
|
||||||
|
LLM_ERROR_TYPE: 'llm.error_type',
|
||||||
|
LLM_INPUT: 'llm.input',
|
||||||
|
LLM_OUTPUT: 'llm.output',
|
||||||
|
LLM_MCP_TOOLS_USED: 'llm.mcp_tools_used',
|
||||||
|
TRACE_ID: 'trace.id',
|
||||||
|
} as const;
|
||||||
Reference in New Issue
Block a user