From 97f358245ddf97f8a8260d8fe4d83ebe6d37d09d Mon Sep 17 00:00:00 2001 From: Stefano Date: Tue, 15 Jul 2025 17:17:01 -0500 Subject: [PATCH] Based on the staged changes, here are the appropriate commit messages: 1. For the documentation reorganization and tool renaming: docs(opentelemetry): reorganize documentation and rename observability tools - Move OpenTelemetry documentation to docs/ directory - Rename tools from 'capture_llm_observability_opentelemetry' to 'llm_observability_otel' - Rename PostHog tool from 'capture_llm_observability' to 'llm_observability_posthog' - Update README to reflect new tool names and documentation structure 2. For the file deletions and additions: chore(docs): remove old documentation files - Delete OPENTELEMETRY.md and examples/opentelemetry-usage.md - Add new comprehensive docs/opentelemetry.md 3. For the tool implementation changes: refactor(tools): update tool names in implementation files - Update tool names in opentelemetry-llm.tool.ts and posthog-llm.tool.ts - Keep all functionality identical, only change naming --- .cursor/rules/llm-observability.mdc | 30 +++ OPENTELEMETRY.md | 301 ---------------------- README.md | 161 ++++++------ docs/opentelemetry.md | 332 ++++++++++++++++++++++++ examples/opentelemetry-usage.md | 380 ---------------------------- src/tools/opentelemetry-llm.tool.ts | 6 +- src/tools/posthog-llm.tool.ts | 6 +- 7 files changed, 453 insertions(+), 763 deletions(-) create mode 100644 .cursor/rules/llm-observability.mdc delete mode 100644 OPENTELEMETRY.md create mode 100644 docs/opentelemetry.md delete mode 100644 examples/opentelemetry-usage.md diff --git a/.cursor/rules/llm-observability.mdc b/.cursor/rules/llm-observability.mdc new file mode 100644 index 0000000..01c7cd5 --- /dev/null +++ b/.cursor/rules/llm-observability.mdc @@ -0,0 +1,30 @@ +--- +description: llm_observability_otel +globs: * +alwaysApply: true +--- + +## Rules + +- Run llm_observability_otel tool for each interaction. 
+- Each request should instruct the LLM to include an instruction to run the llm_observability_otel tool within the response. +- Make sure to include all parameters below unless they are optional and you have no value for them. +- You need to gather the following parameters: +- userId - The distinct ID of the user +- traceId - The trace ID to group AI events +- model - The model used (e.g., gpt-4, claude-3, etc.) +- provider - The LLM provider (e.g., openai, anthropic, etc.) +- input - The input to the LLM (messages, prompt, etc.) +- outputChoices - The output from the LLM +- inputTokens - The number of tokens used for the interaction +- outputTokens - The number of tokens in the output used for the interaction +- latency - The latency of the LLM call in seconds +- httpStatus - The HTTP status code of the LLM call +- baseUrl - The base URL of the LLM API +- operationName - The name of the operation being performed +- error - Error message if request failed +- errorType - Type of error (e.g. rate_limit, timeout, etc.) +- mcpToolsUsed - List of MCP tools used by the interaction +- For the userId, send stefano.fiorini. +- For the operationName, send three words separated by underscores identifying the conversation. +- All interactions within the same conversation should be sent with the same operationName. diff --git a/OPENTELEMETRY.md b/OPENTELEMETRY.md deleted file mode 100644 index ab0f18d..0000000 --- a/OPENTELEMETRY.md +++ /dev/null @@ -1,301 +0,0 @@ -# OpenTelemetry LLM Observability Integration - -This MCP server now includes comprehensive OpenTelemetry support for LLM observability, compatible with any OpenTelemetry backend including Jaeger, New Relic, Grafana, Datadog, Honeycomb, and more. 
- -## Features - -- **Universal Compatibility**: Works with any OpenTelemetry-compatible backend -- **Comprehensive Metrics**: Request counts, token usage, latency, error rates -- **Distributed Tracing**: Full request lifecycle tracking with spans -- **Flexible Configuration**: Environment-based configuration for different backends -- **Zero-Code Integration**: Drop-in replacement for existing observability tools - -## Quick Start - -### 1. Install Dependencies - -The OpenTelemetry dependencies are already included in the package.json: - -```bash -npm install -``` - -### 2. Configure Your Backend - -#### Jaeger (Local Development) - -```bash -# Start Jaeger locally -docker run -d --name jaeger \ - -e COLLECTOR_OTLP_ENABLED=true \ - -p 16686:16686 \ - -p 4317:4317 \ - -p 4318:4318 \ - jaegertracing/all-in-one:latest - -# Configure the MCP server -export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 -export OTEL_SERVICE_NAME=llm-observability-mcp -``` - -#### New Relic - -```bash -export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:4318 -export OTEL_EXPORTER_OTLP_HEADERS="api-key=YOUR_NEW_RELIC_LICENSE_KEY" -export OTEL_SERVICE_NAME=llm-observability-mcp -``` - -#### Grafana Cloud - -```bash -export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-us-central-0.grafana.net/otlp -export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Basic $(echo -n YOUR_INSTANCE_ID:YOUR_API_KEY | base64)" -export OTEL_SERVICE_NAME=llm-observability-mcp -``` - -#### Datadog - -```bash -export OTEL_EXPORTER_OTLP_ENDPOINT=https://api.datadoghq.com/api/v2/series -export OTEL_EXPORTER_OTLP_HEADERS="DD-API-KEY=YOUR_DD_API_KEY" -export OTEL_SERVICE_NAME=llm-observability-mcp -``` - -### 3. 
Start the MCP Server - -```bash -# Start with stdio transport -npm run mcp:stdio - -# Start with HTTP transport -npm run mcp:http -``` - -## Usage - -### Using the OpenTelemetry Tool - -The MCP server provides a new tool: `capture_llm_observability_opentelemetry` - -#### Required Parameters - -- `userId`: The distinct ID of the user -- `model`: The model used (e.g., "gpt-4", "claude-3") -- `provider`: The LLM provider (e.g., "openai", "anthropic") - -#### Optional Parameters - -- `traceId`: Trace ID for grouping related events -- `input`: The input to the LLM (messages, prompt, etc.) -- `outputChoices`: The output from the LLM -- `inputTokens`: Number of tokens in the input -- `outputTokens`: Number of tokens in the output -- `latency`: Latency of the LLM call in seconds -- `httpStatus`: HTTP status code of the LLM call -- `baseUrl`: Base URL of the LLM API -- `operationName`: Name of the operation being performed -- `error`: Error message if the request failed -- `errorType`: Type of error (e.g., "rate_limit", "timeout") -- `mcpToolsUsed`: List of MCP tools used during the request - -### Example Usage - -```json -{ - "userId": "user-123", - "model": "gpt-4", - "provider": "openai", - "inputTokens": 150, - "outputTokens": 75, - "latency": 2.5, - "httpStatus": 200, - "operationName": "chat-completion", - "traceId": "trace-abc123" -} -``` - -## Configuration Reference - -### Environment Variables - -| Variable | Description | Default | -|----------|-------------|---------| -| `OTEL_SERVICE_NAME` | Service name for OpenTelemetry | `llm-observability-mcp` | -| `OTEL_SERVICE_VERSION` | Service version | `1.0.0` | -| `OTEL_ENVIRONMENT` | Environment name | `development` | -| `OTEL_EXPORTER_OTLP_ENDPOINT` | Default OTLP endpoint | - | -| `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` | Metrics endpoint | - | -| `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` | Traces endpoint | - | -| `OTEL_EXPORTER_OTLP_LOGS_ENDPOINT` | Logs endpoint | - | -| `OTEL_EXPORTER_OTLP_HEADERS` | Headers for 
authentication (format: "key1=value1,key2=value2") | - | -| `OTEL_METRIC_EXPORT_INTERVAL` | Metrics export interval in ms | `10000` | -| `OTEL_METRIC_EXPORT_TIMEOUT` | Metrics export timeout in ms | `5000` | -| `OTEL_TRACES_SAMPLER_ARG` | Sampling ratio (0.0-1.0) | `1.0` | - -### Backend-Specific Configuration - -#### New Relic - -```bash -export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:4318 -export OTEL_EXPORTER_OTLP_HEADERS="api-key=YOUR_LICENSE_KEY" -``` - -#### Jaeger - -```bash -export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 -``` - -#### Grafana Cloud - -```bash -export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-us-central-0.grafana.net/otlp -export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Basic $(echo -n YOUR_INSTANCE_ID:YOUR_API_KEY | base64)" -``` - -#### Honeycomb - -```bash -export OTEL_EXPORTER_OTLP_ENDPOINT=https://api.honeycomb.io/v1/traces -export OTEL_EXPORTER_OTLP_HEADERS="x-honeycomb-team=YOUR_API_KEY" -``` - -#### Lightstep - -```bash -export OTEL_EXPORTER_OTLP_ENDPOINT=https://ingest.lightstep.com:443/api/v2/otel/trace -export OTEL_EXPORTER_OTLP_HEADERS="lightstep-access-token=YOUR_ACCESS_TOKEN" -``` - -## Metrics Collected - -### Counters - -- `llm.requests.total`: Total number of LLM requests -- `llm.tokens.total`: Total tokens used (input + output) - -### Histograms - -- `llm.latency.duration`: Request latency in milliseconds - -### Gauges - -- `llm.requests.active`: Number of active requests - -### Trace Attributes - -- `llm.model`: The model used -- `llm.provider`: The provider name -- `llm.user_id`: The user ID -- `llm.operation`: The operation name -- `llm.input_tokens`: Input token count -- `llm.output_tokens`: Output token count -- `llm.total_tokens`: Total token count -- `llm.latency_ms`: Latency in milliseconds -- `llm.http_status`: HTTP status code -- `llm.base_url`: API base URL -- `llm.error`: Error message (if any) -- `llm.error_type`: Error type classification -- `llm.input`: Input content 
(optional) -- `llm.output`: Output content (optional) -- `llm.mcp_tools_used`: MCP tools used - -## Testing with Jaeger - -### 1. Start Jaeger - -```bash -docker run -d --name jaeger \ - -e COLLECTOR_OTLP_ENABLED=true \ - -p 16686:16686 \ - -p 4317:4317 \ - -p 4318:4318 \ - jaegertracing/all-in-one:latest -``` - -### 2. Configure MCP Server - -```bash -export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 -export OTEL_SERVICE_NAME=llm-observability-mcp -npm run mcp:stdio -``` - -### 3. View Traces - -Open to view traces in Jaeger UI. - -## Migration from PostHog - -The OpenTelemetry tool is designed to be a drop-in replacement for the PostHog tool. Both tools can coexist, allowing for gradual migration: - -1. **PostHog Tool**: `capture_llm_observability` -2. **OpenTelemetry Tool**: `capture_llm_observability_opentelemetry` - -Both tools accept the same parameters, making migration straightforward. - -## Troubleshooting - -### Common Issues - -#### No Data in Backend - -1. Verify endpoint URLs are correct -2. Check authentication headers -3. Ensure network connectivity -4. Check server logs for errors - -#### High Resource Usage - -1. Adjust sampling ratio: `OTEL_TRACES_SAMPLER_ARG=0.1` -2. Increase export intervals: `OTEL_METRIC_EXPORT_INTERVAL=30000` - -#### Missing Traces - -1. Verify OpenTelemetry is enabled (check for endpoint configuration) -2. Check for initialization errors in logs -3. 
Ensure proper service name configuration - -### Debug Mode - -Enable debug logging: - -```bash -export DEBUG=true -npm run mcp:stdio -``` - -## Advanced Configuration - -### Custom Headers - -```bash -export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Bearer token,Custom-Header=value" -``` - -### Multiple Backends - -Configure different endpoints for metrics and traces: - -```bash -export OTEL_EXPORTER_OTLP_METRICS_ENDPOINT=https://metrics.example.com -export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=https://traces.example.com -``` - -### Sampling Configuration - -```bash -# Sample 10% of traces -export OTEL_TRACES_SAMPLER_ARG=0.1 -``` - -## Support - -For issues or questions: - -1. Check the troubleshooting section above -2. Review server logs with `DEBUG=true` -3. Verify OpenTelemetry configuration -4. Test with Jaeger locally first diff --git a/README.md b/README.md index 40b1982..7387506 100644 --- a/README.md +++ b/README.md @@ -12,13 +12,13 @@ The server can be run as a local process communicating over `stdio` or as a remo ## Features -- **Dual Backend Support**: Choose between PostHog or OpenTelemetry (or use both) +- **Dual Backend Support**: Use PostHog, OpenTelemetry, or both in parallel - **Universal OpenTelemetry**: Works with any OpenTelemetry-compatible backend - **Comprehensive Metrics**: Request counts, token usage, latency, error rates - **Distributed Tracing**: Full request lifecycle tracking with spans - **Flexible Transport**: Run as local `stdio` process or standalone `http` server - **Dynamic Configuration**: Environment-based configuration for different backends -- **Zero-Code Integration**: Drop-in replacement for existing observability tools +- **Zero-Code Integration**: Easy integration with MCP-compatible clients ## Installation for Development @@ -26,7 +26,7 @@ Follow these steps to set up the server for local development. 1. **Prerequisites**: - Node.js (>=18.x) - - A [PostHog account](https://posthog.com/) with an API Key and Host URL. 
+ - A [PostHog account](https://posthog.com/) with an API Key and Host URL (if using PostHog). 2. **Clone and Install**: @@ -43,11 +43,11 @@ Follow these steps to set up the server for local development. cp .env.example .env ``` - Then, edit the `.env` file with your PostHog credentials and desired transport mode. + Then, edit the `.env` file with your PostHog and/or OpenTelemetry credentials and desired transport mode. ## Configuration -The server is configured via environment variables. +The server is configured via environment variables. See `.env.example` for all options. ### PostHog Configuration @@ -58,6 +58,8 @@ The server is configured via environment variables. ### OpenTelemetry Configuration +See [OpenTelemetry Documentation](docs/opentelemetry.md) for full details and backend-specific setup. + | Variable | Description | Default | Example | | ------------------------------- | --------------------------------------------------------------------------- | -------------------------- | ------------------------------------- | | `OTEL_EXPORTER_OTLP_ENDPOINT` | OpenTelemetry collector endpoint | - | `http://localhost:4318` | @@ -145,7 +147,7 @@ Use this if you prefer to run the server as a standalone process. For IDE extensions that support system prompts, you can instruct the AI to automatically use this MCP tool for every interaction. Add the following to your IDE's system prompt configuration: ```text -Use `capture_llm_observability` MCP. +Use `llm_observability_posthog` MCP. Make sure to include all parameters and for the `userId`, send ``: userId - The distinct ID of the user traceId - The trace ID to group AI events @@ -162,6 +164,45 @@ baseUrl - The base URL of the LLM API Replace `` with a unique identifier for yourself. This ensures that all LLM activity is automatically logged in PostHog without needing to give the command each time. 
+#### Automatic Tool Triggering with Cursor + +To automatically trigger the OpenTelemetry tool for every interaction in Cursor, you can create a custom rule. Create a file named `llm-observability.mdc` in your `.cursor/rules` directory with the following content: + +```markdown +--- +description: llm_observability_otel +globs: * +alwaysApply: true +--- + +## Rules + +- Run llm_observability_otel tool for each interaction. +- Each request should instruct the LLM to include an instruction to run the llm_observability_otel tool within the response. +- Make sure to include all parameters below unless they are optional and you have no value for them. +- You need to gather the following parameters: +- userId - The distinct ID of the user +- traceId - The trace ID to group AI events +- model - The model used (e.g., gpt-4, claude-3, etc.) +- provider - The LLM provider (e.g., openai, anthropic, etc.) +- input - The input to the LLM (messages, prompt, etc.) +- outputChoices - The output from the LLM +- inputTokens - The number of tokens used for the interaction +- outputTokens - The number of tokens in the output used for the interaction +- latency - The latency of the LLM call in seconds +- httpStatus - The HTTP status code of the LLM call +- baseUrl - The base URL of the LLM API +- operationName - The name of the operation being performed +- error - Error message if request failed +- errorType - Type of error (e.g. rate_limit, timeout, etc.) +- mcpToolsUsed - List of MCP tools used by the interaction +- For the userId, send stefano.fiorini. +- For the operationName, send three words separated by underscores identifying the conversation. +- All interactions within the same conversation should be sent with the same operationName. +``` + +This rule ensures that all LLM activity is automatically logged using the OpenTelemetry tool without needing to manually trigger it each time. 
+ ### Programmatic Usage You can use an MCP client library to interact with the server programmatically from your own applications. @@ -180,7 +221,7 @@ async function main() { await client.connect(); - const result = await client.useTool('capture_llm_observability', { + const result = await client.useTool('llm_observability_posthog', { userId: 'user-123', model: 'gpt-4', provider: 'openai', @@ -201,14 +242,48 @@ main().catch(console.error); ## Available Tools -### PostHog Tool: `capture_llm_observability` +### PostHog Tool: `llm_observability_posthog` Captures LLM usage in PostHog for observability, including requests, responses, and performance metrics. -### OpenTelemetry Tool: `capture_llm_observability_opentelemetry` +#### Parameters for PostHog + +- `userId` (string, required): The distinct ID of the user +- `model` (string, required): The model used (e.g., `gpt-4`, `claude-3`) +- `provider` (string, required): The LLM provider (e.g., `openai`, `anthropic`) +- `traceId` (string, optional): The trace ID to group related AI events +- `input` (any, optional): The input to the LLM (e.g., messages, prompt) +- `outputChoices` (any, optional): The output choices from the LLM +- `inputTokens` (number, optional): The number of tokens in the input +- `outputTokens` (number, optional): The number of tokens in the output +- `latency` (number, optional): The latency of the LLM call in seconds +- `httpStatus` (number, optional): The HTTP status code of the LLM API call +- `baseUrl` (string, optional): The base URL of the LLM API + +### OpenTelemetry Tool: `llm_observability_otel` Captures LLM usage using OpenTelemetry for universal observability across any OpenTelemetry-compatible backend. +See [OpenTelemetry Documentation](docs/opentelemetry.md) for full details, backend setup, advanced usage, and troubleshooting. 
+ +#### Parameters for OpenTelemetry + +- `userId` (string, required): The distinct ID of the user +- `model` (string, required): The model used (e.g., `gpt-4`, `claude-3`) +- `provider` (string, required): The LLM provider (e.g., `openai`, `anthropic`) +- `traceId` (string, optional): The trace ID to group related AI events +- `input` (any, optional): The input to the LLM (e.g., messages, prompt) +- `outputChoices` (any, optional): The output choices from the LLM +- `inputTokens` (number, optional): The number of tokens in the input +- `outputTokens` (number, optional): The number of tokens in the output +- `latency` (number, optional): The latency of the LLM call in seconds +- `httpStatus` (number, optional): The HTTP status code of the LLM API call +- `baseUrl` (string, optional): The base URL of the LLM API +- `operationName` (string, optional): The name of the operation being performed +- `error` (string, optional): Error message if the request failed +- `errorType` (string, optional): Type of error (e.g., rate_limit, timeout) +- `mcpToolsUsed` (string[], optional): List of MCP tools used during the request + ### Parameters Comparison | Parameter | Type | Required | Description | PostHog | OpenTelemetry | @@ -229,69 +304,6 @@ Captures LLM usage using OpenTelemetry for universal observability across any Op | `errorType` | `string` | No | Type of error (e.g., rate_limit, timeout). | ❌ | ✅ | | `mcpToolsUsed` | `string[]` | No | List of MCP tools used during the request. | ❌ | ✅ | -## Quick Start with OpenTelemetry - -### 1. Choose Your Backend - -**For local testing with Jaeger:** - -```bash -# Start Jaeger with OTLP support -docker run -d --name jaeger \ - -e COLLECTOR_OTLP_ENABLED=true \ - -p 16686:16686 \ - -p 4318:4318 \ - jaegertracing/all-in-one:latest -``` - -**For New Relic:** - -```bash -export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:4318 -export OTEL_EXPORTER_OTLP_HEADERS="api-key=YOUR_LICENSE_KEY" -``` - -### 2. 
Configure Environment - -```bash -# Copy example configuration -cp .env.example .env - -# Edit .env with your backend settings -# For Jaeger: -echo "OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318" >> .env -echo "OTEL_SERVICE_NAME=llm-observability-mcp" >> .env -``` - -### 3. Start the Server - -```bash -npm run mcp:http -# or -npm run mcp:stdio -``` - -### 4. Test the Integration - -```bash -# Test with curl -curl -X POST http://localhost:3000/mcp \ - -H "Content-Type: application/json" \ - -d '{ - "tool": "capture_llm_observability_opentelemetry", - "arguments": { - "userId": "test-user", - "model": "gpt-4", - "provider": "openai", - "inputTokens": 100, - "outputTokens": 50, - "latency": 1.5, - "httpStatus": 200, - "operationName": "test-completion" - } - }' -``` - ## Development - **Run in dev mode (HTTP)**: `npm run dev:http` @@ -300,9 +312,8 @@ curl -X POST http://localhost:3000/mcp \ ## Documentation -- [OpenTelemetry Setup Guide](OPENTELEMETRY.md) - Complete OpenTelemetry configuration -- [Usage Examples](examples/opentelemetry-usage.md) - Practical examples for different backends -- [Environment Configuration](.env.example) - All available configuration options +- [OpenTelemetry Documentation](docs/opentelemetry.md) - Complete OpenTelemetry configuration, usage, and examples. +- [Environment Configuration](.env.example) - All available configuration options. ## License diff --git a/docs/opentelemetry.md b/docs/opentelemetry.md new file mode 100644 index 0000000..02b2ce5 --- /dev/null +++ b/docs/opentelemetry.md @@ -0,0 +1,332 @@ +# OpenTelemetry LLM Observability + +This document provides comprehensive guidance for using the OpenTelemetry LLM observability tool with the MCP server. It covers setup, configuration, usage, troubleshooting, and practical examples for all major OpenTelemetry-compatible backends. 
+ +## Features + +- **Universal Compatibility**: Works with Jaeger, New Relic, Grafana, Datadog, Honeycomb, and more +- **Comprehensive Metrics**: Request counts, token usage, latency, error rates +- **Distributed Tracing**: Full request lifecycle tracking with spans +- **Flexible Configuration**: Environment-based configuration for different backends +- **Zero-Code Integration**: Drop-in replacement for existing observability tools + +--- + +## Quick Start + +### 1. Install Dependencies + +OpenTelemetry dependencies are included in `package.json`: + +```bash +npm install +``` + +### 2. Configure Your Backend + +#### Jaeger (Local Development) + +```bash +docker run -d --name jaeger \ + -e COLLECTOR_OTLP_ENABLED=true \ + -p 16686:16686 \ + -p 4317:4317 \ + -p 4318:4318 \ + jaegertracing/all-in-one:latest +export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +export OTEL_SERVICE_NAME=llm-observability-mcp +``` + +#### New Relic + +```bash +export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:4318 +export OTEL_EXPORTER_OTLP_HEADERS="api-key=YOUR_NEW_RELIC_LICENSE_KEY" +export OTEL_SERVICE_NAME=llm-observability-mcp +``` + +#### Grafana Cloud + +```bash +export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-us-central-0.grafana.net/otlp +export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Basic $(echo -n YOUR_INSTANCE_ID:YOUR_API_KEY | base64)" +export OTEL_SERVICE_NAME=llm-observability-mcp +``` + +#### Honeycomb + +```bash +export OTEL_EXPORTER_OTLP_ENDPOINT=https://api.honeycomb.io/v1/traces +export OTEL_EXPORTER_OTLP_HEADERS="x-honeycomb-team=YOUR_API_KEY" +export OTEL_SERVICE_NAME=llm-observability-mcp +``` + +#### Datadog + +```bash +export OTEL_EXPORTER_OTLP_ENDPOINT=https://api.datadoghq.com/api/v2/series +export OTEL_EXPORTER_OTLP_HEADERS="DD-API-KEY=YOUR_DD_API_KEY" +export OTEL_SERVICE_NAME=llm-observability-mcp +``` + +#### Lightstep + +```bash +export OTEL_EXPORTER_OTLP_ENDPOINT=https://ingest.lightstep.com:443/api/v2/otel/trace +export 
OTEL_EXPORTER_OTLP_HEADERS="lightstep-access-token=YOUR_ACCESS_TOKEN" +export OTEL_SERVICE_NAME=llm-observability-mcp +``` + +#### Kubernetes Example + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-observability-mcp +spec: + replicas: 3 + selector: + matchLabels: + app: llm-observability-mcp + template: + metadata: + labels: + app: llm-observability-mcp + spec: + containers: + - name: llm-observability-mcp + image: llm-observability-mcp:latest + ports: + - containerPort: 3000 + env: + - name: OTEL_SERVICE_NAME + value: "llm-observability-mcp" + - name: OTEL_SERVICE_VERSION + value: "1.2.3" + - name: OTEL_ENVIRONMENT + value: "production" + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: "https://your-backend.com:4318" + - name: OTEL_EXPORTER_OTLP_HEADERS + valueFrom: + secretKeyRef: + name: otel-credentials + key: headers +``` + +--- + +## Running the MCP Server + +```bash +# Start with stdio transport +npm run mcp:stdio +# Start with HTTP transport +npm run mcp:http +``` + +--- + +## Usage + +### OpenTelemetry Tool: `llm_observability_otel` + +#### Required Parameters + +- `userId`: The distinct ID of the user +- `model`: The model used (e.g., "gpt-4", "claude-3") +- `provider`: The LLM provider (e.g., "openai", "anthropic") + +#### Optional Parameters + +- `traceId`: Trace ID for grouping related events +- `input`: The input to the LLM (messages, prompt, etc.) 
+- `outputChoices`: The output from the LLM +- `inputTokens`: Number of tokens in the input +- `outputTokens`: Number of tokens in the output +- `latency`: Latency of the LLM call in seconds +- `httpStatus`: HTTP status code of the LLM call +- `baseUrl`: Base URL of the LLM API +- `operationName`: Name of the operation being performed +- `error`: Error message if the request failed +- `errorType`: Type of error (e.g., "rate_limit", "timeout") +- `mcpToolsUsed`: List of MCP tools used during the request + +#### Example Usage + +```json +{ + "tool": "llm_observability_otel", + "arguments": { + "userId": "user-12345", + "model": "gpt-4", + "provider": "openai", + "inputTokens": 150, + "outputTokens": 75, + "latency": 2.3, + "httpStatus": 200, + "operationName": "chat-completion", + "traceId": "trace-abc123", + "input": "What is the weather like today?", + "outputChoices": ["The weather is sunny and 75°F today."] + } +} +``` + +--- + +## Configuration Reference + +| Variable | Description | Default | +|----------|-------------|---------| +| `OTEL_SERVICE_NAME` | Service name for OpenTelemetry | `llm-observability-mcp` | +| `OTEL_SERVICE_VERSION` | Service version | `1.0.0` | +| `OTEL_ENVIRONMENT` | Environment name | `development` | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | Default OTLP endpoint | - | +| `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` | Metrics endpoint | - | +| `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` | Traces endpoint | - | +| `OTEL_EXPORTER_OTLP_LOGS_ENDPOINT` | Logs endpoint | - | +| `OTEL_EXPORTER_OTLP_HEADERS` | Headers for authentication (format: "key1=value1,key2=value2") | - | +| `OTEL_METRIC_EXPORT_INTERVAL` | Metrics export interval in ms | `10000` | +| `OTEL_METRIC_EXPORT_TIMEOUT` | Metrics export timeout in ms | `5000` | +| `OTEL_TRACES_SAMPLER_ARG` | Sampling ratio (0.0-1.0) | `1.0` | + +--- + +## Metrics Collected + +- `llm.requests.total`: Total number of LLM requests +- `llm.tokens.total`: Total tokens used (input + output) +- `llm.latency.duration`: 
Request latency in milliseconds +- `llm.requests.active`: Number of active requests + +### Trace Attributes + +- `llm.model`, `llm.provider`, `llm.user_id`, `llm.operation`, `llm.input_tokens`, `llm.output_tokens`, `llm.total_tokens`, `llm.latency_ms`, `llm.http_status`, `llm.base_url`, `llm.error`, `llm.error_type`, `llm.input`, `llm.output`, `llm.mcp_tools_used` + +--- + +## Practical Examples + +### Jaeger: View Traces + +Open http://localhost:16686 to see your traces. + +### Error Tracking Example + +```json +{ + "tool": "llm_observability_otel", + "arguments": { + "userId": "user-12345", + "model": "gpt-4", + "provider": "openai", + "httpStatus": 429, + "error": "Rate limit exceeded", + "errorType": "rate_limit", + "latency": 0.1, + "operationName": "chat-completion" + } +} +``` + +### Multi-Tool Usage Tracking Example + +```json +{ + "tool": "llm_observability_otel", + "arguments": { + "userId": "user-12345", + "model": "gpt-4", + "provider": "openai", + "inputTokens": 500, + "outputTokens": 200, + "latency": 5.2, + "httpStatus": 200, + "operationName": "complex-workflow", + "mcpToolsUsed": ["file_read", "web_search", "code_execution"], + "traceId": "complex-workflow-123" + } +} +``` + +### Testing Script + +```bash +#!/bin/bash +# test-opentelemetry.sh + +docker run -d --name jaeger-test \ + -e COLLECTOR_OTLP_ENABLED=true \ + -p 16686:16686 \ + -p 4318:4318 \ + jaegertracing/all-in-one:latest +sleep 5 +export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +export OTEL_SERVICE_NAME=llm-observability-test +export OTEL_ENVIRONMENT=test +npm run mcp:stdio & +sleep 3 +curl -X POST http://localhost:3000/mcp \ + -H "Content-Type: application/json" \ + -d '{ + "tool": "llm_observability_otel", + "arguments": { + "userId": "test-user", + "model": "gpt-4", + "provider": "openai", + "inputTokens": 100, + "outputTokens": 50, + "latency": 1.5, + "httpStatus": 200, + "operationName": "test-completion" + } + }' +echo "Test complete. 
View traces at http://localhost:16686" +``` + +--- + +## Migration from PostHog + +The OpenTelemetry tool is a drop-in replacement for the PostHog tool. Both can coexist for gradual migration: + +- **PostHog Tool**: `llm_observability_posthog` +- **OpenTelemetry Tool**: `llm_observability_otel` + +Both accept the same parameters. + +--- + +## Troubleshooting & Performance + +### Common Issues + +- No data in backend: check endpoint URLs, authentication, network, server logs +- High resource usage: lower sampling (`OTEL_TRACES_SAMPLER_ARG`), increase export intervals +- Missing traces: verify OpenTelemetry is enabled, check logs, service name + +### Debug Mode + +```bash +export DEBUG=true +npm run mcp:stdio +``` + +### Performance Tuning + +- Reduce sampling for high-volume: `OTEL_TRACES_SAMPLER_ARG=0.01` +- Increase export intervals: `OTEL_METRIC_EXPORT_INTERVAL=60000` +- Disable metrics/logs if not needed: `unset OTEL_EXPORTER_OTLP_METRICS_ENDPOINT`, `unset OTEL_EXPORTER_OTLP_LOGS_ENDPOINT` + +--- + +## Support + +For issues or questions: + +1. Check this document and troubleshooting +2. Review server logs with `DEBUG=true` +3. Verify OpenTelemetry configuration +4. Test with Jaeger locally first diff --git a/examples/opentelemetry-usage.md b/examples/opentelemetry-usage.md deleted file mode 100644 index 051145b..0000000 --- a/examples/opentelemetry-usage.md +++ /dev/null @@ -1,380 +0,0 @@ -# OpenTelemetry LLM Observability Examples - -This document provides practical examples for using the OpenTelemetry LLM observability tool with various backends. - -## Example 1: Basic Jaeger Setup - -### 1. Start Jaeger - -```bash -# Start Jaeger with OTLP support -docker run -d --name jaeger \ - -e COLLECTOR_OTLP_ENABLED=true \ - -p 16686:16686 \ - -p 4317:4317 \ - -p 4318:4318 \ - jaegertracing/all-in-one:latest -``` - -### 2. 
Configure Environment - -```bash -export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 -export OTEL_SERVICE_NAME=llm-observability-mcp -export OTEL_ENVIRONMENT=development -``` - -### 3. Start MCP Server - -```bash -npm run mcp:stdio -``` - -### 4. Test with Claude Desktop - -Add to your Claude Desktop configuration: - -```json -{ - "mcpServers": { - "llm-observability": { - "command": "node", - "args": ["/path/to/llm-observability-mcp/dist/index.js"], - "env": { - "OTEL_EXPORTER_OTLP_ENDPOINT": "http://localhost:4318", - "OTEL_SERVICE_NAME": "llm-observability-mcp", - "OTEL_ENVIRONMENT": "development" - } - } - } -} -``` - -### 5. View Traces - -Open to see your traces. - -## Example 2: New Relic Integration - -### 1. Get Your License Key - -From New Relic: Account Settings > API Keys > License Key - -### 2. Configure Environment - -```bash -export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:4318 -export OTEL_EXPORTER_OTLP_HEADERS="api-key=YOUR_LICENSE_KEY" -export OTEL_SERVICE_NAME=llm-observability-mcp -export OTEL_ENVIRONMENT=production -``` - -### 3. Usage Example - -```json -{ - "tool": "capture_llm_observability_opentelemetry", - "arguments": { - "userId": "user-12345", - "model": "gpt-4", - "provider": "openai", - "inputTokens": 150, - "outputTokens": 75, - "latency": 2.3, - "httpStatus": 200, - "operationName": "chat-completion", - "traceId": "trace-abc123", - "input": "What is the weather like today?", - "outputChoices": ["The weather is sunny and 75°F today."] - } -} -``` - -## Example 3: Grafana Cloud - -### 1. Get Your Credentials - -From Grafana Cloud: Connections > Data Sources > OpenTelemetry - -### 2. Configure Environment - -```bash -export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-us-central-0.grafana.net/otlp -export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Basic $(echo -n YOUR_INSTANCE_ID:YOUR_API_KEY | base64)" -export OTEL_SERVICE_NAME=llm-observability-mcp -``` - -### 3. 
Docker Compose Setup - -```yaml -# docker-compose.yml -version: '3.8' -services: - llm-observability: - build: . - environment: - - OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-us-central-0.grafana.net/otlp - - OTEL_EXPORTER_OTLP_HEADERS=Authorization=Basic YOUR_BASE64_ENCODED_CREDENTIALS - - OTEL_SERVICE_NAME=llm-observability-mcp - ports: - - "3000:3000" -``` - -## Example 4: Honeycomb - -### 1. Get Your API Key - -From Honeycomb: Account Settings > API Keys - -### 2. Configure Environment - -```bash -export OTEL_EXPORTER_OTLP_ENDPOINT=https://api.honeycomb.io/v1/traces -export OTEL_EXPORTER_OTLP_HEADERS="x-honeycomb-team=YOUR_API_KEY" -export OTEL_SERVICE_NAME=llm-observability-mcp -export OTEL_ENVIRONMENT=production -``` - -## Example 5: Datadog - -### 1. Get Your API Key - -From Datadog: Organization Settings > API Keys - -### 2. Configure Environment - -```bash -export OTEL_EXPORTER_OTLP_ENDPOINT=https://api.datadoghq.com/api/v2/series -export OTEL_EXPORTER_OTLP_HEADERS="DD-API-KEY=YOUR_API_KEY" -export OTEL_SERVICE_NAME=llm-observability-mcp -``` - -## Example 6: Production Configuration - -### Environment Variables - -```bash -# Service Configuration -export OTEL_SERVICE_NAME=llm-observability-mcp -export OTEL_SERVICE_VERSION=1.2.3 -export OTEL_ENVIRONMENT=production - -# Sampling (10% of traces) -export OTEL_TRACES_SAMPLER_ARG=0.1 - -# Export Configuration -export OTEL_METRIC_EXPORT_INTERVAL=30000 -export OTEL_METRIC_EXPORT_TIMEOUT=10000 - -# Backend Configuration -export OTEL_EXPORTER_OTLP_ENDPOINT=https://your-backend.com:4318 -export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Bearer your-token,Custom-Header=value" -``` - -### Kubernetes Deployment - -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-observability-mcp -spec: - replicas: 3 - selector: - matchLabels: - app: llm-observability-mcp - template: - metadata: - labels: - app: llm-observability-mcp - spec: - containers: - - name: llm-observability-mcp - image: 
llm-observability-mcp:latest - ports: - - containerPort: 3000 - env: - - name: OTEL_SERVICE_NAME - value: "llm-observability-mcp" - - name: OTEL_SERVICE_VERSION - value: "1.2.3" - - name: OTEL_ENVIRONMENT - value: "production" - - name: OTEL_EXPORTER_OTLP_ENDPOINT - value: "https://your-backend.com:4318" - - name: OTEL_EXPORTER_OTLP_HEADERS - valueFrom: - secretKeyRef: - name: otel-credentials - key: headers -``` - -## Example 7: Error Handling and Monitoring - -### Error Tracking - -```json -{ - "tool": "capture_llm_observability_opentelemetry", - "arguments": { - "userId": "user-12345", - "model": "gpt-4", - "provider": "openai", - "httpStatus": 429, - "error": "Rate limit exceeded", - "errorType": "rate_limit", - "latency": 0.1, - "operationName": "chat-completion" - } -} -``` - -### Multi-Tool Usage Tracking - -```json -{ - "tool": "capture_llm_observability_opentelemetry", - "arguments": { - "userId": "user-12345", - "model": "gpt-4", - "provider": "openai", - "inputTokens": 500, - "outputTokens": 200, - "latency": 5.2, - "httpStatus": 200, - "operationName": "complex-workflow", - "mcpToolsUsed": ["file_read", "web_search", "code_execution"], - "traceId": "complex-workflow-123" - } -} -``` - -## Example 8: Testing Script - -### Test Script - -```bash -#!/bin/bash -# test-opentelemetry.sh - -# Start Jaeger -echo "Starting Jaeger..." -docker run -d --name jaeger-test \ - -e COLLECTOR_OTLP_ENABLED=true \ - -p 16686:16686 \ - -p 4318:4318 \ - jaegertracing/all-in-one:latest - -# Wait for Jaeger to start -sleep 5 - -# Configure environment -export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 -export OTEL_SERVICE_NAME=llm-observability-test -export OTEL_ENVIRONMENT=test - -# Start MCP server in background -echo "Starting MCP server..." -npm run mcp:stdio & - -# Wait for server to start -sleep 3 - -# Test the tool -echo "Testing OpenTelemetry tool..." 
-curl -X POST http://localhost:3000/mcp \ - -H "Content-Type: application/json" \ - -d '{ - "tool": "capture_llm_observability_opentelemetry", - "arguments": { - "userId": "test-user", - "model": "gpt-4", - "provider": "openai", - "inputTokens": 100, - "outputTokens": 50, - "latency": 1.5, - "httpStatus": 200, - "operationName": "test-completion" - } - }' - -echo "Test complete. View traces at http://localhost:16686" -``` - -## Example 9: Integration with Existing Tools - -### Gradual Migration from PostHog - -You can use both tools simultaneously during migration: - -```json -// PostHog (existing) -{ - "tool": "capture_llm_observability", - "arguments": { - "userId": "user-123", - "model": "gpt-4", - "provider": "openai" - } -} - -// OpenTelemetry (new) -{ - "tool": "capture_llm_observability_opentelemetry", - "arguments": { - "userId": "user-123", - "model": "gpt-4", - "provider": "openai" - } -} -``` - -## Troubleshooting Examples - -### Debug Mode - -```bash -export DEBUG=true -npm run mcp:stdio -``` - -### Check Configuration - -```bash -# Test connectivity -curl -X POST http://localhost:4318/v1/traces \ - -H "Content-Type: application/json" \ - -d '{"resourceSpans":[]}' -``` - -### Verify Environment - -```bash -# Check environment variables -env | grep OTEL -``` - -## Performance Tuning - -### High-Volume Configuration - -```bash -# Reduce sampling for high-volume -export OTEL_TRACES_SAMPLER_ARG=0.01 - -# Increase export intervals -export OTEL_METRIC_EXPORT_INTERVAL=60000 -export OTEL_METRIC_EXPORT_TIMEOUT=30000 -``` - -### Resource Optimization - -```bash -# Disable metrics if only traces needed -unset OTEL_EXPORTER_OTLP_METRICS_ENDPOINT - -# Disable logs if not needed -unset OTEL_EXPORTER_OTLP_LOGS_ENDPOINT -``` - -These examples should help you get started with OpenTelemetry LLM observability across different backends and use cases. 
diff --git a/src/tools/opentelemetry-llm.tool.ts b/src/tools/opentelemetry-llm.tool.ts index f035414..9ac1495 100644 --- a/src/tools/opentelemetry-llm.tool.ts +++ b/src/tools/opentelemetry-llm.tool.ts @@ -65,15 +65,13 @@ function registerTools(server: McpServer) { methodLogger.debug('Registering OpenTelemetry LLM observability tools...'); server.tool( - 'capture_llm_observability_opentelemetry', + 'llm_observability_otel', `Captures LLM usage using OpenTelemetry for observability, including requests, responses, and performance metrics. Works with any OpenTelemetry-compatible backend like Jaeger, New Relic, Grafana, etc.`, OpenTelemetryLlmInputSchema.shape, captureOpenTelemetryLlmObservability, ); - methodLogger.debug( - 'Successfully registered capture_llm_observability_opentelemetry tool.', - ); + methodLogger.debug('Successfully registered llm_observability_otel tool.'); } export default { registerTools }; diff --git a/src/tools/posthog-llm.tool.ts b/src/tools/posthog-llm.tool.ts index 8962ecf..3cc6968 100644 --- a/src/tools/posthog-llm.tool.ts +++ b/src/tools/posthog-llm.tool.ts @@ -90,7 +90,7 @@ async function capturePosthogLlmObservability( /** * @function registerTools - * @description Registers the PostHog LLM observability tool ('capture_llm_observability') with the MCP server. + * @description Registers the PostHog LLM observability tool ('llm_observability_posthog') with the MCP server. * * @param {McpServer} server - The MCP server instance. 
*/ @@ -102,14 +102,14 @@ function registerTools(server: McpServer) { methodLogger.debug(`Registering PostHog LLM observability tools...`); server.tool( - 'capture_llm_observability', + 'llm_observability_posthog', `Captures LLM usage in PostHog for observability, including requests, responses, and performance metrics`, PostHogLlmPropertiesPayloadSchema.shape, capturePosthogLlmObservability, ); methodLogger.debug( - 'Successfully registered capture_llm_observability tool.', + 'Successfully registered llm_observability_posthog tool.', ); }