From 37f0c4b643a2c21c58dfd09ec3f7558c24084219 Mon Sep 17 00:00:00 2001 From: Stefano Date: Mon, 30 Jun 2025 12:01:00 -0500 Subject: [PATCH] Added model and role translation. Rewrite of code's comments. --- .gitignore | 5 +- src/chatwrapper.ts | 131 ++++++++++++++++++++++++++++++++++++--------- src/config.ts | 17 ++++++ src/mapper.ts | 122 ++++++++++++++++++++++++++++------------- src/remoteimage.ts | 14 +++++ src/server.ts | 105 ++++++++++++++++++++++-------------- src/types.ts | 94 ++++++++++++++++++++++++++++---- 7 files changed, 379 insertions(+), 109 deletions(-) diff --git a/.gitignore b/.gitignore index e4abed4..1eed5b3 100644 --- a/.gitignore +++ b/.gitignore @@ -21,4 +21,7 @@ profile/ dist/ # Environment variables -.env \ No newline at end of file +.env + +# Roo Modes +.roomodes \ No newline at end of file diff --git a/src/chatwrapper.ts b/src/chatwrapper.ts index d98d434..41402d1 100644 --- a/src/chatwrapper.ts +++ b/src/chatwrapper.ts @@ -1,4 +1,7 @@ -// src/chatwrapper.ts +/** + * @fileoverview This file provides a wrapper around the Gemini API, handling + * content generation, model management, and retry logic. + */ import { AuthType, createContentGeneratorConfig, @@ -14,35 +17,86 @@ import { import { Content, GeminiResponse, Model } from './types.js'; import consola from 'consola'; -/* ------------------------------------------------------------------ */ -/* 1. Build the ContentGenerator exactly like the CLI does */ -/* ------------------------------------------------------------------ */ -let modelName: string; // we'll fill this once -const generatorPromise: Promise = (async () => { - // Pass undefined for model so the helper falls back to DEFAULT_GEMINI_MODEL - const cfg = await createContentGeneratorConfig( - undefined, // let helper pick default (Gemini-2.5-Pro) - AuthType.LOGIN_WITH_GOOGLE_PERSONAL, // same mode the CLI defaults to - ); - modelName = cfg.model; // remember the actual model string - return await createContentGenerator(cfg); -})(); +// ================================================================== +// 1. ContentGenerator Management +// ================================================================== -/* ------------------------------------------------------------------ */ -/* 2. Helpers consumed by server.ts */ -/* ------------------------------------------------------------------ */ +/** + * A cache for ContentGenerator instances to avoid re-creating them. + * The key is the model name, or 'default' for the default model. + */ +const generatorCache = new Map< + string, + Promise<{ + generator: ContentGenerator, + model: string, + }> +>(); + +/** + * Retrieves a ContentGenerator, creating and caching it if necessary. + * If an unsupported model is requested, it falls back to the default model. + * + * @param model - The name of the model to use. + * @returns A promise that resolves to an object containing + * the generator and the effective model name. + */ +function getGenerator( + model?: string, +): Promise<{ + generator: ContentGenerator, + model: string, +}> { + // Fallback to default if the specified model is not supported. + const modelToUse = + model === DEFAULT_GEMINI_MODEL || model === DEFAULT_GEMINI_FLASH_MODEL + ? model + : undefined; + + // Use the effective model name for the cache key. + const key = modelToUse ?? 'default'; + + if (generatorCache.has(key)) { + return generatorCache.get(key)!; + } + + // Create and cache a new generator. + const generatorPromise = (async () => { + const cfg = await createContentGeneratorConfig( + modelToUse, + AuthType.LOGIN_WITH_GOOGLE_PERSONAL, + ); + const generator = await createContentGenerator(cfg); + return { generator, model: cfg.model }; + })(); + + generatorCache.set(key, generatorPromise); + return generatorPromise; +} + +// ================================================================== +// 2. API Helpers +// ================================================================== type GenConfig = Record; const MAX_RETRIES = 3; const INITIAL_RETRY_DELAY = 1000; // 1 second +/** + * A higher-order function that adds retry logic with exponential backoff + * to an operation that may fail due to rate limiting. + * + * @param operation - The async operation to perform. + * @returns The result of the operation. + * @throws Throws an error if the operation fails after all retries. + */ async function withRetry(operation: () => Promise): Promise { let retries = 0; while (true) { try { return await operation(); } catch (error) { - // Check if it's an Error object with a message property + // Only retry on 'RESOURCE_EXHAUSTED' errors. if (!(error instanceof Error) || !error.message.includes('RESOURCE_EXHAUSTED') || retries >= MAX_RETRIES) { @@ -60,15 +114,26 @@ async function withRetry(operation: () => Promise): Promise { } } +/** + * Sends a chat request to the Gemini API. + * + * @param params - The request parameters. + * @param params.model - The model to use. + * @param params.contents - The chat history. + * @param params.generationConfig - Configuration for the generation. + * @returns The Gemini API response. + */ export async function sendChat({ + model, contents, generationConfig = {}, }: { + model?: string, contents: Content[], generationConfig?: GenConfig, - tools?: unknown, // accepted but ignored for now + tools?: unknown, // accepted but ignored for now }): Promise { - const generator = await generatorPromise; + const { generator, model: modelName } = await getGenerator(model); const gResp = await withRetry(() => generator.generateContent({ model: modelName, contents, @@ -84,15 +149,26 @@ export async function sendChat({ }; } +/** + * Sends a streaming chat request to the Gemini API. + * + * @param params - The request parameters. + * @param params.model - The model to use. + * @param params.contents - The chat history. + * @param params.generationConfig - Configuration for the generation. + * @yields Chunks of the Gemini API response. + */ export async function* sendChatStream({ + model, contents, generationConfig = {}, }: { + model?: string, contents: Content[], generationConfig?: GenConfig, tools?: unknown, }) { - const generator = await generatorPromise; + const { generator, model: modelName } = await getGenerator(model); const stream = await withRetry(() => generator.generateContentStream({ model: modelName, contents, @@ -101,6 +177,11 @@ export async function* sendChatStream({ for await (const chunk of stream) yield chunk; } +/** + * Lists the available models. + * + * @returns An array of available models. + */ export function listModels(): Model[] { return [ { @@ -116,9 +197,11 @@ export function listModels(): Model[] { ]; } -/* ------------------------------------------------------------------ */ -/* 3. Additional stubs to implement later */ -/* ------------------------------------------------------------------ */ +// ================================================================== +// 3. Future Implementations +// ================================================================== + +// The embeddings endpoint is not yet implemented. // export async function embed(_input: unknown) { // throw new Error('Embeddings endpoint not implemented yet.'); // } diff --git a/src/config.ts b/src/config.ts index 8f1e5a0..d5ef890 100644 --- a/src/config.ts +++ b/src/config.ts @@ -1,9 +1,26 @@ +/** + * @fileoverview This file manages the application's configuration, + * loading environment variables and providing them in a structured object. + */ /* eslint-disable n/no-process-env */ import dotenv from 'dotenv'; dotenv.config(); +/** + * Application configuration object. + */ export const config = { + /** + * The port number for the server to listen on. + * Defaults to 11434 if not specified in the environment. + * @type {number} + */ PORT: Number(process.env.PORT ?? 11434), + /** + * A flag to enable or disable verbose logging. + * Defaults to true if not specified in the environment. + * @type {boolean} + */ VERBOSE: Boolean(process.env.VERBOSE ?? true), }; \ No newline at end of file diff --git a/src/mapper.ts b/src/mapper.ts index 6776a2d..6450670 100644 --- a/src/mapper.ts +++ b/src/mapper.ts @@ -1,15 +1,28 @@ -/* ------------------------------------------------------------------ */ -/* mapper.ts – OpenAI ⇆ Gemini (with reasoning/1 M context) */ -/* ------------------------------------------------------------------ */ +/** + * @fileoverview This file contains the logic for mapping requests and + * responses between the OpenAI and Gemini API formats. It handles message + * conversion, vision support, and tool mapping. + */ import { fetchAndEncode } from './remoteimage'; import { z, ZodRawShape } from 'zod'; import { ToolRegistry } from '@google/gemini-cli-core/dist/src/tools/tool-registry.js'; import { Config } from '@google/gemini-cli-core/dist/src/config/config.js'; import { Tool } from '@google/gemini-cli-core/dist/src/tools/tools.js'; -import { Part, RequestBody, GeminiResponse, GeminiStreamChunk } from './types'; +import { + Part, + RequestBody, + GeminiResponse, + GeminiStreamChunk, + GeminiRequestBody, + Content, +} from './types'; -/* ----------------------------------------------------------------- */ +/** + * A placeholder for a local function call. + * + * @returns A promise that resolves to a successful execution result. + */ async function callLocalFunction(/*_name: string, _args: unknown*/) { return Promise.resolve({ ok: true, @@ -18,14 +31,22 @@ async function callLocalFunction(/*_name: string, _args: unknown*/) { }); } -/* ================================================================== */ -/* Request mapper: OpenAI ➞ Gemini */ -/* ================================================================== */ +// ================================================================== +// Request Mapper: OpenAI -> Gemini +// ================================================================== +/** + * Maps an OpenAI-compatible request body to a Gemini-compatible format. + * + * @param body - The incoming OpenAI request body. + * @returns An object containing the mapped Gemini request and tools. + */ export async function mapRequest(body: RequestBody) { - const parts: Part[] = []; + const contents: Content[] = []; + const systemParts: Part[] = []; - /* ---- convert messages & vision --------------------------------- */ + // Convert messages and handle vision content. for (const m of body.messages) { + const parts: Part[] = []; if (Array.isArray(m.content)) { for (const item of m.content) { if (item.type === 'image_url' && item.image_url) { @@ -34,39 +55,47 @@ export async function mapRequest(body: RequestBody) { parts.push({ text: item.text }); } } - } else { + } else if (m.content) { parts.push({ text: m.content }); } + + if (m.role === 'system') { + systemParts.push(...parts); + continue; + } + + if (m.role === 'user') { + contents.push({ role: 'user', parts: [...systemParts, ...parts] }); + systemParts.length = 0; + } else if (m.role === 'assistant') { + contents.push({ role: 'model', parts }); + } } - /* ---- base generationConfig ------------------------------------- */ + // Map generation configuration parameters. const generationConfig: Record = { temperature: body.temperature, maxOutputTokens: body.max_tokens, topP: body.top_p, - ...(body.generationConfig ?? {}), // copy anything ST already merged + ...(body.generationConfig ?? {}), // Preserve existing ST-merged config. }; if (body.include_reasoning === true) { - generationConfig.enable_thoughts = true; // ← current flag - generationConfig.thinking_budget ??= 2048; // optional limit + // The current flag for enabling thoughts. + generationConfig.enable_thoughts = true; + // Optional limit for thinking budget. + generationConfig.thinking_budget ??= 2048; } - /* ---- auto-enable reasoning & 1 M context ----------------------- */ + // Auto-enable reasoning and a 1 million token context window. if (body.include_reasoning === true && generationConfig.thinking !== true) { generationConfig.thinking = true; generationConfig.thinking_budget ??= 2048; } - generationConfig.maxInputTokens ??= 1_000_000; // lift context cap + generationConfig.maxInputTokens ??= 1_000_000; // Increase the context cap. - const geminiReq = { - contents: [{ role: 'user', parts }], - generationConfig, - stream: body.stream, - }; - - /* ---- Tool / function mapping ----------------------------------- */ - // Note: ToolRegistry expects a complex Config object that we don't have - // access to. Casting to `Config` is a workaround. + // Map tools and functions. + // Note: ToolRegistry expects a complex Config object that is not available + // here. Casting to `Config` is a necessary workaround. const tools = new ToolRegistry({} as Config); if (body.functions?.length) { @@ -87,13 +116,27 @@ export async function mapRequest(body: RequestBody) { } } - return { geminiReq, tools }; + return { + geminiReq: { + contents, + generationConfig, + stream: body.stream, + } as GeminiRequestBody, + tools, + }; } -/* ================================================================== */ -/* Non-stream response: Gemini ➞ OpenAI */ -/* ================================================================== */ -export function mapResponse(gResp: GeminiResponse) { +// ================================================================== +// Response Mapper: Gemini -> OpenAI (Non-Streaming) +// ================================================================== +/** + * Maps a Gemini API response to the OpenAI format for non-streaming responses. + * + * @param gResp - The response from the Gemini API. + * @param body - The original OpenAI request body. + * @returns An OpenAI-compatible chat completion object. + */ +export function mapResponse(gResp: GeminiResponse, body: RequestBody) { const usage = gResp.usageMetadata ?? { promptTokens: 0, candidatesTokens: 0, @@ -103,7 +146,7 @@ export function mapResponse(gResp: GeminiResponse) { id: `chatcmpl-${Date.now()}`, object: 'chat.completion', created: Math.floor(Date.now() / 1000), - model: 'gemini-2.5-pro-latest', + model: body.model, choices: [ { index: 0, @@ -119,16 +162,23 @@ export function mapResponse(gResp: GeminiResponse) { }; } -/* ================================================================== */ -/* Stream chunk mapper: Gemini ➞ OpenAI */ -/* ================================================================== */ +// ================================================================== +// Stream Chunk Mapper: Gemini -> OpenAI +// ================================================================== +/** + * Maps a Gemini stream chunk to the OpenAI format. + * + * @param chunk - A chunk from the Gemini API stream. + * @returns An OpenAI-compatible stream chunk. + */ export function mapStreamChunk(chunk: GeminiStreamChunk) { const part = chunk?.candidates?.[0]?.content?.parts?.[0] ?? {}; const delta: { role: 'assistant', content?: string } = { role: 'assistant' }; if (part.thought === true) { - delta.content = `${part.text ?? ''}`; // ST renders grey bubble + // Wrap thought content in tags for rendering. + delta.content = `${part.text ?? ''}`; } else if (typeof part.text === 'string') { delta.content = part.text; } diff --git a/src/remoteimage.ts b/src/remoteimage.ts index 896f0a5..440167d 100644 --- a/src/remoteimage.ts +++ b/src/remoteimage.ts @@ -1,3 +1,17 @@ +/** + * @fileoverview This file provides a utility function for fetching a remote + * image and encoding it in base64. + */ + +/** + * Fetches an image from a URL and returns + * its MIME type and base64-encoded data. + * + * @param url - The URL of the image to fetch. + * @returns A promise that resolves to an object containing the MIME type and + * base64-encoded image data. + * @throws Throws an error if the image fetch fails. + */ export async function fetchAndEncode(url: string) { const res = await fetch(url); if (!res.ok) throw new Error(`Failed to fetch image: ${url}`); diff --git a/src/server.ts b/src/server.ts index 1cb6c74..ab66426 100644 --- a/src/server.ts +++ b/src/server.ts @@ -1,3 +1,7 @@ +/** + * @fileoverview This file sets up and runs the HTTP server that acts as a + * proxy between an OpenAI-compatible client and the Gemini API. + */ import consola from 'consola'; import http from 'http'; import { listModels, sendChat, sendChatStream } from './chatwrapper'; @@ -5,11 +9,15 @@ import { mapRequest, mapResponse, mapStreamChunk } from './mapper.js'; import { RequestBody, GeminiResponse, GeminiStreamChunk, Part } from './types'; import { config } from './config'; -/* ── basic config ─────────────────────────────────────────────────── */ +// ================================================================== +// Server Configuration +// ================================================================== const PORT = config.PORT; const VERBOSE = config.VERBOSE; -/* ── Consola setup ────────────────────────────────────────────────── */ +// ================================================================== +// Logger Setup +// ================================================================== if (VERBOSE) { consola.level = 5; consola.info('Verbose logging enabled'); @@ -17,14 +25,27 @@ if (VERBOSE) { consola.info('Google CLI OpenAI proxy'); -/* ── CORS helper ──────────────────────────────────────────────────── */ +// ================================================================== +// HTTP Server Helpers +// ================================================================== + +/** + * Sets CORS headers to allow cross-origin requests. + * @param res - The HTTP server response object. + */ function allowCors(res: http.ServerResponse) { res.setHeader('Access-Control-Allow-Origin', '*'); res.setHeader('Access-Control-Allow-Headers', '*'); res.setHeader('Access-Control-Allow-Methods', 'GET,POST,OPTIONS'); } -/* ── JSON body helper ─────────────────────────────────────────────── */ +/** + * Reads and parses a JSON request body. + * @param req - The HTTP incoming message object. + * @param res - The HTTP server response object. + * @returns A promise that resolves to the parsed request body + * or null if invalid. + */ function readJSON( req: http.IncomingMessage, res: http.ServerResponse, @@ -50,7 +71,7 @@ function readJSON( try { resolve(JSON.parse(data) as RequestBody); } catch { - // malformed JSON + // Handle malformed JSON. res.writeHead(400, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ error: { message: 'Malformed JSON' } })); resolve(null); @@ -60,7 +81,9 @@ function readJSON( }); } -/* ── server ───────────────────────────────────────────────────────── */ +// ================================================================== +// Main Server Logic +// ================================================================== http .createServer(async (req, res) => { allowCors(res); @@ -68,13 +91,13 @@ http const pathname = url.pathname.replace(/\/$/, '') || '/'; consola.info(`${req.method} ${url.pathname}`); - /* -------- pre-flight ---------- */ + // Handle pre-flight CORS requests. if (req.method === 'OPTIONS') { res.writeHead(204).end(); return; } - /* -------- /v1/models ---------- */ + // Route for listing available models. if (pathname === '/v1/models' || pathname === '/models') { res.writeHead(200, { 'Content-Type': 'application/json' }); res.end( @@ -85,7 +108,7 @@ http return; } - /* ---- /v1/chat/completions ---- */ + // Route for chat completions. if ( (pathname === '/chat/completions' || pathname === '/v1/chat/completions') && @@ -105,32 +128,34 @@ http }); for await (const chunk of sendChatStream({ ...geminiReq, tools })) { - // Transform the chunk to match our expected type - const transformedParts = - chunk.candidates?.[0]?.content?.parts?.map(part => { - const transformedPart: Part = { - text: part.text, - thought: part.text?.startsWith?.('') ?? false, - }; - - if (part.inlineData?.data) { - transformedPart.inlineData = { - mimeType: part.inlineData.mimeType ?? 'text/plain', - data: part.inlineData.data, + // Transform the chunk to match the expected stream format. + const transformedParts = + chunk.candidates?.[0]?.content?.parts?.map((part) => { + const transformedPart: Part = { + text: part.text, + thought: part.text?.startsWith?.('') ?? false, }; - } - - return transformedPart; - }) ?? []; + + if (part.inlineData?.data) { + transformedPart.inlineData = { + mimeType: part.inlineData.mimeType ?? 'text/plain', + data: part.inlineData.data, + }; + } + + return transformedPart; + }) ?? []; const streamChunk: GeminiStreamChunk = { - candidates: [{ - content: { - parts: transformedParts, + candidates: [ + { + content: { + parts: transformedParts, + }, }, - }], + ], }; - + res.write( `data: ${JSON.stringify(mapStreamChunk(streamChunk))}\n\n`, ); @@ -139,24 +164,26 @@ http } else { const gResp: GeminiResponse = await sendChat({ ...geminiReq, tools }); res.writeHead(200, { 'Content-Type': 'application/json' }); - res.end(JSON.stringify(mapResponse(gResp))); + res.end(JSON.stringify(mapResponse(gResp, body))); } } catch (err) { const error = err as Error; consola.error('Proxy error ➜', error); - // For streaming responses, send error in stream format + // Handle errors, sending them in the appropriate format for streaming + // or non-streaming responses. if (body.stream && res.headersSent) { - res.write(`data: ${JSON.stringify({ - error: { - message: error.message, - type: 'error', - }, - })}\n\n`); + res.write( + `data: ${JSON.stringify({ + error: { + message: error.message, + type: 'error', + }, + })}\n\n`, + ); res.end('data: [DONE]\n\n'); return; } else { - // For non-streaming responses or if headers haven't been sent yet res.writeHead(500, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ error: { message: error.message } })); } diff --git a/src/types.ts b/src/types.ts index f09cb64..50527cf 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,61 +1,137 @@ -/* ------------------------------------------------------------------ */ -/* types.ts - Type definitions for the application */ -/* ------------------------------------------------------------------ */ +/** + * @fileoverview This file contains type definitions for the data structures + * used throughout the application, including request and response bodies for + * both the OpenAI and Gemini APIs. + */ +/** + * Represents a model available in the API. + */ export interface Model { + /** The unique identifier for the model. */ id: string; + /** The type of object, always 'model'. */ object: 'model'; + /** The owner of the model, always 'google'. */ owned_by: 'google'; } +/** + * Represents inline data, such as an image. + */ interface InlineData { - mimeType: string; - data: string; + /** The MIME type of the data (e.g., 'image/png'). */ + mimeType: string; + /** The base64-encoded data. */ + data: string; } +/** + * Represents a part of a multi-part message. + */ export interface Part { - text?: string; - inlineData?: InlineData; - thought?: boolean; + /** The text content of the part. */ + text?: string; + /** The inline data content of the part. */ + inlineData?: InlineData; + /** A flag indicating if this part represents a thought process. */ + thought?: boolean; } +/** + * Represents a piece of content in a conversation. + */ export interface Content { - role: string; + /** + * The producer of the content. Must be either 'user' or 'model'. + * + * Useful to set for multi-turn conversations, otherwise can be empty. + * If role is not specified, SDK will determine the role. + */ + role?: 'user' | 'model'; + /** An array of parts that make up the content. */ parts: Part[]; } +/** + * Represents a function definition for tool use. + */ interface FunctionDef { + /** The name of the function. */ name: string; + /** A description of the function. */ description?: string; + /** The parameters of the function, described as a JSON schema. */ parameters?: { properties?: Record, }; } +/** + * Represents the body of an incoming OpenAI-compatible request. + */ export interface RequestBody { + /** The model to use for the request. */ + model: string; + /** A list of messages in the conversation history. */ messages: { + role: string, content: | string | { type: string, image_url?: { url: string }, text?: string }[], }[]; + /** The sampling temperature. */ temperature?: number; + /** The maximum number of tokens to generate. */ max_tokens?: number; + /** The nucleus sampling probability. */ top_p?: number; + /** Additional generation configuration for the Gemini API. */ generationConfig?: Record; + /** A flag to include reasoning/thoughts in the response. */ include_reasoning?: boolean; + /** A flag to indicate if the response should be streamed. */ stream?: boolean; + /** A list of functions the model can call. */ functions?: FunctionDef[]; } +/** + * Represents the request body for the Gemini API. + */ +export interface GeminiRequestBody { + /** The model to use. */ + model?: string; + /** The content of the conversation. */ + contents: Content[]; + /** Configuration for the generation process. */ + generationConfig: Record; + /** Whether to stream the response. */ + stream?: boolean; +} + + +/** + * Represents a non-streaming response from the Gemini API. + */ export interface GeminiResponse { + /** The generated text content. */ text: string; + /** Metadata about token usage. */ usageMetadata?: { + /** The number of tokens in the prompt. */ promptTokens: number, + /** The number of tokens in the generated candidates. */ candidatesTokens: number, + /** The total number of tokens used. */ totalTokens: number, }; } +/** + * Represents a chunk of a streaming response from the Gemini API. + */ export interface GeminiStreamChunk { + /** A list of candidate responses. */ candidates?: { content?: { parts?: Part[],