diff --git a/src/config/config.ts b/src/config/config.ts
index 5ca2a7a..c5b7794 100644
--- a/src/config/config.ts
+++ b/src/config/config.ts
@@ -62,6 +62,12 @@ export function getDefaultConfig(): Config {
         baseUrl: "http://localhost:11434",
         model: "gemma3:12b",
         maxTokens: 3000
+      },
+      openrouter: {
+        apiKey: process.env.OPENROUTER_API_KEY,
+        model: "anthropic/claude-sonnet-4.5",
+        baseUrl: "https://openrouter.ai/api/v1",
+        maxTokens: 300
       }
     },
 
diff --git a/src/providers/vision/index.ts b/src/providers/vision/index.ts
index 9fb41a1..8ab7b07 100644
--- a/src/providers/vision/index.ts
+++ b/src/providers/vision/index.ts
@@ -1,4 +1,5 @@
 export * from './visionProviderFactory';
 export * from './openAIVisionProvider';
 export * from './geminiVisionProvider';
-export * from './ollamaVisionProvider';
\ No newline at end of file
+export * from './ollamaVisionProvider';
+export * from './openRouterVisionProvider';
\ No newline at end of file
diff --git a/src/providers/vision/openRouterVisionProvider.ts b/src/providers/vision/openRouterVisionProvider.ts
new file mode 100644
index 0000000..6c82ebd
--- /dev/null
+++ b/src/providers/vision/openRouterVisionProvider.ts
@@ -0,0 +1,201 @@
+import fs from 'fs';
+import path from 'path';
+import axios, { AxiosInstance } from 'axios';
+import { VisionProvider, VisionProviderConfig, VisionResult, BatchContext } from '../../interfaces';
+
+const DEFAULT_BASE_URL = 'https://openrouter.ai/api/v1';
+const DEFAULT_MAX_TOKENS = 300;
+/** Sampling temperature sent for single-image and comparison requests. */
+const DEFAULT_TEMPERATURE = 0.1;
+/** MIME type used when the file extension is not in MIME_TYPES. */
+const FALLBACK_MIME_TYPE = 'image/jpeg';
+
+/** MIME types for the data URL, keyed by lower-case file extension. */
+const MIME_TYPES: Record<string, string> = {
+  '.jpg': 'image/jpeg',
+  '.jpeg': 'image/jpeg',
+  '.png': 'image/png',
+  '.gif': 'image/gif',
+  '.webp': 'image/webp'
+};
+
+type TextPart = { type: 'text'; text: string };
+type ImagePart = { type: 'image_url'; image_url: { url: string } };
+type ChatMessage =
+  | { role: 'system'; content: string }
+  | { role: 'user'; content: Array<TextPart | ImagePart> };
+
+/** The subset of the chat-completions response body that this provider reads. */
+interface ChatCompletionResponse {
+  choices?: Array<{ message?: { content?: string | null } }>;
+  usage?: {
+    prompt_tokens?: number;
+    completion_tokens?: number;
+    total_tokens?: number;
+  };
+}
+
+/**
+ * Vision provider that posts images to the `/chat/completions` endpoint of
+ * the configured base URL (OpenRouter by default) as base64 data URLs.
+ *
+ * Every public method is best-effort: failures are logged and a placeholder
+ * description with zeroed token usage is returned instead of throwing.
+ */
+export class OpenRouterVisionProvider implements VisionProvider {
+  private readonly config: VisionProviderConfig;
+  private readonly axiosInstance: AxiosInstance;
+
+  /**
+   * @param config Provider settings; `baseUrl` falls back to the public
+   *   OpenRouter endpoint and `maxTokens` to 300 when unset.
+   */
+  constructor(config: VisionProviderConfig) {
+    if (!config.apiKey) {
+      // Surface the misconfiguration once, up front, not only in per-request logs.
+      console.warn('OpenRouter API key is not set; requests will likely fail with an authentication error.');
+    }
+    this.config = config;
+    this.axiosInstance = axios.create({
+      baseURL: config.baseUrl || DEFAULT_BASE_URL,
+      headers: {
+        'Authorization': `Bearer ${config.apiKey}`,
+        'Content-Type': 'application/json',
+        'HTTP-Referer': 'https://github.com/anomalyco/aidio-description',
+        'X-Title': 'Aidio Description Generator'
+      }
+    });
+  }
+
+  /**
+   * Describes a single image.
+   *
+   * @param imagePath Path of the image file to send.
+   * @param prompt Instruction text sent alongside the image.
+   * @returns The model's description and token usage, or a placeholder on failure.
+   */
+  async describeImage(imagePath: string, prompt: string): Promise<VisionResult> {
+    try {
+      const image = await this.toImagePart(imagePath);
+      return await this.requestCompletion(
+        [{ role: 'user', content: [{ type: 'text', text: prompt }, image] }],
+        DEFAULT_TEMPERATURE
+      );
+    } catch (error: unknown) {
+      return this.failure('describeImage', error, 'Unable to describe this image.');
+    }
+  }
+
+  /**
+   * Describes the differences between two images.
+   *
+   * @param image1Path Path of the first image; sent before the second.
+   * @param image2Path Path of the second image.
+   * @param prompt Instruction text sent alongside the images.
+   * @returns The model's description and token usage, or a placeholder on failure.
+   */
+  async compareImages(image1Path: string, image2Path: string, prompt: string): Promise<VisionResult> {
+    try {
+      const [first, second] = await Promise.all([
+        this.toImagePart(image1Path),
+        this.toImagePart(image2Path)
+      ]);
+      return await this.requestCompletion(
+        [{ role: 'user', content: [{ type: 'text', text: prompt }, first, second] }],
+        DEFAULT_TEMPERATURE
+      );
+    } catch (error: unknown) {
+      return this.failure(
+        'compareImages',
+        error,
+        'Unable to describe the differences between these images.'
+      );
+    }
+  }
+
+  /**
+   * Describes a batch of images in one request.
+   *
+   * @param imagePaths Paths of the images, sent in the given order.
+   * @param lastBatchContext Context from the previous batch; when it carries a
+   *   `lastDescription`, that text is prepended as a system message.
+   * @param prompt Instruction text sent alongside the images.
+   * @returns The model's description and token usage, or a placeholder on failure.
+   */
+  async describeBatch(
+    imagePaths: string[],
+    lastBatchContext: BatchContext,
+    prompt: string
+  ): Promise<VisionResult> {
+    try {
+      // Promise.all keeps results in input order, so images stay in sequence.
+      const images = await Promise.all(imagePaths.map(fp => this.toImagePart(fp)));
+
+      const messages: ChatMessage[] = [];
+      if (lastBatchContext && lastBatchContext.lastDescription) {
+        messages.push({
+          role: 'system',
+          content: `Previous batch summary: ${lastBatchContext.lastDescription}`
+        });
+      }
+      messages.push({ role: 'user', content: [{ type: 'text', text: prompt }, ...images] });
+
+      // NOTE(review): unlike the other two methods, no temperature is sent
+      // here, so the API default applies — confirm this is intentional.
+      return await this.requestCompletion(messages);
+    } catch (error: unknown) {
+      return this.failure('describeBatch', error, 'Unable to describe this batch of images.');
+    }
+  }
+
+  /** Reads an image file and wraps it as a base64 data-URL content part. */
+  private async toImagePart(imagePath: string): Promise<ImagePart> {
+    // Async read so a large file does not block the event loop.
+    const bytes = await fs.promises.readFile(imagePath);
+    const extension = path.extname(imagePath).toLowerCase();
+    const mimeType = MIME_TYPES[extension] ?? FALLBACK_MIME_TYPE;
+    return {
+      type: 'image_url',
+      image_url: { url: `data:${mimeType};base64,${bytes.toString('base64')}` }
+    };
+  }
+
+  /** Posts the messages to `/chat/completions` and maps the reply to a VisionResult. */
+  private async requestCompletion(messages: ChatMessage[], temperature?: number): Promise<VisionResult> {
+    const response = await this.axiosInstance.post<ChatCompletionResponse>('/chat/completions', {
+      model: this.config.model,
+      // Included only when the caller pins a value.
+      ...(temperature === undefined ? {} : { temperature }),
+      messages,
+      // `||` on purpose: a 0 token limit also gets the default.
+      max_tokens: this.config.maxTokens || DEFAULT_MAX_TOKENS
+    });
+
+    const data = response.data;
+    return {
+      // `||` on purpose: an empty reply is replaced by the placeholder too.
+      description: data.choices?.[0]?.message?.content?.trim() || 'No description generated.',
+      usage: {
+        inputTokens: data.usage?.prompt_tokens ?? 0,
+        outputTokens: data.usage?.completion_tokens ?? 0,
+        totalTokens: data.usage?.total_tokens ?? 0
+      }
+    };
+  }
+
+  /** Logs a failed operation and builds the placeholder result returned to callers. */
+  private failure(operation: string, error: unknown, description: string): VisionResult {
+    let detail: unknown = error;
+    if (axios.isAxiosError(error)) {
+      // Prefer the API's error body; fall back to the transport message.
+      detail = error.response?.data || error.message;
+    } else if (error instanceof Error) {
+      detail = error.message;
+    }
+    console.error(`OpenRouter ${operation} error:`, detail);
+    return {
+      description,
+      usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }
+    };
+  }
+}
diff --git a/src/providers/vision/visionProviderFactory.ts b/src/providers/vision/visionProviderFactory.ts
index 1d3677a..11ee706 100644
--- a/src/providers/vision/visionProviderFactory.ts
+++ b/src/providers/vision/visionProviderFactory.ts
@@ -3,6 +3,7 @@ import { Config } from '../../config/config';
 import { OpenAIVisionProvider } from './openAIVisionProvider';
 import { GeminiVisionProvider } from './geminiVisionProvider';
 import { OllamaVisionProvider } from './ollamaVisionProvider';
+import { OpenRouterVisionProvider } from './openRouterVisionProvider';
 
 /**
  * Factory for creating vision AI providers
@@ -23,6 +24,8 @@ export class VisionProviderFactory {
         return new GeminiVisionProvider(providerConfig);
       case "ollama":
         return new OllamaVisionProvider(providerConfig);
+      case "openrouter":
+        return new OpenRouterVisionProvider(providerConfig);
       // Add other providers here
       default:
         throw new Error(`Vision provider "${providerName}" not implemented.`);