Add OpenRouter vision provider for multi-model access via a unified API

This commit is contained in:
2026-05-13 02:40:03 +02:00
parent eb15af3a36
commit 6e9a26557f
4 changed files with 182 additions and 1 deletions

View File

@@ -62,6 +62,12 @@ export function getDefaultConfig(): Config {
baseUrl: "http://localhost:11434",
model: "gemma3:12b",
maxTokens: 3000
},
openrouter: {
apiKey: process.env.OPENROUTER_API_KEY,
model: "anthropic/claude-sonnet-4.5",
baseUrl: "https://openrouter.ai/api/v1",
maxTokens: 300
}
},

View File

@@ -2,3 +2,4 @@ export * from './visionProviderFactory';
export * from './openAIVisionProvider';
export * from './geminiVisionProvider';
export * from './ollamaVisionProvider';
export * from './openRouterVisionProvider';

View File

@@ -0,0 +1,171 @@
import fs from 'fs';
import axios, { AxiosInstance } from 'axios';
import { VisionProvider, VisionProviderConfig, VisionResult, BatchContext } from '../../interfaces';
/** One element of a multimodal user message: either the prompt text or an inline image. */
type OpenRouterContentPart =
  | { type: 'text'; text: string }
  | { type: 'image_url'; image_url: { url: string } };

/** Chat message shapes this provider sends. */
type OpenRouterMessage =
  | { role: 'system'; content: string }
  | { role: 'user'; content: OpenRouterContentPart[] };

/** The subset of the chat-completions response body that this provider reads. */
type OpenRouterCompletion = {
  choices?: Array<{ message?: { content?: unknown } }>;
  usage?: { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };
};

/**
 * Vision provider that posts images to the `/chat/completions` endpoint of
 * OpenRouter (or of whatever `config.baseUrl` points at).
 *
 * Images are read from disk, base64-encoded and embedded in the request as
 * data URLs. All three public methods are best-effort: they never reject.
 * On any failure (unreadable file, HTTP error, ...) the error is logged with
 * `console.error` and a fallback description with zeroed token usage is
 * returned instead.
 */
export class OpenRouterVisionProvider implements VisionProvider {
  /** Media types by lower-case file extension; anything else is sent as JPEG. */
  private static readonly MIME_TYPES: ReadonlyMap<string, string> = new Map([
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['png', 'image/png'],
    ['webp', 'image/webp'],
    ['gif', 'image/gif']
  ]);

  /** Media type used when the extension is missing or unrecognised. */
  private static readonly FALLBACK_MIME_TYPE = 'image/jpeg';

  /** Sampling temperature sent by describeImage and compareImages. */
  private static readonly DEFAULT_TEMPERATURE = 0.1;

  private config: VisionProviderConfig;
  private axiosInstance: AxiosInstance;

  /**
   * @param config Provider settings. `apiKey` is sent as a Bearer token,
   *   `baseUrl` falls back to the public OpenRouter endpoint, and `model` /
   *   `maxTokens` are forwarded with every request.
   */
  constructor(config: VisionProviderConfig) {
    this.config = config;
    this.axiosInstance = axios.create({
      baseURL: config.baseUrl || 'https://openrouter.ai/api/v1',
      headers: {
        'Authorization': `Bearer ${config.apiKey}`,
        'Content-Type': 'application/json',
        // Sent with every request; presumably used by OpenRouter to attribute
        // traffic to this application.
        'HTTP-Referer': 'https://github.com/anomalyco/aidio-description',
        'X-Title': 'Aidio Description Generator'
      }
    });
  }

  /**
   * Describes a single image.
   *
   * @param imagePath Path of the image file to send.
   * @param prompt Instruction text sent alongside the image.
   * @returns The model's description and token usage, or a fallback result on failure.
   */
  async describeImage(imagePath: string, prompt: string): Promise<VisionResult> {
    try {
      // `return await` keeps rejections inside this try block.
      return await this.requestCompletion(
        [this.buildUserMessage(prompt, [imagePath])],
        OpenRouterVisionProvider.DEFAULT_TEMPERATURE
      );
    } catch (error: unknown) {
      return OpenRouterVisionProvider.fallbackResult(
        'describeImage',
        'Unable to describe this image.',
        error
      );
    }
  }

  /**
   * Sends two images in one request so the model can describe how they differ.
   *
   * @param image1Path Path of the first image (sent first).
   * @param image2Path Path of the second image.
   * @param prompt Instruction text sent alongside the images.
   * @returns The model's description and token usage, or a fallback result on failure.
   */
  async compareImages(image1Path: string, image2Path: string, prompt: string): Promise<VisionResult> {
    try {
      return await this.requestCompletion(
        [this.buildUserMessage(prompt, [image1Path, image2Path])],
        OpenRouterVisionProvider.DEFAULT_TEMPERATURE
      );
    } catch (error: unknown) {
      return OpenRouterVisionProvider.fallbackResult(
        'compareImages',
        'Unable to describe the differences between these images.',
        error
      );
    }
  }

  /**
   * Describes a batch of images in a single request.
   *
   * When `lastBatchContext.lastDescription` is set, it is passed to the model
   * as a leading system message ("Previous batch summary: ...").
   *
   * @param imagePaths Paths of the images, sent in the given order.
   * @param lastBatchContext Context carried over from the previous batch.
   * @param prompt Instruction text sent alongside the images.
   * @returns The model's description and token usage, or a fallback result on failure.
   */
  async describeBatch(
    imagePaths: string[],
    lastBatchContext: BatchContext,
    prompt: string
  ): Promise<VisionResult> {
    try {
      const messages: OpenRouterMessage[] = [];
      if (lastBatchContext && lastBatchContext.lastDescription) {
        messages.push({
          role: 'system',
          content: `Previous batch summary: ${lastBatchContext.lastDescription}`
        });
      }
      messages.push(this.buildUserMessage(prompt, imagePaths));

      // NOTE(review): unlike the other two methods, no temperature is sent
      // here, so the model's default applies. Kept as-is; confirm whether
      // that is intentional.
      return await this.requestCompletion(messages);
    } catch (error: unknown) {
      return OpenRouterVisionProvider.fallbackResult(
        'describeBatch',
        'Unable to describe this batch of images.',
        error
      );
    }
  }

  /** Builds the user message: the prompt text followed by one inline image per path. */
  private buildUserMessage(prompt: string, imagePaths: string[]): OpenRouterMessage {
    const content: OpenRouterContentPart[] = [{ type: 'text', text: prompt }];
    for (const imagePath of imagePaths) {
      content.push({
        type: 'image_url',
        image_url: { url: OpenRouterVisionProvider.toDataUrl(imagePath) }
      });
    }
    return { role: 'user', content };
  }

  /** Posts the messages to `/chat/completions` and converts the response body. */
  private async requestCompletion(
    messages: OpenRouterMessage[],
    temperature?: number
  ): Promise<VisionResult> {
    const response = await this.axiosInstance.post<OpenRouterCompletion>('/chat/completions', {
      model: this.config.model,
      ...(temperature === undefined ? {} : { temperature }),
      messages,
      // `||` on purpose: a configured value of 0 also falls back to 300.
      max_tokens: this.config.maxTokens || 300
    });
    return OpenRouterVisionProvider.toVisionResult(response.data);
  }

  /**
   * Reads an image file and encodes it as a base64 data URL.
   *
   * The media type is chosen from the file extension so that PNG/WebP/GIF
   * inputs are not mislabelled as JPEG; unknown extensions keep the JPEG label.
   */
  private static toDataUrl(imagePath: string): string {
    // With no dot in the path this yields the whole path, which simply
    // misses the lookup and takes the fallback.
    const extension = imagePath.slice(imagePath.lastIndexOf('.') + 1).toLowerCase();
    const mimeType =
      OpenRouterVisionProvider.MIME_TYPES.get(extension) ??
      OpenRouterVisionProvider.FALLBACK_MIME_TYPE;
    const base64 = fs.readFileSync(imagePath).toString('base64');
    return `data:${mimeType};base64,${base64}`;
  }

  /** Extracts the first choice's text and the token counts from a response body. */
  private static toVisionResult(data: OpenRouterCompletion): VisionResult {
    const content = data.choices?.[0]?.message?.content;
    // Only call trim() on strings, so an unexpected content shape does not
    // throw and discard the usage figures reported by the API.
    const text = typeof content === 'string' ? content.trim() : '';
    return {
      description: text || 'No description generated.',
      usage: {
        inputTokens: data.usage?.prompt_tokens || 0,
        outputTokens: data.usage?.completion_tokens || 0,
        totalTokens: data.usage?.total_tokens || 0
      }
    };
  }

  /** Logs a failure for `operation` and returns the zero-usage fallback result. */
  private static fallbackResult(
    operation: string,
    description: string,
    error: unknown
  ): VisionResult {
    console.error(`OpenRouter ${operation} error:`, OpenRouterVisionProvider.describeError(error));
    return {
      description,
      usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }
    };
  }

  /**
   * Picks the most useful detail to log: the HTTP response body when the
   * thrown value carries one, otherwise its message, otherwise the value itself.
   */
  private static describeError(error: unknown): unknown {
    if (typeof error === 'object' && error !== null) {
      const candidate = error as { response?: { data?: unknown }; message?: unknown };
      return candidate.response?.data || candidate.message;
    }
    return error;
  }
}

View File

@@ -3,6 +3,7 @@ import { Config } from '../../config/config';
import { OpenAIVisionProvider } from './openAIVisionProvider';
import { GeminiVisionProvider } from './geminiVisionProvider';
import { OllamaVisionProvider } from './ollamaVisionProvider';
import { OpenRouterVisionProvider } from './openRouterVisionProvider';
/**
* Factory for creating vision AI providers
@@ -23,6 +24,8 @@ export class VisionProviderFactory {
return new GeminiVisionProvider(providerConfig);
case "ollama":
return new OllamaVisionProvider(providerConfig);
case 'openrouter':
return new OpenRouterVisionProvider(providerConfig);
// Add other providers here
default:
throw new Error(`Vision provider "${providerName}" not implemented.`);