162 lines
6.6 KiB
JavaScript
162 lines
6.6 KiB
JavaScript
|
|
"use strict";
|
||
|
|
/**
 * CommonJS / ES-module interop helper for default imports.
 *
 * Reuses an `__importDefault` already present on `this` when there is one;
 * otherwise falls back to the local implementation below.
 *
 * @param mod - The value returned by `require(...)`.
 * @returns `mod` itself when it is flagged with `__esModule`, otherwise a
 *          wrapper object exposing `mod` under the `default` key.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    // Modules flagged as ES modules already carry their own `default` export.
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
|
||
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||
|
|
exports.GeminiVisionProvider = void 0;
|
||
|
|
const fs_1 = __importDefault(require("fs"));
|
||
|
|
const generative_ai_1 = require("@google/generative-ai");
|
||
|
|
/**
 * MIME types keyed by lower-case file extension (without the dot).
 * Limited to image formats listed as supported in the Gemini API
 * image-understanding documentation.
 */
const IMAGE_MIME_TYPES = new Map([
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['png', 'image/png'],
    ['webp', 'image/webp'],
    ['heic', 'image/heic'],
    ['heif', 'image/heif']
]);
/** MIME type used when the extension is missing or not recognised. */
const DEFAULT_IMAGE_MIME_TYPE = 'image/jpeg';
/** Rough per-image input token cost used when the API reports no usage. */
const ESTIMATED_TOKENS_PER_IMAGE = 1000;
/** Rough characters-per-token ratio used when the API reports no usage. */
const ESTIMATED_CHARS_PER_TOKEN = 4;
/**
 * Pick the MIME type for an image from its file extension.
 * @param imagePath - Path to the image file
 * @returns The matching MIME type, or `image/jpeg` for unknown/missing extensions
 */
function detectImageMimeType(imagePath) {
    // Only a trailing ".ext" counts; a dot inside a directory name is ignored.
    const match = /\.([A-Za-z0-9]+)$/.exec(String(imagePath));
    const extension = match ? match[1].toLowerCase() : '';
    return IMAGE_MIME_TYPES.get(extension) || DEFAULT_IMAGE_MIME_TYPE;
}
/**
 * Read an image from disk and wrap it as an inline (base64) content part.
 * @param imagePath - Path to the image file
 * @returns Content part of the form `{ inlineData: { data, mimeType } }`
 */
function toInlineImagePart(imagePath) {
    const imageData = fs_1.default.readFileSync(imagePath);
    return {
        inlineData: {
            data: imageData.toString('base64'),
            mimeType: detectImageMimeType(imagePath)
        }
    };
}
/**
 * Build the usage stats for a completed request.
 *
 * Uses the token counts found on `response.usageMetadata` when they are
 * present; otherwise estimates them from the text lengths plus a flat
 * per-image cost.
 * @param response - Response object obtained from `generateContent`
 * @param promptText - Prompt that was sent
 * @param responseText - Text that came back
 * @param imageCount - Number of images that were sent
 * @returns `{ inputTokens, outputTokens, totalTokens }`
 */
function resolveUsage(response, promptText, responseText, imageCount) {
    const reported = response.usageMetadata;
    if (reported
        && Number.isFinite(reported.promptTokenCount)
        && Number.isFinite(reported.candidatesTokenCount)) {
        const reportedInput = reported.promptTokenCount;
        const reportedOutput = reported.candidatesTokenCount;
        return {
            inputTokens: reportedInput,
            outputTokens: reportedOutput,
            totalTokens: Number.isFinite(reported.totalTokenCount)
                ? reported.totalTokenCount
                : reportedInput + reportedOutput
        };
    }
    // No usable counts reported: fall back to a rough estimate.
    const inputTokens = Math.ceil(promptText.length / ESTIMATED_CHARS_PER_TOKEN)
        + (ESTIMATED_TOKENS_PER_IMAGE * imageCount);
    const outputTokens = Math.ceil(responseText.length / ESTIMATED_CHARS_PER_TOKEN);
    return {
        inputTokens,
        outputTokens,
        totalTokens: inputTokens + outputTokens
    };
}
/**
 * Send a prompt followed by the given images to the model.
 * @param model - Object exposing `generateContent(parts)`
 * @param promptText - Prompt placed first in the content parts
 * @param imagePaths - Iterable of image file paths, sent in order
 * @returns Description and usage stats
 */
async function requestDescription(model, promptText, imagePaths) {
    const imageParts = [];
    for (const imagePath of imagePaths) {
        imageParts.push(toInlineImagePart(imagePath));
    }
    const result = await model.generateContent([promptText, ...imageParts]);
    const response = await result.response;
    const text = response.text();
    return {
        description: text,
        // Count the parts actually built so non-array iterables work too.
        usage: resolveUsage(response, promptText, text, imageParts.length)
    };
}
/**
 * Build the result returned when a request fails.
 * @param description - Fallback description text
 * @returns Description with zeroed usage stats
 */
function failedResult(description) {
    return {
        description,
        usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }
    };
}
/**
 * Google Gemini Vision Provider Implementation
 *
 * Every public method is best-effort: failures are logged with
 * `console.error` and a fallback description with zeroed usage is returned
 * instead of throwing.
 */
class GeminiVisionProvider {
    /**
     * @param config - Provider settings; `apiKey` and `model` are read here
     */
    constructor(config) {
        this.config = config;
        this.genAI = new generative_ai_1.GoogleGenerativeAI(config.apiKey);
        this.model = this.genAI.getGenerativeModel({ model: config.model });
    }
    /**
     * Describe a single image
     * @param imagePath - Path to the image file
     * @param prompt - Prompt for the AI
     * @returns Description and usage stats
     */
    async describeImage(imagePath, prompt) {
        try {
            // `return await` keeps rejections inside this try/catch.
            return await requestDescription(this.model, prompt, [imagePath]);
        }
        catch (error) {
            console.error("Error describing image with Gemini:", error);
            return failedResult("Unable to describe this image.");
        }
    }
    /**
     * Compare two images and describe the differences
     * @param image1Path - Path to the first image
     * @param image2Path - Path to the second image
     * @param prompt - Prompt for the AI
     * @returns Description and usage stats
     */
    async compareImages(image1Path, image2Path, prompt) {
        try {
            return await requestDescription(this.model, prompt, [image1Path, image2Path]);
        }
        catch (error) {
            console.error("Error comparing images with Gemini:", error);
            return failedResult("Unable to describe the differences between these images.");
        }
    }
    /**
     * Describe a batch of images
     * @param imagePaths - Array of paths to the images
     * @param lastBatchContext - Context from the previous batch
     * @param prompt - Prompt for the AI
     * @returns Description and usage stats
     */
    async describeBatch(imagePaths, lastBatchContext, prompt) {
        try {
            // Prefix the prompt with the previous batch summary when there is one.
            const hasPreviousSummary = Boolean(lastBatchContext && lastBatchContext.lastDescription);
            const contextualPrompt = hasPreviousSummary
                ? `Previous batch summary: ${lastBatchContext.lastDescription}\n\n${prompt}`
                : prompt;
            return await requestDescription(this.model, contextualPrompt, imagePaths);
        }
        catch (error) {
            console.error("Error describing batch of images with Gemini:", error);
            return failedResult("Unable to describe this batch of images.");
        }
    }
}
|
||
|
|
exports.GeminiVisionProvider = GeminiVisionProvider;
|
||
|
|
//# sourceMappingURL=geminiVisionProvider.js.map
|