"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.GeminiVisionProvider = void 0;
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const generative_ai_1 = require("@google/generative-ai");

/** MIME type used when the file extension is missing or not recognised. */
const DEFAULT_MIME_TYPE = 'image/jpeg';

/** Rough per-image input token cost used when no usage metadata is available. */
const ESTIMATED_TOKENS_PER_IMAGE = 1000;

/** Rough characters-per-token ratio used when no usage metadata is available. */
const ESTIMATED_CHARS_PER_TOKEN = 4;

/** Lower-cased file extension -> image MIME type sent to the model. */
const MIME_TYPES_BY_EXTENSION = new Map([
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.png', 'image/png'],
    ['.webp', 'image/webp'],
    ['.heic', 'image/heic'],
    ['.heif', 'image/heif'],
]);

/**
 * Pick the MIME type for an image from its file extension.
 * Falls back to JPEG for unknown extensions and for non-string paths, so
 * inputs that worked before keep being sent exactly as before.
 * @param imagePath - Path to the image file
 * @returns MIME type string
 */
function detectMimeType(imagePath) {
    if (typeof imagePath !== 'string') {
        return DEFAULT_MIME_TYPE;
    }
    const extension = path_1.default.extname(imagePath).toLowerCase();
    return MIME_TYPES_BY_EXTENSION.get(extension) || DEFAULT_MIME_TYPE;
}

/**
 * Read an image from disk (without blocking the event loop) and wrap it as an
 * inline-data content part with base64 payload.
 * @param imagePath - Path to the image file
 * @returns Content part of the form { inlineData: { data, mimeType } }
 */
async function loadImagePart(imagePath) {
    const imageData = await fs_1.default.promises.readFile(imagePath);
    return {
        inlineData: {
            data: imageData.toString('base64'),
            mimeType: detectMimeType(imagePath),
        },
    };
}

/**
 * Build the usage stats for one request.
 * Counts reported on `response.usageMetadata` are used when present; otherwise
 * each figure falls back to the rough estimate (text length / 4, plus a flat
 * amount per image). `totalTokens` is always `inputTokens + outputTokens`.
 * @param response - Response object obtained from the model
 * @param promptText - Prompt text that was sent
 * @param imageCount - Number of images that were sent
 * @param responseText - Text returned by the model
 * @returns Usage stats { inputTokens, outputTokens, totalTokens }
 */
function resolveUsage(response, promptText, imageCount, responseText) {
    const metadata = response && response.usageMetadata;
    const inputTokens = metadata && Number.isFinite(metadata.promptTokenCount)
        ? metadata.promptTokenCount
        : Math.ceil(promptText.length / ESTIMATED_CHARS_PER_TOKEN) + (ESTIMATED_TOKENS_PER_IMAGE * imageCount);
    const outputTokens = metadata && Number.isFinite(metadata.candidatesTokenCount)
        ? metadata.candidatesTokenCount
        : Math.ceil(responseText.length / ESTIMATED_CHARS_PER_TOKEN);
    return { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens };
}

/**
 * Send a prompt followed by the given images to the model and collect the
 * text answer together with usage stats. Errors are NOT caught here; callers
 * decide on the fallback.
 * @param model - Generative model exposing generateContent()
 * @param promptText - Prompt text placed first in the request
 * @param imagePaths - Paths of the images, sent in the given order
 * @returns Description and usage stats
 */
async function generateDescription(model, promptText, imagePaths) {
    // Files are read concurrently; Promise.all keeps the parts in input order.
    const imageParts = await Promise.all(Array.from(imagePaths, (imagePath) => loadImagePart(imagePath)));
    const result = await model.generateContent([promptText, ...imageParts]);
    const response = await result.response;
    const text = response.text();
    return {
        description: text,
        usage: resolveUsage(response, promptText, imageParts.length, text),
    };
}

/**
 * Build the best-effort result returned when a request fails.
 * @param description - Fallback description text
 * @returns Description with all usage counters set to zero
 */
function failedResult(description) {
    return {
        description,
        usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 },
    };
}

/**
 * Google Gemini Vision Provider Implementation
 *
 * Every public method is best-effort: failures (unreadable file, API error)
 * are logged with console.error and a placeholder description with zero
 * usage is returned instead of throwing.
 */
class GeminiVisionProvider {
    /**
     * @param config - Provider configuration; `apiKey` and `model` are read
     */
    constructor(config) {
        this.config = config;
        this.genAI = new generative_ai_1.GoogleGenerativeAI(config.apiKey);
        this.model = this.genAI.getGenerativeModel({ model: config.model });
    }

    /**
     * Describe a single image
     * @param imagePath - Path to the image file
     * @param prompt - Prompt for the AI
     * @returns Description and usage stats
     */
    async describeImage(imagePath, prompt) {
        try {
            // `return await` keeps rejections inside this try/catch.
            return await generateDescription(this.model, prompt, [imagePath]);
        }
        catch (error) {
            console.error("Error describing image with Gemini:", error);
            return failedResult("Unable to describe this image.");
        }
    }

    /**
     * Compare two images and describe the differences
     * @param image1Path - Path to the first image
     * @param image2Path - Path to the second image
     * @param prompt - Prompt for the AI
     * @returns Description and usage stats
     */
    async compareImages(image1Path, image2Path, prompt) {
        try {
            return await generateDescription(this.model, prompt, [image1Path, image2Path]);
        }
        catch (error) {
            console.error("Error comparing images with Gemini:", error);
            return failedResult("Unable to describe the differences between these images.");
        }
    }

    /**
     * Describe a batch of images
     * @param imagePaths - Array of paths to the images
     * @param lastBatchContext - Context from the previous batch
     * @param prompt - Prompt for the AI
     * @returns Description and usage stats
     */
    async describeBatch(imagePaths, lastBatchContext, prompt) {
        try {
            // Prepend the previous batch's summary, when there is one, so the
            // model can relate this batch to what came before.
            let contextualPrompt = prompt;
            if (lastBatchContext && lastBatchContext.lastDescription) {
                contextualPrompt = `Previous batch summary: ${lastBatchContext.lastDescription}\n\n${prompt}`;
            }
            return await generateDescription(this.model, contextualPrompt, imagePaths);
        }
        catch (error) {
            console.error("Error describing batch of images with Gemini:", error);
            return failedResult("Unable to describe this batch of images.");
        }
    }
}
exports.GeminiVisionProvider = GeminiVisionProvider;
//# sourceMappingURL=geminiVisionProvider.js.map