141 lines
5.4 KiB
JavaScript
141 lines
5.4 KiB
JavaScript
|
|
"use strict";
|
||
|
|
/**
 * TypeScript-emitted CommonJS interop helper.
 *
 * Reuses an `__importDefault` already present on `this` when there is one;
 * otherwise defines it. A module flagged with `__esModule` is returned
 * unchanged, anything else is wrapped as `{ default: mod }` so callers can
 * always read the export through `.default`.
 *
 * @param mod - The value returned by `require(...)`
 * @returns `mod` itself, or `{ default: mod }` when `mod` is not an ES module
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
|
||
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||
|
|
exports.OllamaVisionProvider = void 0;
|
||
|
|
const fs_1 = __importDefault(require("fs"));
|
||
|
|
const axios_1 = __importDefault(require("axios"));
|
||
|
|
/** Token limit sent to Ollama when `config.maxTokens` is not set. */
const DEFAULT_MAX_TOKENS = 300;

/** Request timeout for batch descriptions: 120000 ms = 2 minutes. */
const BATCH_TIMEOUT_MS = 120000;

/**
 * Reads an image file synchronously and returns its content base64-encoded,
 * which is the form the `images` field of the request carries.
 */
function readImageAsBase64(imagePath) {
    return fs_1.default.readFileSync(imagePath).toString("base64");
}

/**
 * Builds the usage object from the token counters of an /api/generate reply.
 * Counters that are missing or non-numeric are reported as 0.
 */
function usageFromResponse(data) {
    const inputTokens = Number(data.prompt_eval_count) || 0;
    const outputTokens = Number(data.eval_count) || 0;
    return { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens };
}

/** Builds the best-effort result returned when a request could not be completed. */
function fallbackResult(description) {
    return {
        description,
        usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }
    };
}

/**
 * Sends one non-streaming /api/generate request and converts the reply into
 * the `{ description, usage }` shape shared by all provider methods.
 */
async function requestDescription(provider, prompt, images, temperature, requestConfig) {
    const response = await provider.axiosInstance.post('/api/generate', {
        model: provider.config.model,
        prompt: prompt,
        images: images,
        stream: false,
        options: {
            // Ollama names its generation limit `num_predict`; `max_tokens`
            // is not one of its options, so the limit was never applied.
            num_predict: provider.config.maxTokens || DEFAULT_MAX_TOKENS,
            temperature: temperature
        }
    }, requestConfig);
    const combinedText = response.data.response || "";
    return {
        description: combinedText.trim(),
        usage: usageFromResponse(response.data)
    };
}

/**
 * Ollama Vision Provider Implementation
 * See: https://github.com/ollama/ollama/blob/main/docs/api.md
 *
 * Every method is best-effort: a failure (unreadable file, HTTP error,
 * timeout) is logged with `console.error` and answered with a fixed fallback
 * description and zeroed usage instead of a rejected promise.
 */
class OllamaVisionProvider {
    /**
     * @param config - Provider settings; this class reads `baseUrl`
     *   (defaults to "http://localhost:11434"), `model` and `maxTokens`
     *   (defaults to 300)
     */
    constructor(config) {
        this.config = config;
        this.axiosInstance = axios_1.default.create({
            baseURL: config.baseUrl || "http://localhost:11434",
            headers: { "Content-Type": "application/json" }
        });
    }
    /**
     * Describe a single image
     * @param imagePath - Path to the image file
     * @param prompt - Prompt for the AI
     * @returns Description and usage stats
     */
    async describeImage(imagePath, prompt) {
        try {
            const images = [readImageAsBase64(imagePath)];
            return await requestDescription(this, prompt, images, 0.1);
        }
        catch (error) {
            console.error("Ollama describeImage error:", error);
            return fallbackResult("Unable to describe this image.");
        }
    }
    /**
     * Compare two images and describe differences
     * @param image1Path - Path to the first image
     * @param image2Path - Path to the second image
     * @param prompt - Prompt for the AI
     * @returns Description and usage stats
     */
    async compareImages(image1Path, image2Path, prompt) {
        try {
            const images = [readImageAsBase64(image1Path), readImageAsBase64(image2Path)];
            return await requestDescription(this, prompt, images, 0.2);
        }
        catch (error) {
            console.error("Ollama compareImages error:", error);
            return fallbackResult("Unable to describe the differences.");
        }
    }
    /**
     * Describe a batch of images
     * @param imagePaths - Array of paths to the images
     * @param lastBatchContext - Context from the previous batch (optional)
     * @param prompt - Prompt for the AI
     * @returns Description and usage stats
     */
    async describeBatch(imagePaths, lastBatchContext, prompt) {
        try {
            let userPrompt = prompt;
            // If there's context, prepend it. This helps maintain a storyline across batches.
            if (lastBatchContext && lastBatchContext.lastDescription) {
                userPrompt = `Previous batch summary: ${lastBatchContext.lastDescription}\n\n${prompt}`;
            }
            const images = imagePaths.map(readImageAsBase64);
            // Only batch requests carry an explicit timeout.
            return await requestDescription(this, userPrompt, images, 0.2, { timeout: BATCH_TIMEOUT_MS });
        }
        catch (error) {
            console.error("Ollama describeBatch error:", error);
            return fallbackResult("Unable to describe this batch of images.");
        }
    }
}
|
||
|
|
exports.OllamaVisionProvider = OllamaVisionProvider;
|
||
|
|
//# sourceMappingURL=ollamaVisionProvider.js.map
|