mirror of https://github.com/TriliumNext/Notes
dynamically adjust context window sizes based on conversation context
parent 29845c343c
commit f2cb013e14
@@ -0,0 +1,138 @@
import type { Message } from '../ai_interface.js';

/**
 * Interface for model capabilities information
 */
export interface ModelCapabilities {
    contextWindowTokens: number;   // Context window size in tokens
    contextWindowChars: number;    // Estimated context window size in characters (for planning)
    maxCompletionTokens: number;   // Maximum completion length
    hasFunctionCalling: boolean;   // Whether the model supports function calling
    hasVision: boolean;            // Whether the model supports image input
    costPerInputToken: number;     // Cost per input token (if applicable)
    costPerOutputToken: number;    // Cost per output token (if applicable)
}

/**
 * Default model capabilities for unknown models
 */
export const DEFAULT_MODEL_CAPABILITIES: ModelCapabilities = {
    contextWindowTokens: 4096,
    contextWindowChars: 16000, // ~4 chars per token estimate
    maxCompletionTokens: 1024,
    hasFunctionCalling: false,
    hasVision: false,
    costPerInputToken: 0,
    costPerOutputToken: 0
};

/**
 * Model capabilities for common models
 */
export const MODEL_CAPABILITIES: Record<string, Partial<ModelCapabilities>> = {
    // OpenAI models
    'gpt-3.5-turbo': {
        contextWindowTokens: 4096,
        contextWindowChars: 16000,
        hasFunctionCalling: true
    },
    'gpt-3.5-turbo-16k': {
        contextWindowTokens: 16384,
        contextWindowChars: 65000,
        hasFunctionCalling: true
    },
    'gpt-4': {
        contextWindowTokens: 8192,
        contextWindowChars: 32000,
        hasFunctionCalling: true
    },
    'gpt-4-32k': {
        contextWindowTokens: 32768,
        contextWindowChars: 130000,
        hasFunctionCalling: true
    },
    'gpt-4-turbo': {
        contextWindowTokens: 128000,
        contextWindowChars: 512000,
        hasFunctionCalling: true,
        hasVision: true
    },
    'gpt-4o': {
        contextWindowTokens: 128000,
        contextWindowChars: 512000,
        hasFunctionCalling: true,
        hasVision: true
    },

    // Anthropic models
    'claude-3-haiku': {
        contextWindowTokens: 200000,
        contextWindowChars: 800000,
        hasVision: true
    },
    'claude-3-sonnet': {
        contextWindowTokens: 200000,
        contextWindowChars: 800000,
        hasVision: true
    },
    'claude-3-opus': {
        contextWindowTokens: 200000,
        contextWindowChars: 800000,
        hasVision: true
    },
    'claude-2': {
        contextWindowTokens: 100000,
        contextWindowChars: 400000
    },

    // Ollama models (defaults, will be updated dynamically)
    'llama3': {
        contextWindowTokens: 8192,
        contextWindowChars: 32000
    },
    'mistral': {
        contextWindowTokens: 8192,
        contextWindowChars: 32000
    },
    'llama2': {
        contextWindowTokens: 4096,
        contextWindowChars: 16000
    }
};
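The entries above are deliberately partial: a complete ModelCapabilities value comes from spreading an entry over DEFAULT_MODEL_CAPABILITIES, which is what the capabilities service in the second file does when it seeds its cache. A minimal sketch of that merge (illustrative, not part of the commit):

// Resolve full capabilities for a known model by merging its partial
// entry over the defaults.
const gpt4o: ModelCapabilities = {
    ...DEFAULT_MODEL_CAPABILITIES,
    ...MODEL_CAPABILITIES['gpt-4o']
};
// contextWindowTokens === 128000 and hasVision === true come from the
// entry; maxCompletionTokens === 1024 is inherited from the defaults.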

/**
 * Calculate available context window size for context generation
 * This takes into account expected message sizes and other overhead
 *
 * @param modelCapabilities Capabilities of the target model
 * @param messages Current conversation messages
 * @param expectedTurns Number of expected additional conversation turns
 * @returns Available context size in characters
 */
export function calculateAvailableContextSize(
    modelCapabilities: ModelCapabilities,
    messages: Message[],
    expectedTurns: number = 3
): number {
    // Calculate current message usage in characters (rough estimate)
    let currentMessageChars = 0;
    for (const message of messages) {
        currentMessageChars += message.content.length;
    }

    // Reserve space for system prompt and overhead
    const systemPromptReserve = 1000;

    // Reserve space for expected conversation turns
    const turnReserve = expectedTurns * 2000; // Average 2000 chars per turn (including both user and assistant)

    // Calculate available space
    const totalReserved = currentMessageChars + systemPromptReserve + turnReserve;
    const availableContextSize = Math.max(0, modelCapabilities.contextWindowChars - totalReserved);

    // Use at most 70% of total context window size to be safe
    const maxSafeContextSize = Math.floor(modelCapabilities.contextWindowChars * 0.7);

    // Return the smaller of available size or max safe size
    return Math.min(availableContextSize, maxSafeContextSize);
}
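A worked example of the reservation arithmetic above (illustrative only; it assumes Message carries role and content string fields, as the character count relies on content):

const caps: ModelCapabilities = {
    ...DEFAULT_MODEL_CAPABILITIES,
    ...MODEL_CAPABILITIES['gpt-4'] // 32000-char window
};
const messages: Message[] = [
    { role: 'user', content: 'Summarize my meeting notes.' } // 27 chars
];
// reserved  = 27 + 1000 + 3 * 2000 = 7027
// available = 32000 - 7027 = 24973
// safe cap  = floor(32000 * 0.7) = 22400
const available = calculateAvailableContextSize(caps, messages); // 22400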
@@ -0,0 +1,159 @@
import log from '../log.js';
import type { ModelCapabilities } from './interfaces/model_capabilities.js';
import { MODEL_CAPABILITIES, DEFAULT_MODEL_CAPABILITIES } from './interfaces/model_capabilities.js';
import aiServiceManager from './ai_service_manager.js';
import { getEmbeddingProvider } from './providers/providers.js';
import type { BaseEmbeddingProvider } from './embeddings/base_embeddings.js';
import type { EmbeddingModelInfo } from './interfaces/embedding_interfaces.js';

// Define a type for embedding providers that might have the getModelInfo method
interface EmbeddingProviderWithModelInfo {
    getModelInfo?: (modelName: string) => Promise<EmbeddingModelInfo>;
}

/**
 * Service for fetching and caching model capabilities
 */
export class ModelCapabilitiesService {
    // Cache model capabilities
    private capabilitiesCache: Map<string, ModelCapabilities> = new Map();

    constructor() {
        // Initialize cache with known models
        this.initializeCache();
    }

    /**
     * Initialize the cache with known model capabilities
     */
    private initializeCache() {
        // Add all predefined model capabilities to cache
        for (const [model, capabilities] of Object.entries(MODEL_CAPABILITIES)) {
            this.capabilitiesCache.set(model, {
                ...DEFAULT_MODEL_CAPABILITIES,
                ...capabilities
            });
        }
    }

    /**
     * Get model capabilities, fetching from provider if needed
     *
     * @param modelName Full model name (with or without provider prefix)
     * @returns Model capabilities
     */
    async getModelCapabilities(modelName: string): Promise<ModelCapabilities> {
        // Handle provider-prefixed model names (e.g., "openai:gpt-4")
        let provider = 'default';
        let baseModelName = modelName;

        if (modelName.includes(':')) {
            const parts = modelName.split(':');
            provider = parts[0];
            baseModelName = parts[1];
        }

        // Check cache first
        const cacheKey = baseModelName;
        if (this.capabilitiesCache.has(cacheKey)) {
            return this.capabilitiesCache.get(cacheKey)!;
        }

        // Fetch from provider if possible
        try {
            // Get provider service
            const providerService = aiServiceManager.getService(provider);

            if (providerService && typeof (providerService as any).getModelCapabilities === 'function') {
                // If provider supports direct capability fetching, use it
                const capabilities = await (providerService as any).getModelCapabilities(baseModelName);

                if (capabilities) {
                    // Merge with defaults and cache
                    const fullCapabilities = {
                        ...DEFAULT_MODEL_CAPABILITIES,
                        ...capabilities
                    };

                    this.capabilitiesCache.set(cacheKey, fullCapabilities);
                    log.info(`Fetched capabilities for ${modelName}: context window ${fullCapabilities.contextWindowTokens} tokens`);

                    return fullCapabilities;
                }
            }

            // Try to fetch from embedding provider if available
            const embeddingProvider = getEmbeddingProvider(provider);

            if (embeddingProvider) {
                try {
                    // Cast to a type that might have getModelInfo method
                    const providerWithModelInfo = embeddingProvider as unknown as EmbeddingProviderWithModelInfo;

                    if (providerWithModelInfo.getModelInfo) {
                        const modelInfo = await providerWithModelInfo.getModelInfo(baseModelName);

                        if (modelInfo && modelInfo.contextWidth) {
                            // Convert to our capabilities format
                            const capabilities: ModelCapabilities = {
                                ...DEFAULT_MODEL_CAPABILITIES,
                                contextWindowTokens: modelInfo.contextWidth,
                                contextWindowChars: modelInfo.contextWidth * 4 // Rough estimate: 4 chars per token
                            };

                            this.capabilitiesCache.set(cacheKey, capabilities);
                            log.info(`Derived capabilities for ${modelName} from embedding provider: context window ${capabilities.contextWindowTokens} tokens`);

                            return capabilities;
                        }
                    }
                } catch (error) {
                    log.info(`Could not get model info from embedding provider for ${modelName}: ${error}`);
                }
            }
        } catch (error) {
            log.error(`Error fetching model capabilities for ${modelName}: ${error}`);
        }

        // If we get here, try to find a similar model in our predefined list
        for (const knownModel of Object.keys(MODEL_CAPABILITIES)) {
            // Check if the model name contains this known model (e.g., "gpt-4-1106-preview" contains "gpt-4")
            if (baseModelName.includes(knownModel)) {
                const capabilities = {
                    ...DEFAULT_MODEL_CAPABILITIES,
                    ...MODEL_CAPABILITIES[knownModel]
                };

                this.capabilitiesCache.set(cacheKey, capabilities);
                log.info(`Using similar model (${knownModel}) capabilities for ${modelName}`);

                return capabilities;
            }
        }

        // Fall back to defaults if nothing else works
        log.info(`Using default capabilities for unknown model ${modelName}`);
        this.capabilitiesCache.set(cacheKey, DEFAULT_MODEL_CAPABILITIES);

        return DEFAULT_MODEL_CAPABILITIES;
    }

    /**
     * Update model capabilities in the cache
     *
     * @param modelName Model name
     * @param capabilities Capabilities to update
     */
    updateModelCapabilities(modelName: string, capabilities: Partial<ModelCapabilities>) {
        const currentCapabilities = this.capabilitiesCache.get(modelName) || DEFAULT_MODEL_CAPABILITIES;

        this.capabilitiesCache.set(modelName, {
            ...currentCapabilities,
            ...capabilities
        });
    }
}

// Create and export singleton instance
const modelCapabilitiesService = new ModelCapabilitiesService();
export default modelCapabilitiesService;
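A usage sketch of the singleton (illustrative, not part of the commit; the import path is assumed from the imports above, and the Ollama context size shown is hypothetical):

import modelCapabilitiesService from './model_capabilities_service.js'; // assumed path

async function example() {
    // Exact hit: 'gpt-4' was pre-seeded into the cache by initializeCache()
    const gpt4 = await modelCapabilitiesService.getModelCapabilities('openai:gpt-4');

    // Fuzzy fallback: 'gpt-4-1106-preview' is not cached, but it contains
    // the known key 'gpt-4', so those capabilities are reused and cached
    const preview = await modelCapabilitiesService.getModelCapabilities('gpt-4-1106-preview');

    // Patch the cache when better data arrives (values here are hypothetical)
    modelCapabilitiesService.updateModelCapabilities('llama3', {
        contextWindowTokens: 131072,
        contextWindowChars: 131072 * 4
    });
}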