3848dd82d9
Add explicit chat capability tracking, expose formatted runtime capabilities in the info command, and support a max context size option for user AI settings. Also update Ollama base URL resolution to use OLLAMA_ADDRESS and simplify provider chat command execution.
336 lines
15 KiB
TypeScript
336 lines
15 KiB
TypeScript
import {AiProvider} from "../model/ai-provider";
|
|
import {AiModelCapabilities} from "../model/ai-model-capabilities";
|
|
import {Environment} from "../common/environment";
|
|
import {logError} from "../util/utils";
|
|
import {AiCapabilityInfo} from "../model/ai-capability-info";
|
|
import {isOllamaSpeechToTextModel} from "./speech-to-text-models";
|
|
import {
|
|
AiCapabilityName,
|
|
AiRuntimeTarget,
|
|
createGeminiOpenAiClient,
|
|
createGoogleGenAiClient,
|
|
createMistralClient,
|
|
createOllamaClient,
|
|
createOpenAiClient,
|
|
getGeminiApiMode,
|
|
resolveAiRuntimeTarget,
|
|
sameRuntimeEndpoint,
|
|
} from "./ai-runtime-target";
|
|
|
|
/**
 * Every capability name tracked for a model.
 *
 * getRuntimeCapabilities() walks this list in order to resolve a dedicated
 * runtime target for each capability.
 */
const CAPABILITY_NAMES: AiCapabilityName[] = [
    "chat", "vision", "ocr",
    "thinking", "extendedThinking", "tools",
    "audio", "documents", "outputImages",
    "speechToText", "textToSpeech",
];
|
|
|
|
/**
 * Reads the model name currently configured in Environment for a provider.
 *
 * @param provider Provider whose configured model is requested.
 * @returns The value of the provider's model setting on Environment.
 */
export function getRuntimeModel(provider: AiProvider): string {
    let configuredModel: string;

    switch (provider) {
        case AiProvider.OLLAMA:
            configuredModel = Environment.OLLAMA_CHAT_MODEL;
            break;
        case AiProvider.GEMINI:
            configuredModel = Environment.GEMINI_MODEL;
            break;
        case AiProvider.MISTRAL:
            configuredModel = Environment.MISTRAL_MODEL;
            break;
        case AiProvider.OPENAI:
            configuredModel = Environment.OPENAI_MODEL;
            break;
    }

    return configuredModel;
}
|
|
|
|
/**
 * Overwrites the model name configured in Environment for a provider.
 *
 * @param provider Provider whose model setting is replaced.
 * @param model New model name to store.
 */
export function setRuntimeModel(provider: AiProvider, model: string): void {
    if (provider === AiProvider.OLLAMA) {
        Environment.OLLAMA_CHAT_MODEL = model;
    } else if (provider === AiProvider.GEMINI) {
        Environment.GEMINI_MODEL = model;
    } else if (provider === AiProvider.MISTRAL) {
        Environment.MISTRAL_MODEL = model;
    } else if (provider === AiProvider.OPENAI) {
        Environment.OPENAI_MODEL = model;
    }
}
|
|
|
|
/**
 * Builds a single capability record.
 *
 * @param supported Whether the capability is available.
 * @param target Runtime target that serves the capability; when omitted only
 *   the `supported` flag is set.
 * @param runtimeTarget Target to compare against; the record is flagged
 *   `external` when `target` uses a different model or a different endpoint.
 * @returns The capability record.
 */
function capability(supported: boolean, target?: AiRuntimeTarget, runtimeTarget?: AiRuntimeTarget): AiCapabilityInfo {
    const info: AiCapabilityInfo = {supported};
    if (!target) return info;

    if (target.model) info.model = target.model;

    // Without a runtime target there is nothing to compare the endpoint against.
    const endpointDiffers = runtimeTarget ? !sameRuntimeEndpoint(target, runtimeTarget) : false;
    info.endpoint = {
        provider: target.provider,
        baseUrl: target.baseUrl,
        external: endpointDiffers,
    };

    // `external` is only ever set to true; otherwise it stays absent.
    if (runtimeTarget && (target.model !== runtimeTarget.model || endpointDiffers)) {
        info.external = true;
    }

    return info;
}
|
|
|
|
/**
 * Creates an AiModelCapabilities instance in which every capability listed in
 * CAPABILITY_NAMES defaults to unsupported, then applies the given overrides.
 *
 * @param overrides Capability records that replace the unsupported defaults.
 * @returns A new AiModelCapabilities carrying defaults plus overrides.
 */
function buildCapabilities(overrides: Partial<Record<AiCapabilityName, AiCapabilityInfo>>): AiModelCapabilities {
    const merged: Partial<Record<AiCapabilityName, AiCapabilityInfo>> = {};

    // Each capability gets its own fresh record so none of them are shared.
    for (const capabilityName of CAPABILITY_NAMES) {
        merged[capabilityName] = {supported: false};
    }
    Object.assign(merged, overrides);

    return Object.assign(new AiModelCapabilities(), merged);
}
|
|
|
|
/**
 * Normalises a model name for case-insensitive matching.
 *
 * Types are erased at runtime, so a name that was never configured can reach
 * this helper as null/undefined. It is treated as an empty name rather than
 * throwing, so one unset model does not abort a whole capability lookup.
 *
 * @param model Model name as configured or as returned by a provider.
 * @returns The lower-cased name, or "" when no name was given.
 */
function lowerModelName(model: string): string {
    return (model ?? "").toLowerCase();
}
|
|
|
|
/**
 * Heuristically decides, from the name alone, whether an OpenAI model is a
 * text/chat model.
 *
 * Image, speech, transcription, embedding and moderation models are rejected.
 * Text-to-speech models are matched both by the `tts-` prefix (e.g. `tts-1`)
 * and by a `-tts` marker anywhere in the name (e.g. `gpt-4o-mini-tts`), which
 * would otherwise pass the `gpt-` prefix check.
 *
 * @param model OpenAI model name; matching is case-insensitive.
 * @returns true when the name looks like a text/chat model.
 */
function isOpenAiTextModel(model: string): boolean {
    if (!model) return false;
    const name = model.toLowerCase();

    const nonTextPrefix = /^(gpt-image|dall-e|tts-|whisper|text-embedding|text-moderation|omni-moderation)/;
    if (nonTextPrefix.test(name)) return false;
    if (name.includes("transcribe") || name.includes("-tts")) return false;

    return /^(gpt-|o\d|chatgpt-|codex-|computer-use)/.test(name);
}
|
|
|
|
/**
 * Name-based check for OpenAI reasoning models: names starting with `gpt-5`
 * or with `o` followed by a digit.
 *
 * @param model OpenAI model name; matching is case-insensitive.
 */
function isOpenAiReasoningModel(model: string): boolean {
    const normalized = lowerModelName(model);
    if (normalized.startsWith("gpt-5")) return true;
    return /^o\d/.test(normalized);
}
|
|
|
|
/**
 * Name-based check for OpenAI models that accept image input: any text model
 * except `gpt-3.5*` and the `audio-preview` / `search-preview` variants.
 *
 * @param model OpenAI model name; matching is case-insensitive.
 */
function isOpenAiVisionModel(model: string): boolean {
    const normalized = lowerModelName(model);
    const excludedVariant =
        normalized.startsWith("gpt-3.5") ||
        normalized.includes("audio-preview") ||
        normalized.includes("search-preview");

    return isOpenAiTextModel(model) && !excludedVariant;
}
|
|
|
|
/**
 * Name-based check for Gemini-provider models that are not chat models:
 * names containing `lyria`, `-tts` or `image-preview`, or ending in `-image`.
 *
 * @param model Model name; matching is case-insensitive.
 */
function isGeminiNonChatModel(model: string): boolean {
    const normalized = lowerModelName(model);
    if (normalized.endsWith("-image")) return true;

    const nonChatMarkers = ["lyria", "-tts", "image-preview"];
    return nonChatMarkers.some((marker) => normalized.includes(marker));
}
|
|
|
|
/**
 * Name-based check for audio input support: the name must start with
 * `gemini-` and must not be classified as a non-chat model.
 *
 * @param model Model name; matching is case-insensitive.
 */
function geminiSupportsAudioInput(model: string): boolean {
    if (!lowerModelName(model).startsWith("gemini-")) return false;
    return !isGeminiNonChatModel(model);
}
|
|
|
|
/**
 * Determines what a given model of a provider can do.
 *
 * - Ollama: reads the `capabilities` list returned by `show` for the model.
 * - Mistral: reads the `capabilities` object returned by `models.retrieve`.
 * - Gemini / OpenAI: no lookup is made; the result is derived from model-name
 *   heuristics.
 *
 * Some capabilities (documents, image output, speech-to-text, text-to-speech)
 * are reported against their own separately resolved runtime target instead
 * of the inspected model. Every record is compared with the provider's
 * configured chat target so that it can be flagged as external.
 *
 * @param provider Provider that hosts the model.
 * @param model Model name to inspect.
 * @param purpose Purpose used to resolve the runtime target for `model`.
 * @returns The capability set, or `undefined` when `model` is empty or when
 *   anything throws (the error is passed to `logError`).
 */
export async function getModelCapabilities(
    provider: AiProvider,
    model: string,
    purpose: AiCapabilityName | "chat" = "chat",
): Promise<AiModelCapabilities | undefined> {
    if (!model) return undefined;

    try {
        const runtimeTarget = resolveAiRuntimeTarget(provider, "chat", getRuntimeModel(provider));
        const target = resolveAiRuntimeTarget(provider, purpose, model);
        const normalizedName = lowerModelName(model);

        // Record for a capability served by the inspected model itself.
        const own = (supported: boolean): AiCapabilityInfo => capability(supported, target, runtimeTarget);
        // Record for a capability served by a separately resolved target.
        const delegated = (supported: boolean, other: AiRuntimeTarget): AiCapabilityInfo =>
            capability(supported, other, runtimeTarget);

        switch (provider) {
            case AiProvider.OLLAMA: {
                const info = await createOllamaClient(target).show({model});
                const reported = Array.isArray(info.capabilities) ? info.capabilities : [];
                const has = (cap: string): boolean => reported.includes(cap);
                const audioSupported = isOllamaSpeechToTextModel(model);
                const documentsTarget = resolveAiRuntimeTarget(provider, "documents");

                return buildCapabilities({
                    chat: own(true),
                    vision: own(has("vision")),
                    ocr: own(has("ocr")),
                    thinking: own(has("thinking")),
                    // Matched on the lower-cased name, like every other name heuristic here.
                    extendedThinking: own(has("thinking") && normalizedName.includes("gpt-oss")),
                    tools: own(has("tools")),
                    audio: own(audioSupported),
                    documents: delegated(!!documentsTarget.model, documentsTarget),
                    speechToText: own(audioSupported),
                });
            }
            case AiProvider.GEMINI: {
                const chatLike = normalizedName.startsWith("gemini-") && !isGeminiNonChatModel(model);
                const reasoningModel = normalizedName.includes("2.5") || normalizedName.includes("thinking");
                const imageTarget = resolveAiRuntimeTarget(provider, "outputImages");
                const speechTarget = resolveAiRuntimeTarget(provider, "speechToText");
                const ttsTarget = resolveAiRuntimeTarget(provider, "textToSpeech");

                return buildCapabilities({
                    chat: own(true),
                    vision: own(chatLike),
                    ocr: own(chatLike),
                    thinking: own(reasoningModel),
                    extendedThinking: own(reasoningModel),
                    tools: own(chatLike),
                    audio: own(geminiSupportsAudioInput(model)),
                    speechToText: delegated(
                        !!speechTarget.apiKey && geminiSupportsAudioInput(speechTarget.model),
                        speechTarget,
                    ),
                    outputImages: delegated(!!imageTarget.apiKey && !!imageTarget.model, imageTarget),
                    textToSpeech: delegated(!!ttsTarget.apiKey && !!ttsTarget.model, ttsTarget),
                });
            }
            case AiProvider.MISTRAL: {
                const info = await createMistralClient(target).models.retrieve({modelId: model});
                const caps = info.type !== "UNKNOWN" ? info.capabilities : undefined;
                const speechTarget = resolveAiRuntimeTarget(provider, "speechToText");
                const ttsTarget = resolveAiRuntimeTarget(provider, "textToSpeech");

                return buildCapabilities({
                    chat: own(true),
                    vision: own(!!caps?.vision),
                    ocr: own(!!caps?.ocr),
                    thinking: own(!!caps?.reasoning),
                    tools: own(!!caps?.functionCalling),
                    audio: own(!!caps?.audio),
                    documents: own(true),
                    speechToText: delegated(!!speechTarget.model || !!caps?.audioTranscription, speechTarget),
                    textToSpeech: delegated(!!ttsTarget.apiKey && !!ttsTarget.model, ttsTarget),
                });
            }
            case AiProvider.OPENAI: {
                const textModel = isOpenAiTextModel(model);
                const reasoningModel = isOpenAiReasoningModel(model);
                const visionModel = isOpenAiVisionModel(model);
                const imageTarget = resolveAiRuntimeTarget(provider, "outputImages");
                const speechTarget = resolveAiRuntimeTarget(provider, "speechToText");
                const ttsTarget = resolveAiRuntimeTarget(provider, "textToSpeech");

                return buildCapabilities({
                    chat: own(true),
                    vision: own(visionModel),
                    ocr: own(visionModel),
                    thinking: own(reasoningModel),
                    extendedThinking: own(reasoningModel),
                    tools: own(textModel),
                    outputImages: delegated(!!imageTarget.model, imageTarget),
                    speechToText: delegated(!!speechTarget.model, speechTarget),
                    textToSpeech: delegated(!!ttsTarget.apiKey && !!ttsTarget.model, ttsTarget),
                });
            }
        }
    } catch (e) {
        // Best effort: callers treat `undefined` as "capabilities unknown".
        logError(e);
        return undefined;
    }
}
|
|
|
|
/**
 * Assembles the effective capability set for a provider's runtime.
 *
 * Starts from the capabilities of the base target (the given `target`, or the
 * chat target resolved for `model`). Then, for every capability whose own
 * resolved target differs from the base target in model or endpoint, replaces
 * that capability's record with the one reported for the dedicated target.
 *
 * @param provider Provider to inspect; defaults to the configured default.
 * @param model Model of the base target; defaults to the provider's configured model.
 * @param target Optional pre-resolved base target; takes precedence over `model`.
 * @returns The merged capability set; all-unsupported defaults are used when
 *   the base lookup yields nothing.
 */
export async function getRuntimeCapabilities(
    provider: AiProvider = Environment.DEFAULT_AI_PROVIDER,
    model: string | undefined = getRuntimeModel(provider),
    target?: AiRuntimeTarget
): Promise<AiModelCapabilities> {
    const baseTarget = target ?? resolveAiRuntimeTarget(provider, "chat", model ?? getRuntimeModel(provider));
    const baseCapabilities = await getModelCapabilities(provider, baseTarget.model, target?.purpose ?? "chat");
    const merged = baseCapabilities ?? buildCapabilities({});

    for (const capabilityName of CAPABILITY_NAMES) {
        const dedicatedTarget = resolveAiRuntimeTarget(provider, capabilityName);
        const servedByBase =
            dedicatedTarget.model === baseTarget.model && sameRuntimeEndpoint(dedicatedTarget, baseTarget);

        if (!servedByBase) {
            const dedicatedCapabilities = await getModelCapabilities(provider, dedicatedTarget.model, capabilityName);
            const dedicatedInfo = dedicatedCapabilities?.[capabilityName];
            if (dedicatedInfo) {
                merged[capabilityName] = dedicatedInfo;
            }
        }
    }

    return merged;
}
|
|
|
|
/**
 * Renders one display line per capability via
 * `Environment.getRuntimeCapabilityLineText`.
 *
 * @param provider Provider to describe; defaults to the configured default.
 * @param model Model the lines are rendered for; also used to decide whether a
 *   capability is external when its record carries no explicit flag.
 * @param caps Pre-computed capabilities; looked up through
 *   `getRuntimeCapabilities` when omitted.
 * @returns Rendered lines in a fixed order: chat, vision, OCR, thinking,
 *   extended thinking, tools, audio, speech-to-text, text-to-speech,
 *   documents, output images.
 */
export async function getFormattedCapabilities(
    provider: AiProvider = Environment.DEFAULT_AI_PROVIDER,
    model: string | undefined = getRuntimeModel(provider),
    caps?: AiModelCapabilities,
): Promise<string[]> {
    const resolved = caps ? caps : await getRuntimeCapabilities(provider, model);

    const renderLine = (title: string, info?: AiCapabilityInfo) => {
        // Without an explicit flag, a record naming a different model counts as external.
        const isExternal = info?.external ?? (!!info?.model && info.model !== model);
        return Environment.getRuntimeCapabilityLineText({
            state: info?.supported ? "✅" : "❌",
            title,
            model: info?.model,
            endpointBaseUrl: info?.endpoint?.baseUrl,
            external: isExternal,
        });
    };

    const rows: Array<[string, AiCapabilityInfo | undefined]> = [
        [Environment.runtimeCapabilityChatText, resolved.chat],
        [Environment.runtimeCapabilityVisionText, resolved.vision],
        [Environment.runtimeCapabilityOcrText, resolved.ocr],
        [Environment.runtimeCapabilityThinkingText, resolved.thinking],
        [Environment.runtimeCapabilityExtendedThinkingText, resolved.extendedThinking],
        [Environment.runtimeCapabilityToolsText, resolved.tools],
        [Environment.runtimeCapabilityAudioText, resolved.audio],
        [Environment.runtimeCapabilitySpeechToTextText, resolved.speechToText],
        [Environment.runtimeCapabilityTextToSpeechText, resolved.textToSpeech],
        [Environment.runtimeCapabilityDocumentsText, resolved.documents],
        [Environment.runtimeCapabilityOutputImagesText, resolved.outputImages],
    ];

    return rows.map(([title, info]) => renderLine(title, info));
}
|
|
|
|
/**
 * Produces the runtime model summary via `Environment.getRuntimeModelInfoText`,
 * passing the lower-cased provider name, the model and the formatted
 * capability lines.
 *
 * @param provider Provider to describe; defaults to the configured default.
 * @param model Model to describe; defaults to the provider's configured model.
 * @param caps Pre-computed capabilities, forwarded to `getFormattedCapabilities`.
 * @returns The text returned by `Environment.getRuntimeModelInfoText`.
 */
export async function formatRuntimeModelInfo(
    provider: AiProvider = Environment.DEFAULT_AI_PROVIDER,
    model: string | undefined = getRuntimeModel(provider),
    caps?: AiModelCapabilities,
): Promise<string> {
    const providerLabel = provider.toString().toLowerCase();
    const capabilityLines = await getFormattedCapabilities(provider, model, caps);

    return Environment.getRuntimeModelInfoText(providerLabel, model, capabilityLines);
}
|
|
|
|
/**
 * Lists the model names a provider offers, using the client built for the
 * provider's configured chat target.
 *
 * Gemini names are returned without a leading `models/` prefix in both API
 * modes, so the same model reads the same whichever mode produced the
 * listing. Errors thrown by the provider clients are not caught here.
 *
 * @param provider Provider to query.
 * @returns Model names; entries without a usable name are dropped for Ollama,
 *   Mistral and OpenAI.
 */
export async function listProviderModels(provider: AiProvider): Promise<string[]> {
    const target = resolveAiRuntimeTarget(provider, "chat", getRuntimeModel(provider));

    switch (provider) {
        case AiProvider.OLLAMA: {
            const response: any = await createOllamaClient(target).list();
            return (response.models ?? []).map((m: any) => m.model || m.name).filter(Boolean);
        }
        case AiProvider.GEMINI: {
            // Both clients hand back something async-iterable over model entries.
            const listing: any = getGeminiApiMode(target) === "openai"
                ? await createGeminiOpenAiClient(target).models.list()
                : await createGoogleGenAiClient(target).models.list();

            const models: string[] = [];
            for await (const entry of listing) {
                const name = String(entry.name || entry.id || String(entry));
                // Names/ids may carry the `models/` resource prefix; strip it in either mode.
                models.push(name.replace(/^models\//, ""));
            }
            return models;
        }
        case AiProvider.MISTRAL: {
            const response: any = await createMistralClient(target).models.list();
            const entries = response?.data ?? response?.models ?? response;
            // An unexpected non-array response yields an empty list instead of a `.map` TypeError.
            if (!Array.isArray(entries)) return [];
            return entries.map((m: any) => m.id || m.name || String(m)).filter(Boolean);
        }
        case AiProvider.OPENAI: {
            const response: any = await createOpenAiClient(target).models.list();
            return (response.data ?? []).map((m: any) => m.id).filter(Boolean);
        }
    }
}
|