ai: add unified runtime and provider adapters

2026-05-10 22:52:35 +03:00
parent 4c2a5471df
commit 32c35f54aa
15 changed files with 4038 additions and 8 deletions
@@ -0,0 +1,213 @@
+import {Mistral} from "@mistralai/mistralai";
+import {GoogleGenAI} from "@google/genai";
+import {Ollama} from "ollama";
+import {OpenAI} from "openai";
+import {Environment} from "../common/environment";
+import {AiModelCapabilities} from "../model/ai-model-capabilities";
+import {AiProvider} from "../model/ai-provider";
+
+/** Name of a capability flag: any property key of `AiModelCapabilities`. */
+export type AiCapabilityName = keyof AiModelCapabilities;
+/** What a runtime target is used for: one of the capabilities, or plain `"chat"`. */
+export type AiRuntimePurpose = AiCapabilityName | "chat";
+
+/** A resolved provider / model / endpoint combination for one purpose. */
+export type AiRuntimeTarget = {
+    provider: AiProvider;
+    purpose: AiRuntimePurpose;
+    model: string;
+    // Endpoint override; undefined when none is configured.
+    baseUrl?: string;
+    // Credential for the endpoint; undefined when none is configured.
+    apiKey?: string;
+};
+
+/** Which client flavour is used for Gemini: the Google GenAI SDK or the OpenAI-compatible API. */
+export type GeminiApiMode = "google" | "openai";
+
+// Base URL used for Gemini when it is accessed through an OpenAI client and no endpoint is configured.
+const GEMINI_OPENAI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/";
+
+// Environment-variable name fragments tried for each purpose, in priority order.
+// The helpers below combine them as `<PROVIDER>_<SUFFIX>_<KIND>`; the first variable
+// with a non-empty value wins, so later entries act as fallbacks for earlier ones.
+const PURPOSE_SUFFIXES: Record<AiRuntimePurpose, string[]> = {
+    chat: ["CHAT"],
+    vision: ["VISION", "IMAGE"],
+    ocr: ["OCR", "VISION", "IMAGE"],
+    thinking: ["THINKING", "THINK"],
+    extendedThinking: ["EXTENDED_THINKING", "THINKING", "THINK"],
+    tools: ["TOOLS", "CHAT"],
+    audio: ["AUDIO"],
+    documents: ["DOCUMENTS", "RAG", "EMBEDDING"],
+    outputImages: ["OUTPUT_IMAGES", "IMAGE"],
+    speechToText: ["SPEECH_TO_TEXT", "TRANSCRIPTION", "STT", "AUDIO"],
+    textToSpeech: ["TEXT_TO_SPEECH", "TTS"],
+};
+
+/** Returns the provider rendered as a string, used as the leading part of env-var names. */
+function providerPrefix(provider: AiProvider): string {
+    const prefix = provider.toString();
+    return prefix;
+}
+
+/** Shorthand for `Environment.getOptionalConfigValue(name)`; yields undefined when that lookup does. */
+function env(name: string): string | undefined {
+    return Environment.getOptionalConfigValue(name);
+}
+
+/**
+ * Looks the given variable names up in order and returns the first non-empty value.
+ *
+ * @param names Candidate variable names, highest priority first.
+ * @returns The first truthy value, or undefined when every candidate is unset or empty.
+ */
+function firstEnv(names: string[]): string | undefined {
+    let found: string | undefined;
+
+    // `some` stops at the first hit, so later names are not looked up.
+    names.some(candidate => {
+        found = env(candidate) || undefined;
+        return found !== undefined;
+    });
+
+    return found;
+}
+
+/**
+ * Builds the candidate env-var names for a purpose-specific endpoint.
+ *
+ * For every suffix of the purpose (in priority order) the `_BASE_URL`, `_ENDPOINT`
+ * and `_ADDRESS` variants are emitted, in that order.
+ */
+function endpointEnvNames(provider: AiProvider, purpose: AiRuntimePurpose): string[] {
+    const prefix = providerPrefix(provider);
+    const names: string[] = [];
+
+    for (const suffix of PURPOSE_SUFFIXES[purpose]) {
+        for (const kind of ["BASE_URL", "ENDPOINT", "ADDRESS"]) {
+            names.push(`${prefix}_${suffix}_${kind}`);
+        }
+    }
+
+    return names;
+}
+
+/** Builds the candidate `<PROVIDER>_<SUFFIX>_API_KEY` names for a purpose, in priority order. */
+function apiKeyEnvNames(provider: AiProvider, purpose: AiRuntimePurpose): string[] {
+    const prefix = providerPrefix(provider);
+    const names: string[] = [];
+
+    for (const suffix of PURPOSE_SUFFIXES[purpose]) {
+        names.push(`${prefix}_${suffix}_API_KEY`);
+    }
+
+    return names;
+}
+
+/** Builds the candidate `<PROVIDER>_<SUFFIX>_MODEL` names for a purpose, in priority order. */
+function modelEnvNames(provider: AiProvider, purpose: AiRuntimePurpose): string[] {
+    const prefix = providerPrefix(provider);
+    const suffixes = PURPOSE_SUFFIXES[purpose];
+
+    return Array.from(suffixes, suffix => `${prefix}_${suffix}_MODEL`);
+}
+
+/**
+ * Returns the provider-wide base URL, or undefined when none is configured.
+ *
+ * Lookups go through `firstEnv`, so a variable that is present but empty is skipped
+ * instead of shadowing the next candidate. This matches how the purpose-specific
+ * endpoint variables are resolved.
+ *
+ * @param provider Provider whose endpoint should be looked up.
+ * @returns The configured URL; for Gemini in "openai" API mode the OpenAI-compatible
+ *          default is used when nothing is configured.
+ */
+export function getProviderBaseUrl(provider: AiProvider): string | undefined {
+    switch (provider) {
+        case AiProvider.OLLAMA:
+            return firstEnv(["OLLAMA_ENDPOINT"]);
+        case AiProvider.GEMINI:
+            return firstEnv(["GEMINI_BASE_URL", "GEMINI_ENDPOINT"])
+                ?? (Environment.GEMINI_API_MODE === "openai" ? GEMINI_OPENAI_BASE_URL : undefined);
+        case AiProvider.MISTRAL:
+            return firstEnv(["MISTRAL_BASE_URL", "MISTRAL_ENDPOINT"]);
+        case AiProvider.OPENAI:
+            return firstEnv(["OPENAI_BASE_URL", "OPENAI_ENDPOINT"]);
+    }
+}
+
+/**
+ * Returns the provider-wide API key as exposed by `Environment`.
+ *
+ * @param provider Provider whose key should be returned.
+ * @returns The corresponding `Environment.*_API_KEY` value (may be undefined).
+ */
+export function getProviderApiKey(provider: AiProvider): string | undefined {
+    switch (provider) {
+        case AiProvider.OLLAMA:
+            return Environment.OLLAMA_API_KEY;
+        case AiProvider.GEMINI:
+            return Environment.GEMINI_API_KEY;
+        case AiProvider.MISTRAL:
+            return Environment.MISTRAL_API_KEY;
+        case AiProvider.OPENAI:
+            return Environment.OPENAI_API_KEY;
+    }
+}
+
+/**
+ * Returns the default model for a provider/purpose pair, read from `Environment`.
+ *
+ * Purposes without a dedicated case fall through to the provider's general model
+ * (`OLLAMA_CHAT_MODEL`, `GEMINI_MODEL`, `MISTRAL_MODEL`, `OPENAI_MODEL`).
+ *
+ * @param provider Provider to pick a model for.
+ * @param purpose  What the model will be used for.
+ * @returns The model name configured for that combination.
+ */
+export function getDefaultModelForPurpose(provider: AiProvider, purpose: AiRuntimePurpose): string {
+    switch (provider) {
+        case AiProvider.OLLAMA:
+            switch (purpose) {
+                // All image-related purposes share the single Ollama image model.
+                case "vision":
+                case "ocr":
+                case "outputImages":
+                    return Environment.OLLAMA_IMAGE_MODEL;
+                case "thinking":
+                case "extendedThinking":
+                    return Environment.OLLAMA_THINK_MODEL;
+                case "audio":
+                case "speechToText":
+                    return Environment.OLLAMA_AUDIO_MODEL;
+                case "documents":
+                    return Environment.OLLAMA_EMBEDDING_MODEL;
+                default:
+                    return Environment.OLLAMA_CHAT_MODEL;
+            }
+        case AiProvider.GEMINI:
+            switch (purpose) {
+                case "outputImages":
+                    return Environment.GEMINI_IMAGE_MODEL;
+                case "speechToText":
+                    return Environment.GEMINI_TRANSCRIPTION_MODEL;
+                case "textToSpeech":
+                    return Environment.GEMINI_TTS_MODEL;
+                default:
+                    return Environment.GEMINI_MODEL;
+            }
+        case AiProvider.MISTRAL:
+            switch (purpose) {
+                case "speechToText":
+                    return Environment.MISTRAL_TRANSCRIPTION_MODEL;
+                case "textToSpeech":
+                    // Falls back to the general model when no TTS model is set (falsy value).
+                    return Environment.MISTRAL_TTS_MODEL || Environment.MISTRAL_MODEL;
+                default:
+                    return Environment.MISTRAL_MODEL;
+            }
+        case AiProvider.OPENAI:
+            switch (purpose) {
+                case "outputImages":
+                    return Environment.OPENAI_IMAGE_MODEL;
+                case "speechToText":
+                    return Environment.OPENAI_TRANSCRIPTION_MODEL;
+                case "textToSpeech":
+                    return Environment.OPENAI_TTS_MODEL;
+                default:
+                    return Environment.OPENAI_MODEL;
+            }
+    }
+}
+
+/**
+ * Resolves the model, endpoint and API key to use for a provider/purpose pair.
+ *
+ * Each field is resolved independently: purpose-specific environment variables
+ * take precedence over the provider-wide values. For the model, an explicit
+ * `modelOverride` beats both.
+ *
+ * @param provider      Provider to resolve for.
+ * @param purpose       What the target will be used for.
+ * @param modelOverride Model to use instead of any configured one (only skipped when null/undefined).
+ * @returns The resolved target.
+ */
+export function resolveAiRuntimeTarget(
+    provider: AiProvider,
+    purpose: AiRuntimePurpose,
+    modelOverride?: string,
+): AiRuntimeTarget {
+    const resolved: AiRuntimeTarget = {
+        provider,
+        purpose,
+        model: modelOverride
+            ?? firstEnv(modelEnvNames(provider, purpose))
+            ?? getDefaultModelForPurpose(provider, purpose),
+        baseUrl: firstEnv(endpointEnvNames(provider, purpose)) ?? getProviderBaseUrl(provider),
+        apiKey: firstEnv(apiKeyEnvNames(provider, purpose)) ?? getProviderApiKey(provider),
+    };
+
+    return resolved;
+}
+
+/**
+ * Tells whether two targets point at the same endpoint: same provider, base URL and API key.
+ * A missing base URL or key is treated as equal to an empty string; the model is not compared.
+ */
+export function sameRuntimeEndpoint(left: AiRuntimeTarget, right: AiRuntimeTarget): boolean {
+    const orEmpty = (value?: string): string => value ?? "";
+
+    if (left.provider !== right.provider) return false;
+    if (orEmpty(left.baseUrl) !== orEmpty(right.baseUrl)) return false;
+    return orEmpty(left.apiKey) === orEmpty(right.apiKey);
+}
+
+/** Creates an OpenAI SDK client from the target's API key and base URL. */
+export function createOpenAiClient(target: AiRuntimeTarget): OpenAI {
+    const {apiKey, baseUrl} = target;
+
+    return new OpenAI({apiKey, baseURL: baseUrl});
+}
+
+/**
+ * Decides which Gemini API flavour to use.
+ *
+ * An explicit `Environment.GEMINI_API_MODE` of "openai" or "google" wins. Otherwise
+ * the mode is "openai" when the target's base URL contains "/openai", else "google".
+ */
+export function getGeminiApiMode(target?: AiRuntimeTarget): GeminiApiMode {
+    const configured = Environment.GEMINI_API_MODE;
+    if (configured === "openai") return "openai";
+    if (configured === "google") return "google";
+
+    const baseUrl = target?.baseUrl ?? "";
+    return baseUrl.includes("/openai") ? "openai" : "google";
+}
+
+/**
+ * Creates an OpenAI SDK client for Gemini, defaulting the base URL to Gemini's
+ * OpenAI-compatible endpoint when the target has none.
+ */
+export function createGeminiOpenAiClient(target: AiRuntimeTarget): OpenAI {
+    const baseUrl = target.baseUrl ?? GEMINI_OPENAI_BASE_URL;
+
+    return createOpenAiClient({...target, baseUrl});
+}
+
+/**
+ * Creates a Google GenAI SDK client authenticated with the target's API key.
+ *
+ * NOTE(review): `target.baseUrl` is not passed to the client, so a configured endpoint
+ * override has no effect on this code path — confirm that is intended.
+ */
+export function createGoogleGenAiClient(target: AiRuntimeTarget): GoogleGenAI {
+    return new GoogleGenAI({
+        apiKey: target.apiKey,
+    });
+}
+
+/** Creates a Mistral SDK client from the target's API key, using its base URL as `serverURL`. */
+export function createMistralClient(target: AiRuntimeTarget): Mistral {
+    const {apiKey, baseUrl: serverURL} = target;
+
+    return new Mistral({apiKey, serverURL});
+}
+
+/**
+ * Normalises a configured Ollama base URL into the `host` value given to the client.
+ *
+ * - A missing or blank URL yields undefined, so the client falls back to its own default host.
+ * - Trailing slashes are removed.
+ * - A URL that already names a port, or that contains a path, is returned as-is.
+ * - Otherwise the conventional Ollama port 11434 is appended.
+ */
+function resolveOllamaHost(baseUrl?: string): string | undefined {
+    const trimmed = baseUrl?.trim().replace(/\/+$/, "");
+    if (!trimmed) return undefined;
+
+    // Everything after an optional "scheme://" prefix.
+    const withoutScheme = trimmed.replace(/^[a-z][a-z0-9+.-]*:\/\//i, "");
+
+    // Appending a port after a path would corrupt the URL, so leave such values alone.
+    if (withoutScheme.includes("/")) return trimmed;
+    // An explicit port (also after an IPv6 literal such as "[::1]:8080") is respected.
+    if (/:\d+$/.test(withoutScheme)) return trimmed;
+
+    return `${trimmed}:11434`;
+}
+
+/**
+ * Creates an Ollama client for the target.
+ *
+ * The host gets the default port 11434 only when the base URL does not already specify
+ * a port. An unset base URL is passed through as undefined instead of being turned into
+ * the literal string "undefined:11434". When an API key is present it is sent as a
+ * Bearer `Authorization` header.
+ */
+export function createOllamaClient(target: AiRuntimeTarget): Ollama {
+    return new Ollama({
+        host: resolveOllamaHost(target.baseUrl),
+        headers: target.apiKey ? {"Authorization": `Bearer ${target.apiKey}`} : undefined,
+    });
+}
@@ -0,0 +1,55 @@
+import {randomUUID} from "node:crypto";
+
+/** Bookkeeping for one in-flight AI request that can be cancelled. */
+export type AiCancelRequest = {
+    // Registry key, generated when the request is created.
+    id: string;
+    chatId: number;
+    // Optional at creation; can be filled in later via setAiCancelMessageId.
+    messageId?: number;
+    fromId: number;
+    provider: string;
+    // Aborted when the request is cancelled.
+    controller: AbortController;
+    // Optional hook awaited after the controller has been aborted.
+    onCancel?: () => Promise<void> | void;
+};
+
+// Module-level registry of cancellable requests, keyed by request id.
+const requests = new Map<string, AiCancelRequest>();
+
+/**
+ * Registers a new cancellable request and returns it.
+ *
+ * @param params Request details; when no `controller` is supplied a fresh AbortController is created.
+ * @returns The stored request, carrying a newly generated UUID as `id`.
+ */
+export function createAiCancelRequest(params: Omit<AiCancelRequest, "id" | "controller"> & { controller?: AbortController }): AiCancelRequest {
+    const {chatId, messageId, fromId, provider, onCancel} = params;
+    const id = randomUUID();
+    const controller = params.controller ?? new AbortController();
+
+    const request: AiCancelRequest = {id, controller, chatId, messageId, fromId, provider, onCancel};
+    requests.set(id, request);
+
+    return request;
+}
+
+/** Stores `messageId` on the registered request with the given id; does nothing for unknown ids. */
+export function setAiCancelMessageId(id: string, messageId: number): void {
+    const pending = requests.get(id);
+    if (!pending) return;
+
+    pending.messageId = messageId;
+}
+
+/** Returns the registered request with the given id, or undefined when there is none. */
+export function getAiCancelRequest(id: string): AiCancelRequest | undefined {
+    return requests.get(id);
+}
+
+/**
+ * Cancels a registered request: aborts its controller and awaits its `onCancel` hook.
+ *
+ * The request is unregistered before anything is awaited. A second call for the same id
+ * while `onCancel` is still running therefore finds nothing and returns false, instead
+ * of aborting again and running the hook twice.
+ *
+ * @param id Id of the request to cancel.
+ * @returns true when this call performed the cancellation, false when the id is unknown
+ *          or the request is already being cancelled or finished.
+ * @throws Whatever `onCancel` throws; the request is unregistered in that case too.
+ */
+export async function abortAiRequest(id: string): Promise<boolean> {
+    const request = requests.get(id);
+    if (!request) return false;
+
+    // Claim the request synchronously so concurrent callers cannot cancel it again.
+    requests.delete(id);
+
+    request.controller.abort();
+    await request.onCancel?.();
+
+    return true;
+}
+
+/** Removes the request from the registry without aborting it; unknown ids are ignored. */
+export function finishAiRequest(id: string): void {
+    requests.delete(id);
+}
@@ -0,0 +1,90 @@
+import {AiToolCall} from "./tool-types";
+import {OllamaChatMessage} from "./ollama-chat-message";
+import {GeminiMessage} from "./gemini-chat-message";
+import {MistralChatMessage} from "./mistral-chat-message";
+import {MessageAudioPart, MessageImagePart} from "../common/message-part";
+import {OpenAIChatMessage} from "./openai-chat-message";
+
+/**
+ * Provider-neutral chat message. The `as*ChatMessage` helpers in this file convert it
+ * into provider-specific shapes.
+ */
+export type ChatMessage = {
+    role: "system" | "user" | "assistant" | "tool";
+    content: string;
+    // NOTE(review): presumably encoded image payloads (e.g. base64) — confirm against callers.
+    images?: string[];
+    imageParts?: MessageImagePart[];
+    documents?: string[];
+    audios?: string[];
+    audioParts?: MessageAudioPart[];
+    videos?: string[];
+    videoNotes?: string[];
+    thinking?: string;
+    tool_calls?: AiToolCall[];
+    tool_name?: string;
+}
+
+/**
+ * Converts a generic chat message into the Ollama message shape.
+ * Only role, content, thinking, images and the tool fields are carried over.
+ */
+export function asOllamaChatMessage(message: ChatMessage): OllamaChatMessage {
+    const {role, content, thinking, images, tool_calls, tool_name} = message;
+
+    return {role, content, thinking, images, tool_calls, tool_name};
+}
+
+// export function asGeminiChatMessage(message: ChatMessage): GeminiMessage {
+//     if (message.images) {
+//         return {
+//             role: message.role,
+//             content: message.images.map(() => {
+//                 return {
+//                     type: "image",
+//                 };
+//             })
+//         };
+//     }
+//
+//     return {
+//         role: message.role,
+//         content: {
+//             type: "text",
+//             text: message.content,
+//         },
+//     };
+// }
+
+/**
+ * Converts a generic chat message into the Mistral message shape.
+ * Only the role and the text content are carried over.
+ */
+export function asMistralChatMessage(message: ChatMessage): MistralChatMessage {
+    const {role, content} = message;
+
+    return {role, content};
+}
+
+// export function asOpenAIChatMessage(message: ChatMessage): OpenAIChatMessage {
+//     return {
+//
+//     }
+// }
+
+/*
+ const messages: any[] = ordered.map(part => {
+            const content: any[] = [{
+                type: "input_text",
+                text: (Environment.USE_NAMES_IN_PROMPT && !part.bot ? `MESSAGE FROM USER \"${part.name}\":\n` : "") + part.content,
+            }];
+
+            if (!part.bot) {
+                for (const image of part.images ?? []) {
+                    content.push({type: "input_image", image_url: `data:image/jpeg;base64,${image}`, detail: "auto"});
+                }
+            }
+
+            return {role: part.bot ? "assistant" : "user", content};
+        });
+
+        if (Environment.SYSTEM_PROMPT && Environment.USE_SYSTEM_PROMPT) {
+            messages.unshift({role: "system", content: Environment.SYSTEM_PROMPT});
+        }
+        return {parts: messages, imageCount};
+ */
+
+/** Any provider-specific chat message shape declared for the supported providers. */
+export type AiChatMessage = | OpenAIChatMessage | OllamaChatMessage | MistralChatMessage | GeminiMessage;
@@ -0,0 +1,84 @@
+/** Step carrying user-supplied content; discriminated by `type: "user_input"`. */
+export type GeminiUserInputStep = {
+    type: "user_input";
+    content?: Array<GeminiContent>;
+}
+
+/** Step carrying model-produced content; discriminated by `type: "model_output"`. */
+export type GeminiModelOutputStep = {
+    type: "model_output";
+    content?: Array<GeminiContent>;
+}
+
+/** Step describing a function call: the function name, its arguments and a call id. */
+export type GeminiFunctionCallStep = {
+    id: string;
+    arguments: {
+        [key: string]: unknown;
+    };
+    name: string;
+    type: "function_call";
+    signature?: string;
+}
+
+/** Step describing the result of a function call, linked to the call through `call_id`. */
+export type GeminiFunctionResultStep = {
+    call_id: string;
+    // `unknown` absorbs the other union members, so this is effectively `unknown`;
+    // the extra members only document the expected shapes.
+    result: unknown | Array<GeminiTextContent | GeminiImageContent> | string;
+    type: "function_result";
+    is_error?: boolean;
+    name?: string;
+    signature?: string;
+}
+
+/** Any step variant; discriminated by the `type` field. */
+export type GeminiStep =
+    | GeminiUserInputStep
+    | GeminiModelOutputStep
+    | GeminiFunctionCallStep
+    | GeminiFunctionResultStep;
+
+/** Plain text part. */
+export type GeminiTextContent = {
+    text: string;
+}
+
+/** Inline binary part: the payload plus its MIME type. */
+export type GeminiInlineContent = {
+    inlineData: {
+        // NOTE(review): presumably base64-encoded — confirm against the code that builds these parts.
+        data: string;
+        mimeType: string;
+    };
+}
+
+// Media-specific names for the same inline shape; they differ only in intent.
+export type GeminiImageContent = GeminiInlineContent;
+export type GeminiAudioContent = GeminiInlineContent;
+export type GeminiDocumentContent = GeminiInlineContent;
+export type GeminiVideoContent = GeminiInlineContent;
+
+/** Part in which a function call is expressed. */
+export type GeminiFunctionCallContent = {
+    functionCall: {
+        id?: string;
+        name?: string;
+        args?: Record<string, unknown>;
+    };
+}
+
+/** Part carrying the response to a function call. */
+export type GeminiFunctionResponseContent = {
+    functionResponse: {
+        id?: string;
+        name?: string;
+        response: Record<string, unknown>;
+    };
+}
+
+/** Any message part: text, inline data, a function call or a function response. */
+export type GeminiContent =
+    | GeminiTextContent
+    | GeminiInlineContent
+    | GeminiFunctionCallContent
+    | GeminiFunctionResponseContent;
+
+/** A turn with an optional role and either a single content part or a list of parts. */
+export type GeminiTurn = {
+    content?: Array<GeminiContent> | GeminiContent;
+    role?: string;
+}
+
+/** Accepted input shapes: raw text, a list of steps, a list of parts, or a single part. */
+export type GeminiInput = string | Array<GeminiStep> | Array<GeminiContent> | GeminiContent;
+
+/** Chat message in Gemini form: a "user" or "model" role plus its content parts. */
+export type GeminiMessage = {
+    role: "user" | "model";
+    parts: GeminiContent[];
+};
@@ -0,0 +1,112 @@
+/** Known image detail levels. */
+export const MistralImageDetail = {
+    Low: "low",
+    Auto: "auto",
+    High: "high",
+} as const;
+export type MistralImageDetail = OpenEnum<typeof MistralImageDetail>;
+
+// Type-level brand only; `declare` means nothing is emitted at runtime.
+declare const __brand: unique symbol;
+/** A value outside the known set, branded so it stays distinct from the known literals. */
+export type Unrecognized<T> = T & { [__brand]: "unrecognized" };
+
+/**
+ * "Open" enum: the literal values of `T`, plus a branded string (or number, for numeric
+ * enums) standing for values not listed in `T`.
+ */
+export type OpenEnum<T extends Readonly<Record<string, string | number>>> =
+    | T[keyof T]
+    | Unrecognized<T[keyof T] extends number ? number : string>;
+
+/** Known built-in connector identifiers. */
+export const BuiltInConnectors = {
+    WebSearch: "web_search",
+    WebSearchPremium: "web_search_premium",
+    CodeInterpreter: "code_interpreter",
+    ImageGeneration: "image_generation",
+    DocumentLibrary: "document_library",
+} as const;
+export type BuiltInConnectors = OpenEnum<typeof BuiltInConnectors>;
+
+/** Text content chunk. */
+export type MistralTextChunk = {
+    type: "text";
+    text: string;
+};
+
+/** Reference to a tool-produced source: the tool, a title and optional link metadata. */
+export type MistralToolReferenceChunk = {
+    // The key is required, but its value may be undefined.
+    type: "tool_reference" | undefined;
+    tool: BuiltInConnectors | string;
+    title: string;
+    url?: string | null | undefined;
+    favicon?: string | null | undefined;
+    description?: string | null | undefined;
+};
+
+/** Reasoning chunk whose body is a list of text and tool-reference chunks. */
+export type MistralThinkChunk = {
+    type: "thinking";
+    thinking: Array<MistralToolReferenceChunk | MistralTextChunk>;
+    signature?: string | null | undefined;
+    closed?: boolean | undefined;
+};
+
+/** Image chunk: either a bare URL string or an object with the URL and an optional detail level. */
+export type MistralImageURLChunk = {
+    type: "image_url";
+    imageUrl: string | {
+        url: string;
+        detail?: MistralImageDetail | null | undefined;
+    };
+}
+
+/** Content chunk kinds modelled here; the comment that follows lists further kinds that are not. */
+export type MistralContentChunk =
+    | MistralTextChunk
+    | MistralThinkChunk
+    | MistralImageURLChunk
+
+/*
+ | (ImageURLChunk & { type: "image_url" })
+  | (DocumentURLChunk & { type: "document_url" })
+  | (TextChunk & { type: "text" })
+  | (ReferenceChunk & { type: "reference" })
+  | (FileChunk & { type: "file" })
+  | (ThinkChunk & { type: "thinking" })
+  | AudioChunk
+ */
+
+/** Function invocation: the function name plus its arguments, as an object or a string. */
+export type MistralFunctionCall = {
+    name: string;
+    arguments: { [k: string]: any } | string;
+};
+
+/** A tool call wrapping a function call, with optional id, type and index. */
+export type MistralToolCall = {
+    id?: string | undefined;
+    type?: string | undefined;
+    function: MistralFunctionCall;
+    index?: number | undefined;
+};
+
+/** Assistant message; content and tool calls are both optional. */
+export type MistralAssistantMessage = {
+    role: "assistant";
+    content?: string | Array<MistralContentChunk> | null | undefined;
+    toolCalls?: Array<MistralToolCall> | null | undefined;
+    prefix?: boolean | undefined;
+}
+
+// NOTE(review): not referenced by MistralSystemMessage below, whose content is a plain string.
+export type MistralSystemMessageContentChunks =
+    | MistralTextChunk
+    | MistralThinkChunk;
+
+/** System message with plain-text content. */
+export type MistralSystemMessage = {
+    role: "system";
+    content: string;
+}
+
+/** Tool result message, optionally tied to a tool call through `toolCallId`. */
+export type MistralToolMessage = {
+    role: "tool";
+    content: string | Array<MistralContentChunk> | null;
+    toolCallId?: string | null | undefined;
+    name?: string | null | undefined;
+};
+
+/** User message with text or chunked content. */
+export type MistralUserMessage = {
+    role: "user";
+    content: string | Array<MistralContentChunk> | null;
+};
+
+/** Any Mistral chat message; discriminated by `role`. */
+export type MistralChatMessage =
+    | MistralAssistantMessage
+    | MistralSystemMessage
+    | MistralToolMessage
+    | MistralUserMessage
@@ -0,0 +1,3 @@
+import {Message} from "ollama";
+
+/** Alias for the `Message` type exported by the `ollama` package. */
+export type OllamaChatMessage = Message;
@@ -0,0 +1,3 @@
+import {ResponseInputItem} from "openai/resources/responses/responses";
+
+/** Alias for `ResponseInputItem` from the OpenAI SDK's responses resource. */
+export type OpenAIChatMessage = ResponseInputItem
@@ -0,0 +1,325 @@
+import {AiProvider} from "../model/ai-provider";
+import {AiModelCapabilities} from "../model/ai-model-capabilities";
+import {Environment} from "../common/environment";
+import {logError} from "../util/utils";
+import {AiCapabilityInfo} from "../model/ai-capability-info";
+import {isOllamaSpeechToTextModel} from "./speech-to-text-models";
+import {
+    AiCapabilityName,
+    AiRuntimeTarget,
+    createGeminiOpenAiClient,
+    createGoogleGenAiClient,
+    createMistralClient,
+    createOllamaClient,
+    createOpenAiClient,
+    getGeminiApiMode,
+    resolveAiRuntimeTarget,
+    sameRuntimeEndpoint,
+} from "./ai-runtime-target";
+
+/** A provider/model pair together with its resolved capabilities. */
+export type RuntimeModelInfo = {
+    provider: AiProvider;
+    model: string;
+    capabilities: AiModelCapabilities;
+};
+
+// Capability names that getRuntimeCapabilities iterates over, in this order.
+const CAPABILITY_NAMES: AiCapabilityName[] = [
+    "vision",
+    "ocr",
+    "thinking",
+    "extendedThinking",
+    "tools",
+    "audio",
+    "documents",
+    "outputImages",
+    "speechToText",
+    "textToSpeech",
+];
+
+/**
+ * Returns the provider's current general/chat model as stored on `Environment`.
+ *
+ * @param provider Provider whose model should be read.
+ */
+export function getRuntimeModel(provider: AiProvider): string {
+    switch (provider) {
+        case AiProvider.OLLAMA:
+            return Environment.OLLAMA_CHAT_MODEL;
+        case AiProvider.GEMINI:
+            return Environment.GEMINI_MODEL;
+        case AiProvider.MISTRAL:
+            return Environment.MISTRAL_MODEL;
+        case AiProvider.OPENAI:
+            return Environment.OPENAI_MODEL;
+    }
+}
+
+/**
+ * Replaces the provider's general/chat model by assigning to the matching `Environment`
+ * field — the same field getRuntimeModel reads.
+ *
+ * @param provider Provider whose model should be changed.
+ * @param model    New model name.
+ */
+export function setRuntimeModel(provider: AiProvider, model: string): void {
+    switch (provider) {
+        case AiProvider.OLLAMA:
+            Environment.OLLAMA_CHAT_MODEL = model;
+            break;
+        case AiProvider.GEMINI:
+            Environment.GEMINI_MODEL = model;
+            break;
+        case AiProvider.MISTRAL:
+            Environment.MISTRAL_MODEL = model;
+            break;
+        case AiProvider.OPENAI:
+            Environment.OPENAI_MODEL = model;
+            break;
+    }
+}
+
+/**
+ * Builds the info record for one capability.
+ *
+ * @param supported     Whether the capability is available.
+ * @param target        Target serving the capability; adds `model` (when non-empty) and `endpoint`.
+ * @param runtimeTarget Baseline chat target. When given, the capability is marked external if its
+ *                      model or endpoint differs from it; `endpoint.external` reflects the
+ *                      endpoint difference only.
+ */
+function capability(supported: boolean, target?: AiRuntimeTarget, runtimeTarget?: AiRuntimeTarget): AiCapabilityInfo {
+    const info: AiCapabilityInfo = {supported};
+    if (!target) return info;
+
+    if (target.model) info.model = target.model;
+
+    const endpointDiffers = !!runtimeTarget && !sameRuntimeEndpoint(target, runtimeTarget);
+    info.endpoint = {
+        provider: target.provider,
+        baseUrl: target.baseUrl,
+        external: endpointDiffers,
+    };
+
+    if (runtimeTarget && (target.model !== runtimeTarget.model || endpointDiffers)) {
+        info.external = true;
+    }
+
+    return info;
+}
+
+/**
+ * Creates an `AiModelCapabilities` instance in which every capability is unsupported
+ * unless `overrides` supplies an entry for it.
+ */
+function buildCapabilities(overrides: Partial<Record<AiCapabilityName, AiCapabilityInfo>>): AiModelCapabilities {
+    const unsupported = (): AiCapabilityInfo => ({supported: false});
+
+    const merged = {
+        vision: unsupported(),
+        ocr: unsupported(),
+        thinking: unsupported(),
+        extendedThinking: unsupported(),
+        tools: unsupported(),
+        audio: unsupported(),
+        documents: unsupported(),
+        outputImages: unsupported(),
+        speechToText: unsupported(),
+        textToSpeech: unsupported(),
+        ...overrides,
+    };
+
+    return Object.assign(new AiModelCapabilities(), merged);
+}
+
+/** Lower-cases a model name so the name-based heuristics below can match case-insensitively. */
+function lowerModelName(model: string): string {
+    return model.toLowerCase();
+}
+
+/**
+ * Name-based check for OpenAI text/chat models.
+ *
+ * Image, TTS, whisper, embedding, moderation and transcription models are excluded;
+ * everything else must start with one of the known text-model prefixes.
+ */
+function isOpenAiTextModel(model: string): boolean {
+    const name = lowerModelName(model);
+    const nonTextPrefix = /^(gpt-image|dall-e|tts-|whisper|text-embedding|text-moderation|omni-moderation)/;
+    const textPrefix = /^(gpt-|o\d|chatgpt-|codex-|computer-use)/;
+
+    const excluded = name === "" || nonTextPrefix.test(name) || name.includes("transcribe");
+    return !excluded && textPrefix.test(name);
+}
+
+/** Name-based check for OpenAI reasoning models: "o<digit>…" names and the "gpt-5" family. */
+function isOpenAiReasoningModel(model: string): boolean {
+    const name = lowerModelName(model);
+
+    if (name.startsWith("gpt-5")) return true;
+    return /^o\d/.test(name);
+}
+
+/**
+ * Name-based check for OpenAI models that accept images: any text model except the
+ * "gpt-3.5" family and the audio-preview / search-preview variants.
+ */
+function isOpenAiVisionModel(model: string): boolean {
+    if (!isOpenAiTextModel(model)) return false;
+
+    const name = lowerModelName(model);
+    const blocked = name.startsWith("gpt-3.5")
+        || ["audio-preview", "search-preview"].some(tag => name.includes(tag));
+
+    return !blocked;
+}
+
+/** Name-based check for Gemini-side models that are not chat models (lyria, TTS, image models). */
+function isGeminiNonChatModel(model: string): boolean {
+    const name = lowerModelName(model);
+
+    if (name.endsWith("-image")) return true;
+    return ["lyria", "-tts", "image-preview"].some(marker => name.includes(marker));
+}
+
+/** True for "gemini-…" models that are not classified as non-chat models. */
+function geminiSupportsAudioInput(model: string): boolean {
+    if (isGeminiNonChatModel(model)) return false;
+
+    return lowerModelName(model).startsWith("gemini-");
+}
+
+/**
+ * Determines what a model can do.
+ *
+ * Ollama and Mistral are asked through their APIs (`show` / `models.retrieve`);
+ * Gemini and OpenAI capabilities are inferred from the model name. Capabilities that
+ * are served by a separately resolved target (images, speech, TTS, documents) are
+ * reported against that target instead of `model`.
+ *
+ * @param provider Provider the model belongs to.
+ * @param model    Model name to inspect.
+ * @param purpose  Purpose used to resolve the endpoint/key for `model`; defaults to "chat".
+ * @returns The capabilities, or undefined when `model` is empty or any step throws
+ *          (the error is passed to `logError`).
+ */
+export async function getModelCapabilities(
+    provider: AiProvider,
+    model: string,
+    purpose: AiCapabilityName | "chat" = "chat",
+): Promise<AiModelCapabilities | undefined> {
+    if (!model) return undefined;
+
+    try {
+        // Baseline: the provider's current chat target, used to flag capabilities as external.
+        const runtimeTarget = resolveAiRuntimeTarget(provider, "chat", getRuntimeModel(provider));
+        const target = resolveAiRuntimeTarget(provider, purpose, model);
+
+        switch (provider) {
+            case AiProvider.OLLAMA: {
+                const ollama = createOllamaClient(target);
+                const info = await ollama.show({model});
+                // A response without a `capabilities` array is treated as "no capabilities".
+                const modelCapabilities = Array.isArray(info.capabilities) ? info.capabilities : [];
+                const has = (cap: string): boolean => modelCapabilities.includes(cap);
+                const audioSupported = isOllamaSpeechToTextModel(model);
+                const documentsTarget = resolveAiRuntimeTarget(provider, "documents");
+
+                return buildCapabilities({
+                    vision: capability(has("vision"), target, runtimeTarget),
+                    ocr: capability(has("ocr"), target, runtimeTarget),
+                    thinking: capability(has("thinking"), target, runtimeTarget),
+                    extendedThinking: capability(has("thinking") && model.includes("gpt-oss"), target, runtimeTarget),
+                    tools: capability(has("tools"), target, runtimeTarget),
+                    audio: capability(audioSupported, target, runtimeTarget),
+                    documents: capability(!!documentsTarget.model, documentsTarget, runtimeTarget),
+                    speechToText: capability(audioSupported, target, runtimeTarget),
+                });
+            }
+            case AiProvider.GEMINI: {
+                // No API call here: everything is derived from the model name and configuration.
+                const chatLike = lowerModelName(model).startsWith("gemini-") && !isGeminiNonChatModel(model);
+                const reasoningModel = lowerModelName(model).includes("2.5") || lowerModelName(model).includes("thinking");
+                const imageTarget = resolveAiRuntimeTarget(provider, "outputImages");
+                const speechTarget = resolveAiRuntimeTarget(provider, "speechToText");
+                const ttsTarget = resolveAiRuntimeTarget(provider, "textToSpeech");
+
+                return buildCapabilities({
+                    vision: capability(chatLike, target, runtimeTarget),
+                    ocr: capability(chatLike, target, runtimeTarget),
+                    thinking: capability(reasoningModel, target, runtimeTarget),
+                    extendedThinking: capability(reasoningModel, target, runtimeTarget),
+                    tools: capability(chatLike, target, runtimeTarget),
+                    audio: capability(geminiSupportsAudioInput(model), target, runtimeTarget),
+                    speechToText: capability(!!speechTarget.apiKey && geminiSupportsAudioInput(speechTarget.model), speechTarget, runtimeTarget),
+                    outputImages: capability(!!imageTarget.apiKey && !!imageTarget.model, imageTarget, runtimeTarget),
+                    textToSpeech: capability(!!ttsTarget.apiKey && !!ttsTarget.model, ttsTarget, runtimeTarget),
+                });
+            }
+            case AiProvider.MISTRAL: {
+                const mistral = createMistralClient(target);
+                const info = await mistral.models.retrieve({modelId: model});
+                // Capabilities are only read when the response type is not "UNKNOWN".
+                const caps = info.type !== "UNKNOWN" ? info.capabilities : undefined;
+                const speechTarget = resolveAiRuntimeTarget(provider, "speechToText");
+                const ttsTarget = resolveAiRuntimeTarget(provider, "textToSpeech");
+
+                return buildCapabilities({
+                    vision: capability(!!caps?.vision, target, runtimeTarget),
+                    ocr: capability(!!caps?.ocr, target, runtimeTarget),
+                    thinking: capability(!!caps?.reasoning, target, runtimeTarget),
+                    tools: capability(!!caps?.functionCalling, target, runtimeTarget),
+                    audio: capability(!!caps?.audio, target, runtimeTarget),
+                    documents: capability(true, target, runtimeTarget),
+                    speechToText: capability(!!speechTarget.model || !!caps?.audioTranscription, speechTarget, runtimeTarget),
+                    textToSpeech: capability(!!ttsTarget.apiKey && !!ttsTarget.model, ttsTarget, runtimeTarget),
+                });
+            }
+            case AiProvider.OPENAI: {
+                // No API call here: everything is derived from the model name and configuration.
+                const textModel = isOpenAiTextModel(model);
+                const reasoningModel = isOpenAiReasoningModel(model);
+                const imageTarget = resolveAiRuntimeTarget(provider, "outputImages");
+                const speechTarget = resolveAiRuntimeTarget(provider, "speechToText");
+                const ttsTarget = resolveAiRuntimeTarget(provider, "textToSpeech");
+
+                return buildCapabilities({
+                    vision: capability(isOpenAiVisionModel(model), target, runtimeTarget),
+                    ocr: capability(isOpenAiVisionModel(model), target, runtimeTarget),
+                    thinking: capability(reasoningModel, target, runtimeTarget),
+                    extendedThinking: capability(reasoningModel, target, runtimeTarget),
+                    tools: capability(textModel, target, runtimeTarget),
+                    outputImages: capability(!!imageTarget.model, imageTarget, runtimeTarget),
+                    speechToText: capability(!!speechTarget.model, speechTarget, runtimeTarget),
+                    textToSpeech: capability(!!ttsTarget.apiKey && !!ttsTarget.model, ttsTarget, runtimeTarget),
+                });
+            }
+        }
+
+    } catch (e) {
+        // Best effort: failures are logged and reported as "unknown" (undefined).
+        logError(e);
+        return undefined;
+    }
+}
+
+
+/**
+ * Resolves the effective capabilities for a provider's runtime configuration.
+ *
+ * Starts from the capabilities of the chat model. Every capability whose resolved target
+ * differs from the chat target (other model or other endpoint) is then looked up on that
+ * target and, when available, replaces the chat model's entry.
+ *
+ * @param provider Provider to inspect; defaults to `Environment.DEFAULT_AI_PROVIDER`.
+ * @param model    Chat model to start from; defaults to the provider's runtime model.
+ * @returns The merged capabilities (all unsupported when the chat model lookup yields nothing).
+ */
+export async function getRuntimeCapabilities(
+    provider: AiProvider = Environment.DEFAULT_AI_PROVIDER,
+    model: string | undefined = getRuntimeModel(provider)
+): Promise<AiModelCapabilities> {
+    const chatTarget = resolveAiRuntimeTarget(provider, "chat", model ?? getRuntimeModel(provider));
+    const merged = await getModelCapabilities(provider, chatTarget.model, "chat") ?? buildCapabilities({});
+
+    for (const name of CAPABILITY_NAMES) {
+        const dedicated = resolveAiRuntimeTarget(provider, name);
+        const sharesChatTarget = dedicated.model === chatTarget.model && sameRuntimeEndpoint(dedicated, chatTarget);
+
+        if (!sharesChatTarget) {
+            const dedicatedCapabilities = await getModelCapabilities(provider, dedicated.model, name);
+            const info = dedicatedCapabilities?.[name];
+            if (info) merged[name] = info;
+        }
+    }
+
+    return merged;
+}
+
+/**
+ * Renders the provider, model and one line per capability through the `Environment`
+ * text helpers.
+ *
+ * @param provider Provider to describe; defaults to `Environment.DEFAULT_AI_PROVIDER`.
+ * @param model    Model to describe; defaults to the provider's runtime model.
+ * @param caps     Capabilities to render; resolved via getRuntimeCapabilities when omitted.
+ * @returns The text produced by `Environment.getRuntimeModelInfoText`.
+ */
+export async function formatRuntimeModelInfo(
+    provider: AiProvider = Environment.DEFAULT_AI_PROVIDER,
+    model: string | undefined = getRuntimeModel(provider),
+    caps?: AiModelCapabilities
+): Promise<string> {
+    if (!caps) caps = await getRuntimeCapabilities(provider, model);
+    const line = (title: string, value?: AiCapabilityInfo) => {
+        const state = value?.supported ? "✅" : "❌";
+        // Without an explicit flag, a capability counts as external when it names a different model.
+        const external = value?.external ?? (!!value?.model && value.model !== model);
+        return Environment.getRuntimeCapabilityLineText({
+            state,
+            title,
+            model: value?.model,
+            endpointBaseUrl: value?.endpoint?.baseUrl,
+            external,
+        });
+    };
+
+    return Environment.getRuntimeModelInfoText(
+        provider.toString().toLowerCase(),
+        model,
+        [
+            line(Environment.runtimeCapabilityVisionText, caps.vision),
+            line(Environment.runtimeCapabilityOcrText, caps.ocr),
+            line(Environment.runtimeCapabilityThinkingText, caps.thinking),
+            line(Environment.runtimeCapabilityExtendedThinkingText, caps.extendedThinking),
+            line(Environment.runtimeCapabilityToolsText, caps.tools),
+            line(Environment.runtimeCapabilityAudioText, caps.audio),
+            line(Environment.runtimeCapabilitySpeechToTextText, caps.speechToText),
+            line(Environment.runtimeCapabilityTextToSpeechText, caps.textToSpeech),
+            line(Environment.runtimeCapabilityDocumentsText, caps.documents),
+            line(Environment.runtimeCapabilityOutputImagesText, caps.outputImages),
+        ],
+    );
+}
+
+/**
+ * Lists the model names reported by the provider's chat endpoint.
+ *
+ * For Gemini the "models/" resource prefix is removed in both API modes, so the returned
+ * names have the same shape whichever client produced them.
+ *
+ * @param provider Provider to query.
+ * @returns Model names; entries without a usable name are dropped for Ollama, Mistral and OpenAI.
+ * @throws Whatever the underlying client throws (network/authentication errors are not caught here).
+ */
+export async function listProviderModels(provider: AiProvider): Promise<string[]> {
+    const target = resolveAiRuntimeTarget(provider, "chat", getRuntimeModel(provider));
+
+    switch (provider) {
+        case AiProvider.OLLAMA: {
+            const ollama = createOllamaClient(target);
+            const result: any = await ollama.list();
+            return (result.models ?? []).map((m: any) => m.model || m.name).filter(Boolean);
+        }
+        case AiProvider.GEMINI: {
+            // Entries may be named "models/<id>"; callers work with the bare "<id>".
+            const bareName = (entry: any): string =>
+                String(entry.name || entry.id || String(entry)).replace(/^models\//, "");
+
+            const iterable: any = getGeminiApiMode(target) === "openai"
+                ? await createGeminiOpenAiClient(target).models.list()
+                : await createGoogleGenAiClient(target).models.list();
+
+            const models: string[] = [];
+            for await (const model of iterable) models.push(bareName(model));
+            return models;
+        }
+        case AiProvider.MISTRAL: {
+            const mistralAi = createMistralClient(target);
+            const result: any = await mistralAi.models.list();
+            return (result.data ?? result.models ?? result ?? []).map((m: any) => m.id || m.name || String(m)).filter(Boolean);
+        }
+        case AiProvider.OPENAI: {
+            const openAi = createOpenAiClient(target);
+            const result: any = await openAi.models.list();
+            return (result.data ?? []).map((m: any) => m.id).filter(Boolean);
+        }
+    }
+}
@@ -0,0 +1,184 @@
+import {Environment} from "../common/environment";
+import {AiProvider} from "../model/ai-provider";
+
+/**
+ * Identifies the backend a request is sent to. Provider, base URL and model
+ * together select the queue the request waits in.
+ */
+export type AiRequestQueueTarget = {
+    provider: AiProvider;
+    model: string;
+    // Optional endpoint override; surrounding whitespace and trailing slashes are ignored when building the queue key.
+    baseUrl?: string;
+};
+
+/**
+ * Bookkeeping record for a single request handled by the queue.
+ */
+type QueueEntry<T> = {
+    // Target the request was enqueued for; reused to reschedule after it finishes.
+    target: AiRequestQueueTarget;
+    // Precomputed key of the per-target queue this entry belongs to.
+    queueKey: string;
+    // Caller-supplied work executed once a slot is free.
+    run: () => Promise<T>;
+    // Settle functions of the promise handed back by enqueue().
+    resolve: (value: T | PromiseLike<T>) => void;
+    reject: (reason?: unknown) => void;
+    // Called with the number of requests ahead of this entry while it waits.
+    onPositionChange: (requestsBefore: number) => Promise<void> | void;
+    // Optional abort signal; aborting an entry that has not started removes it from the queue.
+    signal?: AbortSignal;
+    // Listener registered on `signal`, kept so it can be detached later.
+    abortHandler?: () => void;
+    // Set to true right before the entry is run; the abort handler is a no-op afterwards.
+    started: boolean;
+};
+
+/**
+ * Options accepted by `AiProviderRequestQueue.enqueue`.
+ */
+type EnqueueOptions<T> = {
+    // When already aborted, enqueue() rejects immediately with Error("Aborted").
+    signal?: AbortSignal;
+    // Receives the number of requests ahead of the caller while it is waiting.
+    onPositionChange: (requestsBefore: number) => Promise<void> | void;
+    // The work to perform once the request reaches the front of its queue.
+    run: () => Promise<T>;
+};
+
+/**
+ * Limits how many requests run concurrently against each AI backend.
+ *
+ * Requests are grouped by provider, normalized base URL and model. Each group
+ * is served in arrival order with at most
+ * `Environment.getAiProviderMaxConcurrentRequests(provider)` (never fewer than
+ * one) requests running at a time. Waiting callers are told how many requests
+ * are ahead of them through `onPositionChange` and can leave the queue through
+ * an `AbortSignal`.
+ */
+class AiProviderRequestQueue {
+    /** Entries that have not started yet, per queue key, in arrival order. */
+    private readonly waiting = new Map<string, Array<QueueEntry<any>>>();
+    /** Number of running entries per queue key; a missing key means zero. */
+    private readonly active = new Map<string, number>();
+
+    /**
+     * Queues `options.run` for `target` and resolves with its result.
+     *
+     * @param target Backend the request is destined for.
+     * @param options Work to run, position callback and optional abort signal.
+     * @returns A promise that settles with the outcome of `options.run`, or
+     *          rejects with `Error("Aborted")` if the signal fires before the
+     *          request starts.
+     */
+    enqueue<T>(target: AiRequestQueueTarget, options: EnqueueOptions<T>): Promise<T> {
+        if (options.signal?.aborted) {
+            return Promise.reject(new Error("Aborted"));
+        }
+
+        return new Promise<T>((resolve, reject) => {
+            const queueKey = this.queueKey(target);
+            const entry: QueueEntry<T> = {
+                target,
+                queueKey,
+                run: options.run,
+                resolve,
+                reject,
+                onPositionChange: options.onPositionChange,
+                signal: options.signal,
+                started: false,
+            };
+
+            entry.abortHandler = () => {
+                // A running request is not interrupted by the queue.
+                if (entry.started) return;
+
+                const removed = this.removeWaitingEntry(entry);
+                if (!removed) return;
+
+                reject(new Error("Aborted"));
+                // Remaining waiters moved up; refresh their positions.
+                this.schedule(target);
+            };
+
+            options.signal?.addEventListener("abort", entry.abortHandler, {once: true});
+            this.getOrCreateQueue(queueKey).push(entry);
+            this.schedule(target);
+        });
+    }
+
+    /** Returns the waiting list for `queueKey`, if one exists. */
+    private getQueue(queueKey: string): Array<QueueEntry<any>> | undefined {
+        return this.waiting.get(queueKey);
+    }
+
+    /** Returns the waiting list for `queueKey`, creating an empty one on demand. */
+    private getOrCreateQueue(queueKey: string): Array<QueueEntry<any>> {
+        let queue = this.waiting.get(queueKey);
+        if (!queue) {
+            queue = [];
+            this.waiting.set(queueKey, queue);
+        }
+        return queue;
+    }
+
+    /** Number of requests currently running for `queueKey`. */
+    private activeCount(queueKey: string): number {
+        return this.active.get(queueKey) ?? 0;
+    }
+
+    /** Stores the running count, dropping the map entry once it reaches zero. */
+    private setActiveCount(queueKey: string, count: number): void {
+        if (count <= 0) {
+            this.active.delete(queueKey);
+            return;
+        }
+        this.active.set(queueKey, count);
+    }
+
+    /** Concurrency limit for the target's provider, clamped to at least 1. */
+    private maxActive(target: AiRequestQueueTarget): number {
+        return Math.max(1, Environment.getAiProviderMaxConcurrentRequests(target.provider));
+    }
+
+    /** Trims whitespace and trailing slashes so equivalent URLs share a queue. */
+    private normalizeBaseUrl(baseUrl: string | undefined): string {
+        return (baseUrl ?? "").trim().replace(/\/+$/, "");
+    }
+
+    /** Builds the map key from provider, normalized base URL and trimmed model. */
+    private queueKey(target: AiRequestQueueTarget): string {
+        return JSON.stringify([
+            target.provider,
+            this.normalizeBaseUrl(target.baseUrl),
+            target.model.trim(),
+        ]);
+    }
+
+    /**
+     * Removes a not-yet-started entry from its queue and detaches its abort
+     * listener.
+     *
+     * @returns `true` when the entry was found and removed.
+     */
+    private removeWaitingEntry(entry: QueueEntry<any>): boolean {
+        const queue = this.getQueue(entry.queueKey);
+        if (!queue) return false;
+
+        const index = queue.indexOf(entry);
+        if (index < 0) return false;
+
+        queue.splice(index, 1);
+        if (entry.abortHandler) {
+            entry.signal?.removeEventListener("abort", entry.abortHandler);
+        }
+        this.deleteQueueIfIdle(entry.queueKey, queue);
+        return true;
+    }
+
+    /**
+     * Starts as many waiting entries as the concurrency limit allows, then
+     * notifies the remaining waiters of their position.
+     */
+    private schedule(target: AiRequestQueueTarget): void {
+        const queueKey = this.queueKey(target);
+        const queue = this.getOrCreateQueue(queueKey);
+
+        while (queue.length && this.activeCount(queueKey) < this.maxActive(target)) {
+            const entry = queue.shift();
+            if (!entry) continue;
+
+            if (entry.abortHandler) {
+                entry.signal?.removeEventListener("abort", entry.abortHandler);
+            }
+
+            // The signal may already be aborted without the handler having
+            // removed the entry; skip it without consuming a slot.
+            if (entry.signal?.aborted) {
+                entry.reject(new Error("Aborted"));
+                continue;
+            }
+
+            entry.started = true;
+            this.setActiveCount(queueKey, this.activeCount(queueKey) + 1);
+            void this.runEntry(entry);
+        }
+
+        this.updateWaitingMessages(target);
+        this.deleteQueueIfIdle(queueKey, queue);
+    }
+
+    /** Runs one entry, settles its promise and frees the slot for the next one. */
+    private async runEntry(entry: QueueEntry<any>): Promise<void> {
+        try {
+            entry.resolve(await entry.run());
+        } catch (e) {
+            entry.reject(e);
+        } finally {
+            this.setActiveCount(entry.queueKey, this.activeCount(entry.queueKey) - 1);
+            this.schedule(entry.target);
+        }
+    }
+
+    /**
+     * Tells every waiting entry how many requests are ahead of it (running
+     * ones plus earlier waiters). Callback failures are logged, never thrown.
+     */
+    private updateWaitingMessages(target: AiRequestQueueTarget): void {
+        const queueKey = this.queueKey(target);
+        const active = this.activeCount(queueKey);
+        // Snapshot: callbacks may enqueue or abort while we iterate.
+        const queue = [...(this.getQueue(queueKey) ?? [])];
+
+        const notifications = queue.map((entry, index) => {
+            try {
+                return Promise.resolve(entry.onPositionChange(active + index));
+            } catch (e) {
+                // A callback that throws synchronously must not break
+                // scheduling for everyone else; treat it like a rejection.
+                return Promise.reject(e);
+            }
+        });
+
+        Promise.allSettled(notifications).then(results => {
+            for (const result of results) {
+                if (result.status === "rejected") {
+                    console.error(result.reason);
+                }
+            }
+        }).catch(console.error);
+    }
+
+    /** Drops the waiting list once nothing is queued or running for the key. */
+    private deleteQueueIfIdle(queueKey: string, queue: Array<QueueEntry<any>>): void {
+        if (!queue.length && this.activeCount(queueKey) <= 0) {
+            this.waiting.delete(queueKey);
+        }
+    }
+}
+
+/** Module-level queue instance exported for callers to share. */
+export const aiProviderRequestQueue = new AiProviderRequestQueue();
@@ -0,0 +1,24 @@
+import {AiProvider} from "../model/ai-provider";
+
+/** Command token that starts every "regenerate" callback payload. */
+export const AI_REGENERATE_CALLBACK = "/regenerate_ai";
+
+/** Decoded form of a "regenerate" callback payload. */
+export type AiRegenerateCallbackData = {
+    // Provider named in the payload.
+    provider: AiProvider;
+    // Flag carried in the payload; encoded as "1"/"0" by the builder.
+    think: boolean;
+};
+
+/**
+ * Encodes a "regenerate" request as callback data of the form
+ * `<command> <provider> <0|1>`.
+ *
+ * @param provider Provider to name in the payload.
+ * @param think Encoded as "1" when true, "0" otherwise.
+ */
+export function buildAiRegenerateCallbackData(provider: AiProvider, think = false): string {
+    let thinkFlag = "0";
+    if (think) {
+        thinkFlag = "1";
+    }
+    return `${AI_REGENERATE_CALLBACK} ${provider} ${thinkFlag}`;
+}
+
+/**
+ * Decodes callback data of the form `<command> <provider> <think>`.
+ *
+ * @param data Raw callback payload.
+ * @returns The decoded provider and think flag, or `null` when the first
+ *          token is not exactly `AI_REGENERATE_CALLBACK` or the provider is
+ *          not a known `AiProvider` value.
+ */
+export function parseAiRegenerateCallbackData(data: string): AiRegenerateCallbackData | null {
+    const [command, provider, think] = data.split(/\s+/);
+
+    // Compare the whole first token: a plain prefix check would also accept
+    // unrelated commands that merely begin with the same characters.
+    if (command !== AI_REGENERATE_CALLBACK) return null;
+    if (!Object.values(AiProvider).includes(provider as AiProvider)) return null;
+
+    return {
+        provider: provider as AiProvider,
+        think: think === "1" || think === "true",
+    };
+}
@@ -0,0 +1,227 @@
+import {Message} from "typescript-telegram-bot-api";
+import {bot} from "../index";
+import {downloadTelegramFile, logError} from "../util/utils";
+import fs from "node:fs";
+import path from "node:path";
+import {Environment} from "../common/environment";
+import {StoredAttachment, StoredAttachmentKind} from "../model/stored-attachment";
+import {performFFmpeg} from "../util/ffmpeg";
+import ffmpeg from "fluent-ffmpeg";
+import {AsyncSemaphore, KeyedAsyncLock} from "../util/async-lock";
+
+/** A cached attachment loaded into memory. */
+export type AiDownloadedFile = {
+    kind: StoredAttachmentKind;
+    fileId: string;
+    fileName: string;
+    mimeType?: string;
+    // File contents read from `path`.
+    buffer: Buffer;
+    // Location of the cached file on disk.
+    path: string;
+};
+
+// Keyed by cache file path; used around download and conversion of a given path.
+const cachePathLocks = new KeyedAsyncLock();
+// Wraps ffmpeg conversions; constructed with 2 (presumably the number of concurrent runs allowed — confirm against AsyncSemaphore).
+const ffmpegSemaphore = new AsyncSemaphore(2);
+
+/**
+ * Replaces path separators, reserved punctuation and control characters with
+ * underscores and caps the result at 180 characters.
+ */
+function safeFileName(value: string): string {
+    const sanitized = value.replace(/[\\/:*?"<>|\u0000-\u001F]/g, "_");
+    return sanitized.slice(0, 180);
+}
+
+/** Lower-cased MIME type mapped to its file extension (leading dot included). */
+const EXTENSIONS_BY_MIME_TYPE: ReadonlyMap<string, string> = new Map<string, string>([
+    ["audio/ogg", ".ogg"],
+    ["audio/opus", ".ogg"],
+    ["audio/mpeg", ".mp3"],
+    ["audio/mp3", ".mp3"],
+    ["audio/mp4", ".m4a"],
+    ["audio/x-m4a", ".m4a"],
+    ["audio/wav", ".wav"],
+    ["audio/wave", ".wav"],
+    ["audio/x-wav", ".wav"],
+    ["audio/webm", ".webm"],
+    ["image/jpeg", ".jpg"],
+    ["image/png", ".png"],
+    ["image/webp", ".webp"],
+    ["application/pdf", ".pdf"],
+    ["text/plain", ".txt"],
+    ["application/zip", ".zip"],
+    ["application/x-zip", ".zip"],
+    ["application/x-zip-compressed", ".zip"],
+    ["application/x-tar", ".tar"],
+    ["application/tar", ".tar"],
+    ["application/gzip", ".gz"],
+    ["application/x-gzip", ".gz"],
+    ["application/gzip-compressed", ".gz"],
+    ["video/mp4", ".mp4"],
+]);
+
+/**
+ * Maps a MIME type (case-insensitive) to a file extension.
+ *
+ * @returns The extension including the dot, or "" for unknown or missing types.
+ */
+function extensionFromMimeType(mimeType?: string): string {
+    const normalized = (mimeType || "").toLowerCase();
+    const extension = EXTENSIONS_BY_MIME_TYPE.get(normalized);
+    return extension === undefined ? "" : extension;
+}
+
+/**
+ * Ensures a file name carries an extension.
+ *
+ * A name that already has one is returned untouched. Otherwise the extension
+ * of the Telegram file path is preferred, then one derived from the MIME
+ * type; if neither is available the name is returned as is.
+ */
+function fileNameWithExtension(fileName: string, mimeType?: string, telegramFilePath?: string): string {
+    if (path.extname(fileName)) {
+        return fileName;
+    }
+
+    let suffix = "";
+    if (telegramFilePath) {
+        suffix = path.extname(telegramFilePath);
+    }
+    if (!suffix) {
+        suffix = extensionFromMimeType(mimeType);
+    }
+
+    if (!suffix) {
+        return fileName;
+    }
+    return `${fileName}${suffix}`;
+}
+
+/**
+ * Returns the cache directory for an attachment kind. Images are stored under
+ * "photo"; every other kind uses its own name.
+ */
+function cacheDirFor(kind: StoredAttachmentKind): string {
+    let dirName: string = kind;
+    if (kind === "image") {
+        dirName = "photo";
+    }
+    return path.join(Environment.DATA_PATH, "cache", dirName);
+}
+
+/**
+ * Builds the on-disk cache path for an attachment: the sanitized unique id
+ * (falling back to the file id) plus the extension taken from `fileName`.
+ */
+function cachePathFor(kind: StoredAttachmentKind, fileUniqueId: string | undefined, fileId: string, fileName: string): string {
+    const identifier = fileUniqueId || fileId;
+    const extension = path.extname(fileName) || "";
+    const cachedName = safeFileName(identifier) + extension;
+    return path.join(cacheDirFor(kind), cachedName);
+}
+
+/**
+ * Downloads a Telegram file into the attachment cache unless it is already
+ * there, and returns the metadata describing the cached copy.
+ *
+ * The download is serialized per cache path. Data is written to a temporary
+ * file first and then renamed into place, so the final path never holds a
+ * partially written file.
+ *
+ * @param kind Attachment kind; selects the cache directory.
+ * @param fileId Telegram file id used to resolve the download path.
+ * @param fileName Preferred file name; an extension is appended when missing.
+ * @param mimeType Optional MIME type, used to guess a missing extension.
+ * @param fileUniqueId Optional unique id, preferred over `fileId` for the cache name.
+ * @returns Metadata for the attachment. NOTE(review): this is returned even
+ *          when the download yielded no data, in which case `cachePath` does
+ *          not exist on disk — confirm callers rely on that.
+ */
+async function downloadToCache(kind: StoredAttachmentKind, fileId: string, fileName: string, mimeType?: string, fileUniqueId?: string): Promise<StoredAttachment | null> {
+    const file = await bot.getFile({file_id: fileId});
+    const finalFileName = fileNameWithExtension(fileName, mimeType, file.file_path);
+    const location = cachePathFor(kind, fileUniqueId, fileId, finalFileName);
+
+    await cachePathLocks.runExclusive(location, async () => {
+        if (fs.existsSync(location)) return;
+
+        const buffer = await downloadTelegramFile(file.file_path);
+        if (!buffer) return;
+
+        const tempLocation = `${location}.${process.pid}.${Date.now()}.tmp`;
+        fs.mkdirSync(path.dirname(location), {recursive: true});
+        try {
+            fs.writeFileSync(tempLocation, buffer);
+            fs.renameSync(tempLocation, location);
+        } catch (e) {
+            // Do not leave a stray temp file behind when the write or the
+            // rename fails (same cleanup as the audio conversion path).
+            fs.rmSync(tempLocation, {force: true});
+            throw e;
+        }
+    });
+
+    return {kind, fileId, fileUniqueId, fileName: finalFileName, mimeType, cachePath: location};
+}
+
+/**
+ * Converts `input` to a WAV file at `output` with ffmpeg, unless `output`
+ * already exists.
+ *
+ * The conversion runs under the per-path lock for `output` and inside the
+ * ffmpeg semaphore. ffmpeg writes to a temporary file that is renamed on
+ * success and removed on failure.
+ *
+ * @param input Path of the source media file.
+ * @param output Path of the WAV file to produce.
+ * @param noVideo When true, the video stream is dropped before conversion.
+ */
+async function convertAudioToWav(input: string, output: string, noVideo = false): Promise<void> {
+    const transcode = async (): Promise<void> => {
+        // Re-check after waiting for an ffmpeg slot.
+        if (fs.existsSync(output)) return;
+
+        const tempOutput = `${output}.${process.pid}.${Date.now()}.tmp.wav`;
+        try {
+            await performFFmpeg(() => {
+                const command = ffmpeg(input);
+                if (noVideo) {
+                    command.noVideo();
+                }
+                const pipeline = command.toFormat("wav").save(tempOutput);
+                return pipeline.on("progress", (progress) => {
+                    console.log("progress", progress);
+                });
+            });
+            fs.renameSync(tempOutput, output);
+        } catch (e) {
+            if (fs.existsSync(tempOutput)) {
+                fs.rmSync(tempOutput, {force: true});
+            }
+            throw e;
+        }
+    };
+
+    await cachePathLocks.runExclusive(output, async () => {
+        if (fs.existsSync(output)) return;
+
+        await ffmpegSemaphore.runExclusive(async () => {
+            await transcode();
+        });
+    });
+}
+
+/**
+ * Downloads every supported attachment of a Telegram message into the cache
+ * and returns the metadata of the cached files.
+ *
+ * Handled in order: photo (last entry of the size list), document, voice,
+ * audio, video note. Voice messages and video notes are additionally
+ * converted to WAV; if that conversion fails the error is logged and the
+ * unconverted file is kept. Any other error is logged and ends processing,
+ * returning whatever was collected up to that point.
+ *
+ * @param msg Incoming Telegram message.
+ * @returns Metadata for each attachment that was processed.
+ */
+export async function cacheMessageAttachments(msg: Message): Promise<StoredAttachment[]> {
+    const result: StoredAttachment[] = [];
+
+    try {
+        if (msg.photo?.length) {
+            // Use the last size in the list (presumably the largest — confirm against the Bot API).
+            const size = msg.photo[msg.photo.length - 1]!;
+            const file = await downloadToCache("image", size.file_id, `${size.file_unique_id || size.file_id}.jpg`, "image/jpeg", size.file_unique_id);
+            if (file) result.push(file);
+        }
+
+        if (msg.document) {
+            const doc = msg.document;
+            // Documents are classified by MIME type prefix; anything else stays a generic document.
+            const kind: StoredAttachmentKind = doc.mime_type?.startsWith("image/")
+                ? "image"
+                : doc.mime_type?.startsWith("audio/")
+                    ? "audio"
+                    : "document";
+            const file = await downloadToCache(kind, doc.file_id, doc.file_name || `${doc.file_unique_id || doc.file_id}`, doc.mime_type, doc.file_unique_id);
+            if (file) result.push(file);
+        }
+
+        if (msg.voice) {
+            const file = await downloadToCache("audio", msg.voice.file_id, `${msg.voice.file_unique_id || msg.voice.file_id}.ogg`, msg.voice.mime_type || "audio/ogg", msg.voice.file_unique_id);
+            if (file) {
+                // The WAV copy lives next to the original in the audio cache.
+                const output = cachePathFor("audio", msg.voice.file_unique_id, msg.voice.file_id, `${msg.voice.file_unique_id || msg.voice.file_id}.wav`);
+                try {
+                    await convertAudioToWav(file.cachePath, output);
+                    // Point the attachment at the converted file only after a successful conversion.
+                    file.cachePath = output;
+                    file.fileName = file?.fileName?.replace(".ogg", ".wav");
+                    file.mimeType = "audio/wav";
+                } catch (e) {
+                    logError(e);
+                }
+            }
+
+            if (file) result.push(file);
+        }
+
+        if (msg.audio) {
+            const file = await downloadToCache("audio", msg.audio.file_id, msg.audio.file_name || `${msg.audio.file_unique_id || msg.audio.file_id}.mp3`, msg.audio.mime_type, msg.audio.file_unique_id);
+            if (file) result.push(file);
+        }
+
+        if (msg.video_note) {
+            const file = await downloadToCache("video-note", msg.video_note.file_id, `${msg.video_note.file_unique_id || msg.video_note.file_id}.mp4`, "video/mp4", msg.video_note.file_unique_id);
+            if (file) {
+                // Only the audio track is kept: the WAV goes to the audio cache and video is dropped.
+                const output = cachePathFor("audio", msg.video_note.file_unique_id, msg.video_note.file_id, `${msg.video_note.file_unique_id || msg.video_note.file_id}.wav`);
+                try {
+                    await convertAudioToWav(file.cachePath, output, true);
+                    file.cachePath = output;
+                    file.fileName = file?.fileName?.replace(".mp4", ".wav");
+                    file.mimeType = "audio/wav";
+                } catch (e) {
+                    logError(e);
+                }
+            }
+
+            if (file) result.push(file);
+        }
+    } catch (e) {
+        // Best effort: report the failure and return what was cached so far.
+        logError(e);
+    }
+
+    return result;
+}
+
+/**
+ * Loads cached attachments into memory. Attachments whose cache file is
+ * missing on disk are left out.
+ */
+export function attachmentsToDownloadedFiles(attachments: StoredAttachment[]): AiDownloadedFile[] {
+    const available = attachments.filter(candidate => fs.existsSync(candidate.cachePath));
+
+    const loaded: AiDownloadedFile[] = [];
+    for (const item of available) {
+        const {kind, fileId, fileName, mimeType, cachePath} = item;
+        loaded.push({
+            kind,
+            fileId,
+            fileName,
+            mimeType,
+            buffer: fs.readFileSync(cachePath),
+            path: cachePath,
+        });
+    }
+    return loaded;
+}
+
+/**
+ * Releases the in-memory contents of downloaded files and empties the list.
+ * The cached files on disk are left in place.
+ */
+export function cleanupDownloads(files: AiDownloadedFile[]): void {
+    // Swap each buffer for an empty one so the payloads can be collected early.
+    files.forEach(entry => {
+        entry.buffer = Buffer.alloc(0);
+    });
+    files.length = 0;
+}
@@ -0,0 +1,541 @@
+import {FileOptions, InlineKeyboardMarkup, Message} from "typescript-telegram-bot-api";
+import {bot} from "../index";
+import {buildCancelledGenerationText, logError, replyToMessage} from "../util/utils";
+import {Environment} from "../common/environment";
+import {MessageStore} from "../common/message-store";
+import {createQueuedFunction} from "../util/async-lock";
+import {enqueueTelegramApiCall} from "../util/telegram-api-queue";
+import fs from "node:fs";
+import {StoredAttachment, StoredAttachmentKind} from "../model/stored-attachment";
+import {StoredMessage} from "../model/stored-message";
+import {prepareTelegramMarkdownV2} from "../util/markdown-v2-renderer";
+import {AiProvider} from "../model/ai-provider";
+
+// Size limits applied before talking to Telegram (presumably mirroring the Bot API limits — confirm).
+// Maximum length of message text.
+const TELEGRAM_LIMIT = 4096;
+// Maximum length of a media caption.
+const TELEGRAM_CAPTION_LIMIT = 1024;
+// Largest artifact that will be uploaded at all.
+const TELEGRAM_FILE_LIMIT_BYTES = 50 * 1024 * 1024;
+// Largest image that is sent as a photo rather than as a document.
+const TELEGRAM_PHOTO_LIMIT_BYTES = 10 * 1024 * 1024;
+// Interval of the periodic flush timer, in milliseconds.
+const EDIT_INTERVAL_MS = 4500;
+
+/** A file on disk that should be delivered to the chat. */
+export type TelegramArtifactFile = {
+    // "image" artifacts may be sent as photos; everything else is sent as a document.
+    kind: "image" | "file";
+    // Location the upload stream is opened from.
+    path: string;
+    // Used as the upload file name and as the caption.
+    fileName: string;
+    // Content type of the upload; "application/octet-stream" is used when absent.
+    mimeType?: string;
+    // Checked against the file and photo size limits before sending.
+    sizeBytes: number;
+};
+
+export class TelegramStreamMessage {
+    private waitMessage: Message | null = null;
+    private timer: NodeJS.Timeout | null = null;
+    private lastSent = "";
+    private text = "";
+    private status = "";
+    private mediaMode = false;
+    private cancelled = false;
+    private cancelledProvider = "";
+    private readonly startedAt = Date.now();
+    private readonly enqueueEdit = createQueuedFunction();
+
+    /**
+     * @param sourceMessage Message being answered; replies and uploads target its chat.
+     * @param cancelRequestId Identifier embedded in the cancel button's callback data.
+     * @param stream When true, flushes are skipped until a message exists to edit.
+     * @param regenerateCallbackData Callback data for the regenerate button; no button when omitted.
+     * @param targetMessage Existing message to edit in `start` instead of sending a new reply.
+     * @param cancelProvider Provider appended to the cancel callback data when set.
+     * @param isGuest When true, the final non-stream reply is not sent (that branch is currently commented out).
+     */
+    constructor(
+        private readonly sourceMessage: Message,
+        private readonly cancelRequestId: string,
+        private readonly stream: boolean,
+        private readonly regenerateCallbackData?: string,
+        private readonly targetMessage?: Message,
+        private readonly cancelProvider?: AiProvider,
+        private readonly isGuest?: boolean,
+    ) {
+    }
+
+    /**
+     * Builds the inline keyboard with a single cancel button. The callback
+     * data carries the request id and, when known, the provider.
+     */
+    keyboard(): InlineKeyboardMarkup {
+        let callbackData = `/cancel_ai ${this.cancelRequestId}`;
+        if (this.cancelProvider) {
+            callbackData = `/cancel_ai ${this.cancelRequestId} ${this.cancelProvider}`;
+        }
+
+        const cancelButton = {
+            text: Environment.cancelText,
+            callback_data: callbackData,
+        };
+        return {inline_keyboard: [[cancelButton]]};
+    }
+
+    /** Returns a markup with no buttons, used to clear an existing keyboard. */
+    emptyKeyboard(): InlineKeyboardMarkup {
+        const markup: InlineKeyboardMarkup = {inline_keyboard: []};
+        return markup;
+    }
+
+    /**
+     * Builds the keyboard with a single regenerate button, or returns `null`
+     * when no regenerate callback data was supplied.
+     */
+    regenerateKeyboard(): InlineKeyboardMarkup | null {
+        const callbackData = this.regenerateCallbackData;
+        if (!callbackData) {
+            return null;
+        }
+
+        const regenerateButton = {
+            text: Environment.regenerateText,
+            callback_data: callbackData,
+        };
+        return {inline_keyboard: [[regenerateButton]]};
+    }
+
+    /**
+     * Tells whether an error is a "message is not modified" failure, i.e. the
+     * edit would not have changed anything.
+     *
+     * @param error Value caught from a Telegram API call.
+     * @returns `true` when the error message (or a thrown string) contains
+     *          the marker text.
+     */
+    private isMessageNotModified(error: unknown): boolean {
+        const textToLookUp = "message is not modified";
+
+        if (error instanceof Error) {
+            return String(error.message).includes(textToLookUp);
+        }
+
+        // A thrown string primitive is not `instanceof String`, so it must be
+        // detected with `typeof`; boxed strings are still accepted.
+        if (typeof error === "string" || error instanceof String) {
+            return error.includes(textToLookUp);
+        }
+
+        return false;
+    }
+
+    /**
+     * Replaces the inline keyboard of the current message. Does nothing when
+     * no message exists. "Message is not modified" failures are ignored;
+     * other errors are logged rather than thrown.
+     */
+    private async updateKeyboard(replyMarkup: InlineKeyboardMarkup): Promise<void> {
+        if (!this.waitMessage) return;
+
+        try {
+            await enqueueTelegramApiCall(
+                // The callback reads this.waitMessage when it is invoked, not when it is created.
+                () => bot.editMessageReplyMarkup({
+                    chat_id: this.waitMessage!.chat.id,
+                    message_id: this.waitMessage!.message_id,
+                    reply_markup: replyMarkup,
+                }),
+                {
+                    method: "editMessageReplyMarkup",
+                    chatId: this.waitMessage.chat.id,
+                    chatType: this.waitMessage.chat.type,
+                }
+            );
+        } catch (e) {
+            if (!this.isMessageNotModified(e)) logError(e);
+        }
+    }
+
+    /** Clears the inline keyboard of the current message. */
+    private async removeKeyboard(): Promise<void> {
+        const noButtons = this.emptyKeyboard();
+        await this.updateKeyboard(noButtons);
+    }
+
+    /** (Re)starts the periodic flush; flush failures are logged. */
+    private startFlushTimer(): void {
+        if (this.timer) {
+            clearInterval(this.timer);
+        }
+
+        const tick = () => this.flush().catch(logError);
+        this.timer = setInterval(tick, EDIT_INTERVAL_MS);
+    }
+
+    /**
+     * Composes the text to display: the generated text followed by the status
+     * line, separated by a blank line. Falls back to the wait placeholder when
+     * both are blank and truncates results longer than the message limit.
+     */
+    private visibleText(): string {
+        const sections: string[] = [];
+        for (const piece of [this.text, this.status]) {
+            if (piece && piece.trim().length) {
+                sections.push(piece);
+            }
+        }
+
+        const combined = sections.join("\n\n").trim() || Environment.waitThinkText;
+        if (combined.length <= TELEGRAM_LIMIT) {
+            return combined;
+        }
+        return combined.substring(0, TELEGRAM_LIMIT - 1);
+    }
+
+    /** Same as `visibleText`, additionally truncated to the caption limit. */
+    private visibleCaption(): string {
+        const full = this.visibleText();
+        return full.length > TELEGRAM_CAPTION_LIMIT
+            ? full.substring(0, TELEGRAM_CAPTION_LIMIT - 1)
+            : full;
+    }
+
+    /**
+     * Shows the initial status message and starts the periodic flush timer.
+     *
+     * When a target message was supplied it is edited in place; if that edit
+     * fails for any reason other than "message is not modified", the error is
+     * logged and a new reply to the source message is sent instead.
+     *
+     * @param initialStatus Status line to display first.
+     * @returns The message that subsequent flushes will edit.
+     */
+    async start(initialStatus: string): Promise<Message> {
+        this.status = initialStatus;
+        const rawText = this.visibleText();
+        const formatted = prepareTelegramMarkdownV2(rawText, {mode: "draft"});
+
+        if (this.targetMessage) {
+            this.waitMessage = this.targetMessage;
+
+            try {
+                // Storing the target message is best effort; failures are only logged.
+                await MessageStore.put(this.targetMessage).catch(logError);
+                const result = await enqueueTelegramApiCall(
+                    () => bot.editMessageText({
+                        chat_id: this.targetMessage!.chat.id,
+                        message_id: this.targetMessage!.message_id,
+                        text: formatted,
+                        parse_mode: "MarkdownV2",
+                        reply_markup: this.keyboard(),
+                    }),
+                    {
+                        method: "editMessageText",
+                        chatId: this.targetMessage.chat.id,
+                        chatType: this.targetMessage.chat.type,
+                    }
+                );
+                // The edit may return `true` instead of a message; keep the known message in that case.
+                if (result && result !== true) this.waitMessage = result;
+                this.mediaMode = false;
+                this.lastSent = rawText;
+                await this.store();
+                this.startFlushTimer();
+                return this.waitMessage;
+            } catch (e) {
+                if (this.isMessageNotModified(e)) {
+                    // The text already matches; only the keyboard needs to be put back.
+                    this.lastSent = rawText;
+                    await this.updateKeyboard(this.keyboard());
+                    await this.store();
+                    this.startFlushTimer();
+                    return this.waitMessage;
+                }
+
+                // Editing failed: forget the target and fall through to sending a fresh reply.
+                logError(e);
+                this.waitMessage = null;
+                this.mediaMode = false;
+            }
+        }
+
+        this.waitMessage = await replyToMessage({
+            message: this.sourceMessage,
+            text: formatted,
+            reply_markup: this.keyboard(),
+            parse_mode: "MarkdownV2"
+        });
+        this.lastSent = rawText;
+        this.startFlushTimer();
+        return this.waitMessage;
+    }
+
+    /** Sets the status line; ignored once the generation was cancelled. */
+    setStatus(status: string): void {
+        if (!this.cancelled) {
+            this.status = status;
+        }
+    }
+
+    /** Returns the current status line. */
+    getStatus(): string {
+        const {status} = this;
+        return status;
+    }
+
+    /** Empties the status line; ignored once the generation was cancelled. */
+    clearStatus(): void {
+        if (!this.cancelled) {
+            this.status = "";
+        }
+    }
+
+    /** Appends a chunk to the generated text; empty chunks and post-cancel calls are ignored. */
+    append(delta: string): void {
+        if (this.cancelled || !delta) {
+            return;
+        }
+        this.text = this.text + delta;
+    }
+
+    /** Replaces the generated text; ignored once the generation was cancelled. */
+    replaceText(text: string): void {
+        if (!this.cancelled) {
+            this.text = text;
+        }
+    }
+
+    /** Returns the generated text accumulated so far. */
+    getText(): string {
+        const {text} = this;
+        return text;
+    }
+
+    /**
+     * Pushes the current text to Telegram through the edit queue.
+     *
+     * @param replyMarkup Keyboard to attach; `null` removes the keyboard. Defaults to the cancel keyboard.
+     * @param end Marks the final flush of a generation.
+     */
+    async flush(replyMarkup: InlineKeyboardMarkup | null = this.keyboard(), end?: boolean): Promise<void> {
+        const task = () => this.flushUnsafe(replyMarkup, end);
+        return this.enqueueEdit(task);
+    }
+
+    /**
+     * Sends the current text to Telegram. Must only run through the edit
+     * queue (see `flush`).
+     *
+     * Edits the existing message (caption in media mode, text otherwise) or,
+     * for the final flush of a non-stream response with no message yet, sends
+     * a new reply. Nothing is sent when the text is unchanged; in that case
+     * only the keyboard is updated or removed as requested. Errors are logged,
+     * except "message is not modified".
+     *
+     * @param replyMarkup Keyboard to attach; `null` removes the keyboard.
+     * @param end Marks the final flush; selects the "final" markdown mode.
+     */
+    private async flushUnsafe(replyMarkup: InlineKeyboardMarkup | null = this.keyboard(), end?: boolean): Promise<void> {
+        // While streaming there is nothing to edit until start() has produced a message.
+        if (!this.waitMessage && this.stream) return;
+
+        const next = this.mediaMode ? this.visibleCaption() : this.visibleText();
+        const shouldRemoveKeyboard = replyMarkup === null;
+        // Unchanged text and no keyboard wanted: only strip the keyboard.
+        if (next === this.lastSent && shouldRemoveKeyboard) {
+            await this.removeKeyboard();
+            return;
+        }
+
+        const formatted = prepareTelegramMarkdownV2(next, {mode: end ? "final" : "draft"});
+
+        // Unchanged text with a keyboard: swap the keyboard on the final flush, otherwise do nothing.
+        if (next === this.lastSent && replyMarkup !== null) {
+            if (end) await this.updateKeyboard(replyMarkup);
+            return;
+        }
+
+        try {
+            if (!this.stream && end && !this.waitMessage) {
+                if (this.isGuest) {
+                    // NOTE(review): the guest answer below is disabled, so nothing is sent for guests here — confirm this is intended.
+                    // await enqueueTelegramApiCall(() => bot.answerGuestQuery({
+                    //         guest_query_id: this.sourceMessage.guest_query_id ?? "",
+                    //         result: {}
+                    //     }),
+                    //     {});
+                } else {
+                    await replyToMessage({
+                        message: this.sourceMessage,
+                        text: formatted,
+                        parse_mode: "MarkdownV2",
+                    });
+                }
+            } else {
+                // NOTE(review): when no message exists nothing is sent in this branch, yet lastSent is still updated below — confirm.
+                if (this.waitMessage) {
+                    const result = this.mediaMode
+                        ? await enqueueTelegramApiCall(
+                            () => bot.editMessageCaption({
+                                chat_id: this.waitMessage!.chat.id,
+                                message_id: this.waitMessage!.message_id,
+                                caption: formatted,
+                                parse_mode: "MarkdownV2",
+                                reply_markup: replyMarkup ?? this.emptyKeyboard(),
+                            }),
+                            {
+                                method: "editMessageCaption",
+                                chatId: this.waitMessage.chat.id,
+                                chatType: this.waitMessage.chat.type,
+                            }
+                        )
+                        : await enqueueTelegramApiCall(
+                            () => bot.editMessageText({
+                                chat_id: this.waitMessage!.chat.id,
+                                message_id: this.waitMessage!.message_id,
+                                text: formatted,
+                                parse_mode: "MarkdownV2",
+                                reply_markup: replyMarkup ?? this.emptyKeyboard(),
+                            }),
+                            {
+                                method: "editMessageText",
+                                chatId: this.waitMessage.chat.id,
+                                chatType: this.waitMessage.chat.type,
+                            }
+                        );
+                    // The edit may return `true` instead of a message; keep the known message in that case.
+                    if (result && result !== true) this.waitMessage = result;
+                }
+            }
+            if (shouldRemoveKeyboard) await this.removeKeyboard();
+            this.lastSent = next;
+        } catch (e: any) {
+            // An unchanged message still needs its keyboard removed when that was requested.
+            if (shouldRemoveKeyboard && this.isMessageNotModified(e)) {
+                await this.removeKeyboard();
+                this.lastSent = next;
+                return;
+            }
+            if (!this.isMessageNotModified(e)) logError(e);
+        }
+    }
+
+    /**
+     * Stops periodic flushing, marks the generation as cancelled, replaces the
+     * text with the cancellation notice and performs a final flush with the
+     * regenerate keyboard (if any).
+     *
+     * @param provider Provider name passed to the cancellation text builder.
+     */
+    async cancel(provider: string): Promise<void> {
+        if (this.timer) {
+            clearInterval(this.timer);
+        }
+        this.timer = null;
+
+        this.cancelled = true;
+        this.cancelledProvider = provider;
+        this.status = "";
+
+        // The notice has to fit a caption when the message carries media.
+        const limit = this.mediaMode ? TELEGRAM_CAPTION_LIMIT : TELEGRAM_LIMIT;
+        this.text = buildCancelledGenerationText(this.text, this.cancelledProvider, limit);
+
+        await this.flush(this.regenerateKeyboard(), true);
+        await this.store();
+    }
+
+    /** Shows an image in the response message, serialized through the edit queue. */
+    async showImage(image: Buffer): Promise<void> {
+        const task = () => this.showImageUnsafe(image);
+        return this.enqueueEdit(task);
+    }
+
+    /** Uploads an artifact file to the chat, serialized through the edit queue. */
+    async sendArtifact(file: TelegramArtifactFile): Promise<Message | null> {
+        const task = () => this.sendArtifactUnsafe(file);
+        return this.enqueueEdit(task);
+    }
+
+    /**
+     * Displays an image with the current text as its caption. Must only run
+     * through the edit queue (see `showImage`).
+     *
+     * Without an existing message, a photo is sent as a reply in non-stream
+     * mode and nothing happens in stream mode. Otherwise the existing message
+     * is turned into a photo message. Afterwards the instance is in media
+     * mode, so later flushes edit the caption. Does nothing after cancel.
+     */
+    private async showImageUnsafe(image: Buffer): Promise<void> {
+        if (this.cancelled) return;
+        const next = this.visibleCaption();
+
+        if (!this.waitMessage) {
+            if (this.stream) return;
+
+            // Errors from this send are not caught here and propagate to the caller.
+            this.waitMessage = await enqueueTelegramApiCall(
+                () => bot.sendPhoto({
+                    chat_id: this.sourceMessage.chat.id,
+                    photo: image,
+                    caption: prepareTelegramMarkdownV2(next, {mode: "final"}),
+                    parse_mode: "MarkdownV2",
+                    reply_parameters: {message_id: this.sourceMessage.message_id},
+                }),
+                {
+                    method: "sendPhoto",
+                    chatId: this.sourceMessage.chat.id,
+                    chatType: this.sourceMessage.chat.type,
+                }
+            );
+            this.mediaMode = true;
+            this.lastSent = next;
+            return;
+        }
+
+        try {
+            const result = await enqueueTelegramApiCall(
+                () => bot.editMessageMedia({
+                    chat_id: this.waitMessage!.chat.id,
+                    message_id: this.waitMessage!.message_id,
+                    media: {
+                        type: "photo",
+                        media: image,
+                        caption: prepareTelegramMarkdownV2(next, {mode: "final"}),
+                        parse_mode: "MarkdownV2",
+                    },
+                    reply_markup: this.keyboard(),
+                }),
+                {
+                    method: "editMessageMedia",
+                    chatId: this.waitMessage.chat.id,
+                    chatType: this.waitMessage.chat.type,
+                }
+            );
+            // The edit may return `true` instead of a message; keep the known message in that case.
+            if (result && result !== true) this.waitMessage = result;
+            this.mediaMode = true;
+            this.lastSent = next;
+        } catch (e: any) {
+            // On failure the message stays as it was and media mode is not entered.
+            if (!String(e?.message ?? e).includes("message is not modified")) logError(e);
+        }
+    }
+
+    /**
+     * Uploads an artifact as a reply to the source message. Must only run
+     * through the edit queue (see `sendArtifact`).
+     *
+     * Eligible images are sent as photos; if that fails the error is logged
+     * and the file is sent as a document instead. All other files are sent as
+     * documents. The sent message is then recorded via `storeArtifactMessage`.
+     *
+     * @returns The sent message, or `null` when the generation was cancelled.
+     * @throws Error when the file exceeds the upload size limit.
+     */
+    private async sendArtifactUnsafe(file: TelegramArtifactFile): Promise<Message | null> {
+        if (this.cancelled) return null;
+
+        if (file.sizeBytes > TELEGRAM_FILE_LIMIT_BYTES) {
+            throw new Error(Environment.getTelegramFileTooLargeText(
+                file.fileName,
+                TELEGRAM_FILE_LIMIT_BYTES / 1024 / 1024,
+            ));
+        }
+
+        // The file name doubles as the caption, cut to the caption limit.
+        const caption = file.fileName.slice(0, TELEGRAM_CAPTION_LIMIT);
+        const isPhoto = this.isPhotoArtifact(file);
+
+        // The "uploading" indicator is cosmetic; failures are only logged.
+        await enqueueTelegramApiCall(
+            () => bot.sendChatAction({
+                chat_id: this.sourceMessage.chat.id,
+                action: isPhoto ? "upload_photo" : "upload_document",
+            }),
+            {
+                method: "sendChatAction",
+                chatId: this.sourceMessage.chat.id,
+                chatType: this.sourceMessage.chat.type,
+            }
+        ).catch(logError);
+
+        let sent: Message;
+        if (isPhoto) {
+            try {
+                sent = await enqueueTelegramApiCall(
+                    async () => {
+                        // The stream is opened inside the queued call and always destroyed afterwards.
+                        const upload = this.createArtifactUpload(file);
+                        try {
+                            return await bot.sendPhoto({
+                                chat_id: this.sourceMessage.chat.id,
+                                photo: upload,
+                                caption,
+                                reply_parameters: {message_id: this.sourceMessage.message_id},
+                            });
+                        } finally {
+                            this.destroyUpload(upload);
+                        }
+                    },
+                    {
+                        method: "sendPhoto",
+                        chatId: this.sourceMessage.chat.id,
+                        chatType: this.sourceMessage.chat.type,
+                    }
+                );
+            } catch (e) {
+                // Sending as a photo failed; retry the same file as a document.
+                logError(e);
+                sent = await this.sendArtifactAsDocument(file, caption);
+            }
+        } else {
+            sent = await this.sendArtifactAsDocument(file, caption);
+        }
+
+        await this.storeArtifactMessage(sent, file);
+        return sent;
+    }
+
+    /**
+     * Tells whether an artifact qualifies for the `sendPhoto` path instead of
+     * being uploaded as a document.
+     *
+     * @param file Artifact to classify.
+     * @returns `true` only for artifacts of kind "image" whose size does not
+     * exceed `TELEGRAM_PHOTO_LIMIT_BYTES` and whose MIME type (compared
+     * case-insensitively) is JPEG, PNG or WebP. A missing MIME type never qualifies.
+     */
+    private isPhotoArtifact(file: TelegramArtifactFile): boolean {
+        if (file.kind !== "image") return false;
+
+        const fitsPhotoLimit = file.sizeBytes <= TELEGRAM_PHOTO_LIMIT_BYTES;
+        if (!fitsPhotoLimit) return false;
+
+        const photoMimeTypes = ["image/jpeg", "image/png", "image/webp"];
+        const normalizedMime = (file.mimeType || "").toLowerCase();
+        return photoMimeTypes.includes(normalizedMime);
+    }
+
+    /**
+     * Wraps an artifact file in a `FileOptions` upload backed by a read stream
+     * opened on `file.path`.
+     *
+     * @param file Artifact whose path, file name and MIME type describe the upload.
+     * @returns A new upload; the content type falls back to
+     * "application/octet-stream" when the artifact has no MIME type.
+     */
+    private createArtifactUpload(file: TelegramArtifactFile): FileOptions {
+        const stream = fs.createReadStream(file.path);
+
+        return new FileOptions(stream, {
+            filename: file.fileName,
+            contentType: file.mimeType || "application/octet-stream",
+        });
+    }
+
+    /**
+     * Calls `destroy()` on the file object behind an upload when that object
+     * exposes a callable `destroy` member; otherwise does nothing.
+     *
+     * @param upload Upload whose underlying file object should be released.
+     */
+    private destroyUpload(upload: FileOptions): void {
+        const source = upload.file;
+
+        if (!("destroy" in source)) return;
+        if (typeof source.destroy !== "function") return;
+
+        source.destroy();
+    }
+
+    /**
+     * Queues a `sendDocument` call that uploads the artifact as a reply to the
+     * source message.
+     *
+     * @param file Artifact to upload.
+     * @param caption Caption attached to the document.
+     * @returns The message returned by `bot.sendDocument`.
+     */
+    private async sendArtifactAsDocument(file: TelegramArtifactFile, caption: string): Promise<Message> {
+        // The upload is created inside the queued task, so the read stream is
+        // only opened when the task actually runs, and it is destroyed whether
+        // or not the send succeeds.
+        const uploadDocument = async () => {
+            const upload = this.createArtifactUpload(file);
+            try {
+                return await bot.sendDocument({
+                    chat_id: this.sourceMessage.chat.id,
+                    document: upload,
+                    caption,
+                    reply_parameters: {message_id: this.sourceMessage.message_id},
+                });
+            } finally {
+                this.destroyUpload(upload);
+            }
+        };
+
+        return enqueueTelegramApiCall(uploadDocument, {
+            method: "sendDocument",
+            chatId: this.sourceMessage.chat.id,
+            chatType: this.sourceMessage.chat.type,
+        });
+    }
+
+    /**
+     * Records a sent artifact message in `MessageStore` with the artifact as
+     * its single attachment.
+     *
+     * @param sent Message returned by the send call.
+     * @param file Artifact that was uploaded.
+     */
+    private async storeArtifactMessage(sent: Message, file: TelegramArtifactFile): Promise<void> {
+        const sentDocument = sent.document;
+        // Last entry of the photo array (presumably the largest size; confirm against the Bot API).
+        const lastPhotoSize = sent.photo?.[sent.photo.length - 1];
+
+        // Document ids win over photo ids; the local path is the last resort for fileId only.
+        const fileId = sentDocument?.file_id ?? lastPhotoSize?.file_id ?? file.path;
+        const fileUniqueId = sentDocument?.file_unique_id ?? lastPhotoSize?.file_unique_id;
+
+        const attachment: StoredAttachment = {
+            kind: file.kind === "image" ? "image" : "document",
+            fileId,
+            fileUniqueId,
+            fileName: file.fileName,
+            mimeType: file.mimeType,
+            cachePath: file.path,
+        };
+
+        const stored: StoredMessage = {
+            chatId: sent.chat.id,
+            id: sent.message_id,
+            replyToMessageId: sent.reply_to_message?.message_id ?? this.sourceMessage.message_id,
+            fromId: sent.from?.id ?? 0,
+            text: sent.caption ?? file.fileName,
+            date: sent.date ?? Math.floor(Date.now() / 1000),
+            attachments: [attachment],
+        };
+
+        await MessageStore.put(stored);
+    }
+
+    /**
+     * Stops the update timer, performs the final flush and stores the result.
+     *
+     * When the run was cancelled, the timing footer and the status reset are
+     * skipped; the final flush and the store still happen.
+     *
+     * @param removeKeyboard When true the final flush uses `regenerateKeyboard()`,
+     * otherwise `keyboard()`.
+     */
+    async finish(removeKeyboard = true): Promise<void> {
+        if (this.timer) {
+            clearInterval(this.timer);
+        }
+        this.timer = null;
+
+        if (!this.cancelled) {
+            if (Environment.SEND_TIME_TOOK) {
+                const elapsedMs = Date.now() - this.startedAt;
+                // Only append the footer when the text stays under the limit with 32 characters of headroom.
+                const hasRoom = this.text.length + 32 < TELEGRAM_LIMIT;
+                if (hasRoom) {
+                    this.text += `\n\n⏱️ ${elapsedMs}ms`;
+                }
+            }
+
+            this.clearStatus();
+        }
+
+        const replyMarkup = removeKeyboard ? this.regenerateKeyboard() : this.keyboard();
+        await this.flush(replyMarkup, true);
+        await this.store();
+    }
+
+    /**
+     * Stops the update timer, clears the status and replaces the text with
+     * `Environment.errorText` followed by the error description, then flushes
+     * with `regenerateKeyboard()`. Unlike `finish`, it does not call `store()`.
+     *
+     * @param error Thrown value; `Error` instances contribute their `message`,
+     * anything else is converted with `String()`.
+     */
+    async fail(error: unknown): Promise<void> {
+        if (this.timer) {
+            clearInterval(this.timer);
+        }
+        this.timer = null;
+        this.status = "";
+
+        const heading = Environment.errorText;
+        const reason = error instanceof Error ? error.message : String(error);
+        this.text = `${heading}\n${reason}`;
+
+        const replyMarkup = this.regenerateKeyboard();
+        await this.flush(replyMarkup, true);
+    }
+
+    /**
+     * Saves `waitMessage` to `MessageStore` with its text replaced by
+     * `visibleText()`. Does nothing when there is no `waitMessage`; failures
+     * are logged rather than rethrown.
+     */
+    private async store(): Promise<void> {
+        const placeholder = this.waitMessage;
+        if (!placeholder) return;
+
+        try {
+            const snapshot = {...placeholder, text: this.visibleText()} as Message;
+            await MessageStore.put(snapshot);
+        } catch (err) {
+            logError(err);
+        }
+    }
+}
@@ -1,5 +1,14 @@
+import {AiProvider} from "./ai-provider";
+
+export type AiEndpointInfo = { // Optional endpoint details that a capability entry can carry; every field may be omitted.
+    provider?: AiProvider; // provider associated with the endpoint
+    baseUrl?: string; // presumably a base URL override for that provider's API — TODO confirm against the runtime resolver
+    external?: boolean; // NOTE(review): the meaning of "external" is not defined in this file — confirm with callers
+};
+
 export type AiCapabilityInfo = {
    supported?: boolean,
    external?: boolean,
-    model?: string
+    model?: string, // trailing comma added so the new endpoint member can follow
+    endpoint?: AiEndpointInfo, // optional endpoint details (provider / baseUrl / external) for this capability
 };
@@ -1,9 +1,14 @@
 import {AiCapabilityInfo} from "./ai-capability-info";

 export class AiModelCapabilities {
-    vision?: AiCapabilityInfo;
-    ocr?: AiCapabilityInfo;
-    thinking?: AiCapabilityInfo;
-    tools?: AiCapabilityInfo;
-    audio?: AiCapabilityInfo;
+    vision: AiCapabilityInfo | undefined; // fields are now required keys typed `| undefined` instead of optional (`?`) members
+    ocr: AiCapabilityInfo | undefined; // the keys of this class form AiCapabilityName (keyof) in the AI runtime module
+    thinking: AiCapabilityInfo | undefined; // that module's PURPOSE_SUFFIXES record needs an entry for every key listed here
+    extendedThinking: AiCapabilityInfo | undefined; // capability key added by this change
+    tools: AiCapabilityInfo | undefined;
+    audio: AiCapabilityInfo | undefined;
+    documents: AiCapabilityInfo | undefined; // capability key added by this change
+    outputImages: AiCapabilityInfo | undefined; // capability key added by this change
+    speechToText: AiCapabilityInfo | undefined; // capability key added by this change
+    textToSpeech: AiCapabilityInfo | undefined; // capability key added by this change
 }