From 32c35f54aa7e1488080ca709e0243ef69634250c Mon Sep 17 00:00:00 2001 From: Danil Nikolaev Date: Sun, 10 May 2026 22:52:35 +0300 Subject: [PATCH] ai: add unified runtime and provider adapters --- src/ai/ai-runtime-target.ts | 213 +++ src/ai/cancel-registry.ts | 55 + src/ai/chat-messages-types.ts | 90 ++ src/ai/gemini-chat-message.ts | 84 ++ src/ai/mistral-chat-message.ts | 112 ++ src/ai/ollama-chat-message.ts | 3 + src/ai/openai-chat-message.ts | 3 + src/ai/provider-model-runtime.ts | 325 +++++ src/ai/provider-request-queue.ts | 184 +++ src/ai/regenerate-callback.ts | 24 + src/ai/telegram-attachments.ts | 227 +++ src/ai/telegram-stream-message.ts | 541 +++++++ src/ai/unified-ai-runner.ts | 2155 ++++++++++++++++++++++++++++ src/model/ai-capability-info.ts | 13 +- src/model/ai-model-capabilities.ts | 17 +- 15 files changed, 4038 insertions(+), 8 deletions(-) create mode 100644 src/ai/ai-runtime-target.ts create mode 100644 src/ai/cancel-registry.ts create mode 100644 src/ai/chat-messages-types.ts create mode 100644 src/ai/gemini-chat-message.ts create mode 100644 src/ai/mistral-chat-message.ts create mode 100644 src/ai/ollama-chat-message.ts create mode 100644 src/ai/openai-chat-message.ts create mode 100644 src/ai/provider-model-runtime.ts create mode 100644 src/ai/provider-request-queue.ts create mode 100644 src/ai/regenerate-callback.ts create mode 100644 src/ai/telegram-attachments.ts create mode 100644 src/ai/telegram-stream-message.ts create mode 100644 src/ai/unified-ai-runner.ts diff --git a/src/ai/ai-runtime-target.ts b/src/ai/ai-runtime-target.ts new file mode 100644 index 0000000..2926462 --- /dev/null +++ b/src/ai/ai-runtime-target.ts @@ -0,0 +1,213 @@ +import {Mistral} from "@mistralai/mistralai"; +import {GoogleGenAI} from "@google/genai"; +import {Ollama} from "ollama"; +import {OpenAI} from "openai"; +import {Environment} from "../common/environment"; +import {AiModelCapabilities} from "../model/ai-model-capabilities"; +import {AiProvider} from "../model/ai-provider"; + +export type AiCapabilityName = keyof AiModelCapabilities; +export type AiRuntimePurpose = AiCapabilityName | "chat"; + +export type AiRuntimeTarget = { + provider: AiProvider; + purpose: AiRuntimePurpose; + model: string; + baseUrl?: string; + apiKey?: string; +}; + +export type GeminiApiMode = "google" | "openai"; + +const GEMINI_OPENAI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"; + +const PURPOSE_SUFFIXES: Record = { + chat: ["CHAT"], + vision: ["VISION", "IMAGE"], + ocr: ["OCR", "VISION", "IMAGE"], + thinking: ["THINKING", "THINK"], + extendedThinking: ["EXTENDED_THINKING", "THINKING", "THINK"], + tools: ["TOOLS", "CHAT"], + audio: ["AUDIO"], + documents: ["DOCUMENTS", "RAG", "EMBEDDING"], + outputImages: ["OUTPUT_IMAGES", "IMAGE"], + speechToText: ["SPEECH_TO_TEXT", "TRANSCRIPTION", "STT", "AUDIO"], + textToSpeech: ["TEXT_TO_SPEECH", "TTS"], +}; + +function providerPrefix(provider: AiProvider): string { + return provider.toString(); +} + +function env(name: string): string | undefined { + return Environment.getOptionalConfigValue(name); +} + +function firstEnv(names: string[]): string | undefined { + for (const name of names) { + const value = env(name); + if (value) return value; + } + + return undefined; +} + +function endpointEnvNames(provider: AiProvider, purpose: AiRuntimePurpose): string[] { + const prefix = providerPrefix(provider); + return PURPOSE_SUFFIXES[purpose].flatMap(suffix => [ + `${prefix}_${suffix}_BASE_URL`, + `${prefix}_${suffix}_ENDPOINT`, + `${prefix}_${suffix}_ADDRESS`, + ]); +} + +function apiKeyEnvNames(provider: AiProvider, purpose: AiRuntimePurpose): string[] { + const prefix = providerPrefix(provider); + return PURPOSE_SUFFIXES[purpose].map(suffix => `${prefix}_${suffix}_API_KEY`); +} + +function modelEnvNames(provider: AiProvider, purpose: AiRuntimePurpose): string[] { + const prefix = providerPrefix(provider); + return PURPOSE_SUFFIXES[purpose].map(suffix => `${prefix}_${suffix}_MODEL`); +} + +export function getProviderBaseUrl(provider: AiProvider): string | undefined { + switch (provider) { + case AiProvider.OLLAMA: + return env("OLLAMA_ENDPOINT"); + case AiProvider.GEMINI: + return env("GEMINI_BASE_URL") ?? env("GEMINI_ENDPOINT") + ?? (Environment.GEMINI_API_MODE === "openai" ? GEMINI_OPENAI_BASE_URL : undefined); + case AiProvider.MISTRAL: + return env("MISTRAL_BASE_URL") ?? env("MISTRAL_ENDPOINT"); + case AiProvider.OPENAI: + return env("OPENAI_BASE_URL") ?? env("OPENAI_ENDPOINT"); + } +} + +export function getProviderApiKey(provider: AiProvider): string | undefined { + switch (provider) { + case AiProvider.OLLAMA: + return Environment.OLLAMA_API_KEY; + case AiProvider.GEMINI: + return Environment.GEMINI_API_KEY; + case AiProvider.MISTRAL: + return Environment.MISTRAL_API_KEY; + case AiProvider.OPENAI: + return Environment.OPENAI_API_KEY; + } +} + +export function getDefaultModelForPurpose(provider: AiProvider, purpose: AiRuntimePurpose): string { + switch (provider) { + case AiProvider.OLLAMA: + switch (purpose) { + case "vision": + case "ocr": + case "outputImages": + return Environment.OLLAMA_IMAGE_MODEL; + case "thinking": + case "extendedThinking": + return Environment.OLLAMA_THINK_MODEL; + case "audio": + case "speechToText": + return Environment.OLLAMA_AUDIO_MODEL; + case "documents": + return Environment.OLLAMA_EMBEDDING_MODEL; + default: + return Environment.OLLAMA_CHAT_MODEL; + } + case AiProvider.GEMINI: + switch (purpose) { + case "outputImages": + return Environment.GEMINI_IMAGE_MODEL; + case "speechToText": + return Environment.GEMINI_TRANSCRIPTION_MODEL; + case "textToSpeech": + return Environment.GEMINI_TTS_MODEL; + default: + return Environment.GEMINI_MODEL; + } + case AiProvider.MISTRAL: + switch (purpose) { + case "speechToText": + return Environment.MISTRAL_TRANSCRIPTION_MODEL; + case "textToSpeech": + return Environment.MISTRAL_TTS_MODEL || Environment.MISTRAL_MODEL; + default: + return Environment.MISTRAL_MODEL; + } + case AiProvider.OPENAI: + switch (purpose) { + case "outputImages": + return Environment.OPENAI_IMAGE_MODEL; + case "speechToText": + return Environment.OPENAI_TRANSCRIPTION_MODEL; + case "textToSpeech": + return Environment.OPENAI_TTS_MODEL; + default: + return Environment.OPENAI_MODEL; + } + } +} + +export function resolveAiRuntimeTarget( + provider: AiProvider, + purpose: AiRuntimePurpose, + modelOverride?: string, +): AiRuntimeTarget { + const model = modelOverride + ?? firstEnv(modelEnvNames(provider, purpose)) + ?? getDefaultModelForPurpose(provider, purpose); + const baseUrl = firstEnv(endpointEnvNames(provider, purpose)) ?? getProviderBaseUrl(provider); + const apiKey = firstEnv(apiKeyEnvNames(provider, purpose)) ?? getProviderApiKey(provider); + + return {provider, purpose, model, baseUrl, apiKey}; +} + +export function sameRuntimeEndpoint(left: AiRuntimeTarget, right: AiRuntimeTarget): boolean { + return left.provider === right.provider + && (left.baseUrl ?? "") === (right.baseUrl ?? "") + && (left.apiKey ?? "") === (right.apiKey ?? ""); +} + +export function createOpenAiClient(target: AiRuntimeTarget): OpenAI { + return new OpenAI({ + apiKey: target.apiKey, + baseURL: target.baseUrl, + }); +} + +export function getGeminiApiMode(target?: AiRuntimeTarget): GeminiApiMode { + if (Environment.GEMINI_API_MODE === "openai") return "openai"; + if (Environment.GEMINI_API_MODE === "google") return "google"; + if ((target?.baseUrl ?? "").includes("/openai")) return "openai"; + return "google"; +} + +export function createGeminiOpenAiClient(target: AiRuntimeTarget): OpenAI { + return createOpenAiClient({ + ...target, + baseUrl: target.baseUrl ?? GEMINI_OPENAI_BASE_URL, + }); +} + +export function createGoogleGenAiClient(target: AiRuntimeTarget): GoogleGenAI { + return new GoogleGenAI({ + apiKey: target.apiKey, + }); +} + +export function createMistralClient(target: AiRuntimeTarget): Mistral { + return new Mistral({ + apiKey: target.apiKey, + serverURL: target.baseUrl, + }); +} + +export function createOllamaClient(target: AiRuntimeTarget): Ollama { + return new Ollama({ + host: target.baseUrl?.endsWith(":11434") ? target.baseUrl : target.baseUrl + ":11434", + headers: target.apiKey ? {"Authorization": `Bearer ${target.apiKey}`} : undefined, + }); +} diff --git a/src/ai/cancel-registry.ts b/src/ai/cancel-registry.ts new file mode 100644 index 0000000..f48cf9b --- /dev/null +++ b/src/ai/cancel-registry.ts @@ -0,0 +1,55 @@ +import {randomUUID} from "node:crypto"; + +export type AiCancelRequest = { + id: string; + chatId: number; + messageId?: number; + fromId: number; + provider: string; + controller: AbortController; + onCancel?: () => Promise | void; +}; + +const requests = new Map(); + +export function createAiCancelRequest(params: Omit & { controller?: AbortController }): AiCancelRequest { + const request: AiCancelRequest = { + id: randomUUID(), + controller: params.controller ?? new AbortController(), + chatId: params.chatId, + messageId: params.messageId, + fromId: params.fromId, + provider: params.provider, + onCancel: params.onCancel, + }; + requests.set(request.id, request); + return request; +} + +export function setAiCancelMessageId(id: string, messageId: number): void { + const request = requests.get(id); + if (request) request.messageId = messageId; +} + +export function getAiCancelRequest(id: string): AiCancelRequest | undefined { + return requests.get(id); +} + +export async function abortAiRequest(id: string): Promise { + const request = requests.get(id); + if (!request) return false; + + request.controller.abort(); + + try { + await request.onCancel?.(); + } finally { + requests.delete(id); + } + + return true; +} + +export function finishAiRequest(id: string): void { + requests.delete(id); +} diff --git a/src/ai/chat-messages-types.ts b/src/ai/chat-messages-types.ts new file mode 100644 index 0000000..ba97239 --- /dev/null +++ b/src/ai/chat-messages-types.ts @@ -0,0 +1,90 @@ +import {AiToolCall} from "./tool-types"; +import {OllamaChatMessage} from "./ollama-chat-message"; +import {GeminiMessage} from "./gemini-chat-message"; +import {MistralChatMessage} from "./mistral-chat-message"; +import {MessageAudioPart, MessageImagePart} from "../common/message-part"; +import {OpenAIChatMessage} from "./openai-chat-message"; + +export type ChatMessage = { + role: "system" | "user" | "assistant" | "tool"; + content: string; + images?: string[]; + imageParts?: MessageImagePart[]; + documents?: string[]; + audios?: string[]; + audioParts?: MessageAudioPart[]; + videos?: string[]; + videoNotes?: string[]; + thinking?: string; + tool_calls?: AiToolCall[]; + tool_name?: string; +} + +export function asOllamaChatMessage(message: ChatMessage): OllamaChatMessage { + return { + role: message.role, + content: message.content, + thinking: message.thinking, + images: message.images, + tool_calls: message.tool_calls, + tool_name: message.tool_name + }; +} + +// export function asGeminiChatMessage(message: ChatMessage): GeminiMessage { +// if (message.images) { +// return { +// role: message.role, +// content: message.images.map(() => { +// return { +// type: "image", +// }; +// }) +// }; +// } +// +// return { +// role: message.role, +// content: { +// type: "text", +// text: message.content, +// }, +// }; +// } + +export function asMistralChatMessage(message: ChatMessage): MistralChatMessage { + return { + role: message.role, + content: message.content, + }; +} + +// export function asOpenAIChatMessage(message: ChatMessage): OpenAIChatMessage { +// return { +// +// } +// } + +/* + const messages: any[] = ordered.map(part => { + const content: any[] = [{ + type: "input_text", + text: (Environment.USE_NAMES_IN_PROMPT && !part.bot ? `MESSAGE FROM USER \"${part.name}\":\n` : "") + part.content, + }]; + + if (!part.bot) { + for (const image of part.images ?? []) { + content.push({type: "input_image", image_url: `data:image/jpeg;base64,${image}`, detail: "auto"}); + } + } + + return {role: part.bot ? "assistant" : "user", content}; + }); + + if (Environment.SYSTEM_PROMPT && Environment.USE_SYSTEM_PROMPT) { + messages.unshift({role: "system", content: Environment.SYSTEM_PROMPT}); + } + return {parts: messages, imageCount}; + */ + +export type AiChatMessage = | OpenAIChatMessage | OllamaChatMessage | MistralChatMessage | GeminiMessage; diff --git a/src/ai/gemini-chat-message.ts b/src/ai/gemini-chat-message.ts new file mode 100644 index 0000000..776f750 --- /dev/null +++ b/src/ai/gemini-chat-message.ts @@ -0,0 +1,84 @@ +export type GeminiUserInputStep = { + type: "user_input"; + content?: Array; +} + +export type GeminiModelOutputStep = { + type: "model_output"; + content?: Array; +} + +export type GeminiFunctionCallStep = { + id: string; + arguments: { + [key: string]: unknown; + }; + name: string; + type: "function_call"; + signature?: string; +} + +export type GeminiFunctionResultStep = { + call_id: string; + result: unknown | Array | string; + type: "function_result"; + is_error?: boolean; + name?: string; + signature?: string; +} + +export type GeminiStep = + | GeminiUserInputStep + | GeminiModelOutputStep + | GeminiFunctionCallStep + | GeminiFunctionResultStep; + +export type GeminiTextContent = { + text: string; +} + +export type GeminiInlineContent = { + inlineData: { + data: string; + mimeType: string; + }; +} + +export type GeminiImageContent = GeminiInlineContent; +export type GeminiAudioContent = GeminiInlineContent; +export type GeminiDocumentContent = GeminiInlineContent; +export type GeminiVideoContent = GeminiInlineContent; + +export type GeminiFunctionCallContent = { + functionCall: { + id?: string; + name?: string; + args?: Record; + }; +} + +export type GeminiFunctionResponseContent = { + functionResponse: { + id?: string; + name?: string; + response: Record; + }; +} + +export type GeminiContent = + | GeminiTextContent + | GeminiInlineContent + | GeminiFunctionCallContent + | GeminiFunctionResponseContent; + +export type GeminiTurn = { + content?: Array | GeminiContent; + role?: string; +} + +export type GeminiInput = string | Array | Array | GeminiContent; + +export type GeminiMessage = { + role: "user" | "model"; + parts: GeminiContent[]; +}; diff --git a/src/ai/mistral-chat-message.ts b/src/ai/mistral-chat-message.ts new file mode 100644 index 0000000..c767a84 --- /dev/null +++ b/src/ai/mistral-chat-message.ts @@ -0,0 +1,112 @@ +export const MistralImageDetail = { + Low: "low", + Auto: "auto", + High: "high", +} as const; +export type MistralImageDetail = OpenEnum; + +declare const __brand: unique symbol; +export type Unrecognized = T & { [__brand]: "unrecognized" }; + +export type OpenEnum>> = + | T[keyof T] + | Unrecognized; + +export const BuiltInConnectors = { + WebSearch: "web_search", + WebSearchPremium: "web_search_premium", + CodeInterpreter: "code_interpreter", + ImageGeneration: "image_generation", + DocumentLibrary: "document_library", +} as const; +export type BuiltInConnectors = OpenEnum; + +export type MistralTextChunk = { + type: "text"; + text: string; +}; + +export type MistralToolReferenceChunk = { + type: "tool_reference" | undefined; + tool: BuiltInConnectors | string; + title: string; + url?: string | null | undefined; + favicon?: string | null | undefined; + description?: string | null | undefined; +}; + +export type MistralThinkChunk = { + type: "thinking"; + thinking: Array; + signature?: string | null | undefined; + closed?: boolean | undefined; +}; + +export type MistralImageURLChunk = { + type: "image_url"; + imageUrl: string | { + url: string; + detail?: MistralImageDetail | null | undefined; + }; +} + +export type MistralContentChunk = + | MistralTextChunk + | MistralThinkChunk + | MistralImageURLChunk + +/* + | (ImageURLChunk & { type: "image_url" }) + | (DocumentURLChunk & { type: "document_url" }) + | (TextChunk & { type: "text" }) + | (ReferenceChunk & { type: "reference" }) + | (FileChunk & { type: "file" }) + | (ThinkChunk & { type: "thinking" }) + | AudioChunk + */ + +export type MistralFunctionCall = { + name: string; + arguments: { [k: string]: any } | string; +}; + +export type MistralToolCall = { + id?: string | undefined; + type?: string | undefined; + function: MistralFunctionCall; + index?: number | undefined; +}; + +export type MistralAssistantMessage = { + role: "assistant"; + content?: string | Array | null | undefined; + toolCalls?: Array | null | undefined; + prefix?: boolean | undefined; +} + +export type MistralSystemMessageContentChunks = + | MistralTextChunk + | MistralThinkChunk; + +export type MistralSystemMessage = { + role: "system"; + content: string; +} + +export type MistralToolMessage = { + role: "tool"; + content: string | Array | null; + toolCallId?: string | null | undefined; + name?: string | null | undefined; +}; + +export type MistralUserMessage = { + role: "user"; + content: string | Array | null; +}; + +export type MistralChatMessage = + | MistralAssistantMessage + | MistralSystemMessage + | MistralToolMessage + | MistralUserMessage \ No newline at end of file diff --git a/src/ai/ollama-chat-message.ts b/src/ai/ollama-chat-message.ts new file mode 100644 index 0000000..5e6c3c8 --- /dev/null +++ b/src/ai/ollama-chat-message.ts @@ -0,0 +1,3 @@ +import {Message} from "ollama"; + +export type OllamaChatMessage = Message; \ No newline at end of file diff --git a/src/ai/openai-chat-message.ts b/src/ai/openai-chat-message.ts new file mode 100644 index 0000000..e96cc59 --- /dev/null +++ b/src/ai/openai-chat-message.ts @@ -0,0 +1,3 @@ +import {ResponseInputItem} from "openai/resources/responses/responses"; + +export type OpenAIChatMessage = ResponseInputItem \ No newline at end of file diff --git a/src/ai/provider-model-runtime.ts b/src/ai/provider-model-runtime.ts new file mode 100644 index 0000000..6da2c13 --- /dev/null +++ b/src/ai/provider-model-runtime.ts @@ -0,0 +1,325 @@ +import {AiProvider} from "../model/ai-provider"; +import {AiModelCapabilities} from "../model/ai-model-capabilities"; +import {Environment} from "../common/environment"; +import {logError} from "../util/utils"; +import {AiCapabilityInfo} from "../model/ai-capability-info"; +import {isOllamaSpeechToTextModel} from "./speech-to-text-models"; +import { + AiCapabilityName, + AiRuntimeTarget, + createGeminiOpenAiClient, + createGoogleGenAiClient, + createMistralClient, + createOllamaClient, + createOpenAiClient, + getGeminiApiMode, + resolveAiRuntimeTarget, + sameRuntimeEndpoint, +} from "./ai-runtime-target"; + +export type RuntimeModelInfo = { + provider: AiProvider; + model: string; + capabilities: AiModelCapabilities; +}; + +const CAPABILITY_NAMES: AiCapabilityName[] = [ + "vision", + "ocr", + "thinking", + "extendedThinking", + "tools", + "audio", + "documents", + "outputImages", + "speechToText", + "textToSpeech", +]; + +export function getRuntimeModel(provider: AiProvider): string { + switch (provider) { + case AiProvider.OLLAMA: + return Environment.OLLAMA_CHAT_MODEL; + case AiProvider.GEMINI: + return Environment.GEMINI_MODEL; + case AiProvider.MISTRAL: + return Environment.MISTRAL_MODEL; + case AiProvider.OPENAI: + return Environment.OPENAI_MODEL; + } +} + +export function setRuntimeModel(provider: AiProvider, model: string): void { + switch (provider) { + case AiProvider.OLLAMA: + Environment.OLLAMA_CHAT_MODEL = model; + break; + case AiProvider.GEMINI: + Environment.GEMINI_MODEL = model; + break; + case AiProvider.MISTRAL: + Environment.MISTRAL_MODEL = model; + break; + case AiProvider.OPENAI: + Environment.OPENAI_MODEL = model; + break; + } +} + +function capability(supported: boolean, target?: AiRuntimeTarget, runtimeTarget?: AiRuntimeTarget): AiCapabilityInfo { + const result: AiCapabilityInfo = {supported}; + if (target?.model) result.model = target.model; + if (target) { + result.endpoint = { + provider: target.provider, + baseUrl: target.baseUrl, + external: runtimeTarget ? !sameRuntimeEndpoint(target, runtimeTarget) : false, + }; + } + if (target && runtimeTarget && (target.model !== runtimeTarget.model || !sameRuntimeEndpoint(target, runtimeTarget))) { + result.external = true; + } + return result; +} + +function buildCapabilities(overrides: Partial>): AiModelCapabilities { + return Object.assign(new AiModelCapabilities(), { + vision: {supported: false}, + ocr: {supported: false}, + thinking: {supported: false}, + extendedThinking: {supported: false}, + tools: {supported: false}, + audio: {supported: false}, + documents: {supported: false}, + outputImages: {supported: false}, + speechToText: {supported: false}, + textToSpeech: {supported: false}, + ...overrides, + }); +} + +function lowerModelName(model: string): string { + return model.toLowerCase(); +} + +function isOpenAiTextModel(model: string): boolean { + const name = lowerModelName(model); + if (!name) return false; + if (/^(gpt-image|dall-e|tts-|whisper|text-embedding|text-moderation|omni-moderation)/.test(name)) return false; + if (name.includes("transcribe")) return false; + return /^(gpt-|o\d|chatgpt-|codex-|computer-use)/.test(name); +} + +function isOpenAiReasoningModel(model: string): boolean { + const name = lowerModelName(model); + return /^o\d/.test(name) || name.startsWith("gpt-5"); +} + +function isOpenAiVisionModel(model: string): boolean { + const name = lowerModelName(model); + if (!isOpenAiTextModel(model)) return false; + if (name.startsWith("gpt-3.5")) return false; + if (name.includes("audio-preview") || name.includes("search-preview")) return false; + return true; +} + +function isGeminiNonChatModel(model: string): boolean { + const name = lowerModelName(model); + return name.includes("lyria") || name.includes("-tts") || name.includes("image-preview") || name.endsWith("-image"); +} + +function geminiSupportsAudioInput(model: string): boolean { + const name = lowerModelName(model); + return name.startsWith("gemini-") && !isGeminiNonChatModel(model); +} + +export async function getModelCapabilities( + provider: AiProvider, + model: string, + purpose: AiCapabilityName | "chat" = "chat", +): Promise { + if (!model) return undefined; + + try { + const runtimeTarget = resolveAiRuntimeTarget(provider, "chat", getRuntimeModel(provider)); + const target = resolveAiRuntimeTarget(provider, purpose, model); + + switch (provider) { + case AiProvider.OLLAMA: { + const ollama = createOllamaClient(target); + const info = await ollama.show({model}); + const modelCapabilities = Array.isArray(info.capabilities) ? info.capabilities : []; + const has = (cap: string): boolean => modelCapabilities.includes(cap); + const audioSupported = isOllamaSpeechToTextModel(model); + const documentsTarget = resolveAiRuntimeTarget(provider, "documents"); + + return buildCapabilities({ + vision: capability(has("vision"), target, runtimeTarget), + ocr: capability(has("ocr"), target, runtimeTarget), + thinking: capability(has("thinking"), target, runtimeTarget), + extendedThinking: capability(has("thinking") && model.includes("gpt-oss"), target, runtimeTarget), + tools: capability(has("tools"), target, runtimeTarget), + audio: capability(audioSupported, target, runtimeTarget), + documents: capability(!!documentsTarget.model, documentsTarget, runtimeTarget), + speechToText: capability(audioSupported, target, runtimeTarget), + }); + } + case AiProvider.GEMINI: { + const chatLike = lowerModelName(model).startsWith("gemini-") && !isGeminiNonChatModel(model); + const reasoningModel = lowerModelName(model).includes("2.5") || lowerModelName(model).includes("thinking"); + const imageTarget = resolveAiRuntimeTarget(provider, "outputImages"); + const speechTarget = resolveAiRuntimeTarget(provider, "speechToText"); + const ttsTarget = resolveAiRuntimeTarget(provider, "textToSpeech"); + + return buildCapabilities({ + vision: capability(chatLike, target, runtimeTarget), + ocr: capability(chatLike, target, runtimeTarget), + thinking: capability(reasoningModel, target, runtimeTarget), + extendedThinking: capability(reasoningModel, target, runtimeTarget), + tools: capability(chatLike, target, runtimeTarget), + audio: capability(geminiSupportsAudioInput(model), target, runtimeTarget), + speechToText: capability(!!speechTarget.apiKey && geminiSupportsAudioInput(speechTarget.model), speechTarget, runtimeTarget), + outputImages: capability(!!imageTarget.apiKey && !!imageTarget.model, imageTarget, runtimeTarget), + textToSpeech: capability(!!ttsTarget.apiKey && !!ttsTarget.model, ttsTarget, runtimeTarget), + }); + } + case AiProvider.MISTRAL: { + const mistral = createMistralClient(target); + const info = await mistral.models.retrieve({modelId: model}); + const caps = info.type !== "UNKNOWN" ? info.capabilities : undefined; + const speechTarget = resolveAiRuntimeTarget(provider, "speechToText"); + const ttsTarget = resolveAiRuntimeTarget(provider, "textToSpeech"); + + return buildCapabilities({ + vision: capability(!!caps?.vision, target, runtimeTarget), + ocr: capability(!!caps?.ocr, target, runtimeTarget), + thinking: capability(!!caps?.reasoning, target, runtimeTarget), + tools: capability(!!caps?.functionCalling, target, runtimeTarget), + audio: capability(!!caps?.audio, target, runtimeTarget), + documents: capability(true, target, runtimeTarget), + speechToText: capability(!!speechTarget.model || !!caps?.audioTranscription, speechTarget, runtimeTarget), + textToSpeech: capability(!!ttsTarget.apiKey && !!ttsTarget.model, ttsTarget, runtimeTarget), + }); + } + case AiProvider.OPENAI: { + const textModel = isOpenAiTextModel(model); + const reasoningModel = isOpenAiReasoningModel(model); + const imageTarget = resolveAiRuntimeTarget(provider, "outputImages"); + const speechTarget = resolveAiRuntimeTarget(provider, "speechToText"); + const ttsTarget = resolveAiRuntimeTarget(provider, "textToSpeech"); + + return buildCapabilities({ + vision: capability(isOpenAiVisionModel(model), target, runtimeTarget), + ocr: capability(isOpenAiVisionModel(model), target, runtimeTarget), + thinking: capability(reasoningModel, target, runtimeTarget), + extendedThinking: capability(reasoningModel, target, runtimeTarget), + tools: capability(textModel, target, runtimeTarget), + outputImages: capability(!!imageTarget.model, imageTarget, runtimeTarget), + speechToText: capability(!!speechTarget.model, speechTarget, runtimeTarget), + textToSpeech: capability(!!ttsTarget.apiKey && !!ttsTarget.model, ttsTarget, runtimeTarget), + }); + } + } + + } catch (e) { + logError(e); + return undefined; + } +} + + +export async function getRuntimeCapabilities( + provider: AiProvider = Environment.DEFAULT_AI_PROVIDER, + model: string | undefined = getRuntimeModel(provider) +): Promise { + const runtimeTarget = resolveAiRuntimeTarget(provider, "chat", model ?? getRuntimeModel(provider)); + const result = await getModelCapabilities(provider, runtimeTarget.model, "chat") ?? buildCapabilities({}); + + for (const capabilityName of CAPABILITY_NAMES) { + const target = resolveAiRuntimeTarget(provider, capabilityName); + if (target.model === runtimeTarget.model && sameRuntimeEndpoint(target, runtimeTarget)) continue; + + const targetCapabilities = await getModelCapabilities(provider, target.model, capabilityName); + const capabilityInfo = targetCapabilities?.[capabilityName]; + if (capabilityInfo) { + result[capabilityName] = capabilityInfo; + } + } + + return result; +} + +export async function formatRuntimeModelInfo( + provider: AiProvider = Environment.DEFAULT_AI_PROVIDER, + model: string | undefined = getRuntimeModel(provider), + caps?: AiModelCapabilities +): Promise { + if (!caps) caps = await getRuntimeCapabilities(provider, model); + const line = (title: string, value?: AiCapabilityInfo) => { + const state = value?.supported ? "✅" : "❌"; + const external = value?.external ?? (!!value?.model && value.model !== model); + return Environment.getRuntimeCapabilityLineText({ + state, + title, + model: value?.model, + endpointBaseUrl: value?.endpoint?.baseUrl, + external, + }); + }; + + return Environment.getRuntimeModelInfoText( + provider.toString().toLowerCase(), + model, + [ + line(Environment.runtimeCapabilityVisionText, caps.vision), + line(Environment.runtimeCapabilityOcrText, caps.ocr), + line(Environment.runtimeCapabilityThinkingText, caps.thinking), + line(Environment.runtimeCapabilityExtendedThinkingText, caps.extendedThinking), + line(Environment.runtimeCapabilityToolsText, caps.tools), + line(Environment.runtimeCapabilityAudioText, caps.audio), + line(Environment.runtimeCapabilitySpeechToTextText, caps.speechToText), + line(Environment.runtimeCapabilityTextToSpeechText, caps.textToSpeech), + line(Environment.runtimeCapabilityDocumentsText, caps.documents), + line(Environment.runtimeCapabilityOutputImagesText, caps.outputImages), + ], + ); +} + +export async function listProviderModels(provider: AiProvider): Promise { + const target = resolveAiRuntimeTarget(provider, "chat", getRuntimeModel(provider)); + + switch (provider) { + case AiProvider.OLLAMA: { + const ollama = createOllamaClient(target); + const result: any = await ollama.list(); + return (result.models ?? []).map((m: any) => m.model || m.name).filter(Boolean); + } + case AiProvider.GEMINI: { + const models: string[] = []; + if (getGeminiApiMode(target) === "openai") { + const geminiAi = createGeminiOpenAiClient(target); + const iterable: any = await geminiAi.models.list(); + for await (const model of iterable) models.push(model.name || model.id || String(model)); + return models; + } + + const geminiAi = createGoogleGenAiClient(target); + const iterable: any = await geminiAi.models.list(); + for await (const model of iterable) { + const name = model.name || model.id || String(model); + models.push(String(name).replace(/^models\//, "")); + } + return models; + } + case AiProvider.MISTRAL: { + const mistralAi = createMistralClient(target); + const result: any = await mistralAi.models.list(); + return (result.data ?? result.models ?? result ?? []).map((m: any) => m.id || m.name || String(m)).filter(Boolean); + } + case AiProvider.OPENAI: { + const openAi = createOpenAiClient(target); + const result: any = await openAi.models.list(); + return (result.data ?? []).map((m: any) => m.id).filter(Boolean); + } + } +} diff --git a/src/ai/provider-request-queue.ts b/src/ai/provider-request-queue.ts new file mode 100644 index 0000000..b1394e7 --- /dev/null +++ b/src/ai/provider-request-queue.ts @@ -0,0 +1,184 @@ +import {Environment} from "../common/environment"; +import {AiProvider} from "../model/ai-provider"; + +export type AiRequestQueueTarget = { + provider: AiProvider; + model: string; + baseUrl?: string; +}; + +type QueueEntry = { + target: AiRequestQueueTarget; + queueKey: string; + run: () => Promise; + resolve: (value: T | PromiseLike) => void; + reject: (reason?: unknown) => void; + onPositionChange: (requestsBefore: number) => Promise | void; + signal?: AbortSignal; + abortHandler?: () => void; + started: boolean; +}; + +type EnqueueOptions = { + signal?: AbortSignal; + onPositionChange: (requestsBefore: number) => Promise | void; + run: () => Promise; +}; + +class AiProviderRequestQueue { + private readonly waiting = new Map>>(); + private readonly active = new Map(); + + enqueue(target: AiRequestQueueTarget, options: EnqueueOptions): Promise { + if (options.signal?.aborted) { + return Promise.reject(new Error("Aborted")); + } + + return new Promise((resolve, reject) => { + const queueKey = this.queueKey(target); + const entry: QueueEntry = { + target, + queueKey, + run: options.run, + resolve, + reject, + onPositionChange: options.onPositionChange, + signal: options.signal, + started: false, + }; + + entry.abortHandler = () => { + if (entry.started) return; + + const removed = this.removeWaitingEntry(entry); + if (!removed) return; + + reject(new Error("Aborted")); + this.schedule(target); + }; + + options.signal?.addEventListener("abort", entry.abortHandler, {once: true}); + this.getOrCreateQueue(queueKey).push(entry); + this.schedule(target); + }); + } + + private getQueue(queueKey: string): Array> | undefined { + return this.waiting.get(queueKey); + } + + private getOrCreateQueue(queueKey: string): Array> { + let queue = this.waiting.get(queueKey); + if (!queue) { + queue = []; + this.waiting.set(queueKey, queue); + } + return queue; + } + + private activeCount(queueKey: string): number { + return this.active.get(queueKey) ?? 0; + } + + private setActiveCount(queueKey: string, count: number): void { + if (count <= 0) { + this.active.delete(queueKey); + return; + } + this.active.set(queueKey, count); + } + + private maxActive(target: AiRequestQueueTarget): number { + return Math.max(1, Environment.getAiProviderMaxConcurrentRequests(target.provider)); + } + + private normalizeBaseUrl(baseUrl: string | undefined): string { + return (baseUrl ?? "").trim().replace(/\/+$/, ""); + } + + private queueKey(target: AiRequestQueueTarget): string { + return JSON.stringify([ + target.provider, + this.normalizeBaseUrl(target.baseUrl), + target.model.trim(), + ]); + } + + private removeWaitingEntry(entry: QueueEntry): boolean { + const queue = this.getQueue(entry.queueKey); + if (!queue) return false; + + const index = queue.indexOf(entry); + if (index < 0) return false; + + queue.splice(index, 1); + if (entry.abortHandler) { + entry.signal?.removeEventListener("abort", entry.abortHandler); + } + this.deleteQueueIfIdle(entry.queueKey, queue); + return true; + } + + private schedule(target: AiRequestQueueTarget): void { + const queueKey = this.queueKey(target); + const queue = this.getOrCreateQueue(queueKey); + + while (queue.length && this.activeCount(queueKey) < this.maxActive(target)) { + const entry = queue.shift(); + if (!entry) continue; + + if (entry.abortHandler) { + entry.signal?.removeEventListener("abort", entry.abortHandler); + } + + if (entry.signal?.aborted) { + entry.reject(new Error("Aborted")); + continue; + } + + entry.started = true; + this.setActiveCount(queueKey, this.activeCount(queueKey) + 1); + void this.runEntry(entry); + } + + this.updateWaitingMessages(target); + if (!queue.length && this.activeCount(queueKey) <= 0) { + this.waiting.delete(queueKey); + } + } + + private async runEntry(entry: QueueEntry): Promise { + try { + entry.resolve(await entry.run()); + } catch (e) { + entry.reject(e); + } finally { + this.setActiveCount(entry.queueKey, this.activeCount(entry.queueKey) - 1); + this.schedule(entry.target); + } + } + + private updateWaitingMessages(target: AiRequestQueueTarget): void { + const queueKey = this.queueKey(target); + const active = this.activeCount(queueKey); + const queue = [...(this.getQueue(queueKey) ?? [])]; + + Promise.allSettled(queue.map((entry, index) => { + return entry.onPositionChange(active + index); + })).then(results => { + for (const result of results) { + if (result.status === "rejected") { + console.error(result.reason); + } + } + }).catch(console.error); + } + + private deleteQueueIfIdle(queueKey: string, queue: Array>): void { + if (!queue.length && this.activeCount(queueKey) <= 0) { + this.waiting.delete(queueKey); + } + } +} + +export const aiProviderRequestQueue = new AiProviderRequestQueue(); diff --git a/src/ai/regenerate-callback.ts b/src/ai/regenerate-callback.ts new file mode 100644 index 0000000..9b34f18 --- /dev/null +++ b/src/ai/regenerate-callback.ts @@ -0,0 +1,24 @@ +import {AiProvider} from "../model/ai-provider"; + +export const AI_REGENERATE_CALLBACK = "/regenerate_ai"; + +export type AiRegenerateCallbackData = { + provider: AiProvider; + think: boolean; +}; + +export function buildAiRegenerateCallbackData(provider: AiProvider, think = false): string { + return `${AI_REGENERATE_CALLBACK} ${provider} ${think ? "1" : "0"}`; +} + +export function parseAiRegenerateCallbackData(data: string): AiRegenerateCallbackData | null { + if (!data.startsWith(AI_REGENERATE_CALLBACK)) return null; + + const [, provider, think] = data.split(/\s+/); + if (!Object.values(AiProvider).includes(provider as AiProvider)) return null; + + return { + provider: provider as AiProvider, + think: think === "1" || think === "true", + }; +} diff --git a/src/ai/telegram-attachments.ts b/src/ai/telegram-attachments.ts new file mode 100644 index 0000000..d00e810 --- /dev/null +++ b/src/ai/telegram-attachments.ts @@ -0,0 +1,227 @@ +import {Message} from "typescript-telegram-bot-api"; +import {bot} from "../index"; +import {downloadTelegramFile, logError} from "../util/utils"; +import fs from "node:fs"; +import path from "node:path"; +import {Environment} from "../common/environment"; +import {StoredAttachment, StoredAttachmentKind} from "../model/stored-attachment"; +import {performFFmpeg} from "../util/ffmpeg"; +import ffmpeg from "fluent-ffmpeg"; +import {AsyncSemaphore, KeyedAsyncLock} from "../util/async-lock"; + +export type AiDownloadedFile = { + kind: StoredAttachmentKind; + fileId: string; + fileName: string; + mimeType?: string; + buffer: Buffer; + path: string; +}; + +const cachePathLocks = new KeyedAsyncLock(); +const ffmpegSemaphore = new AsyncSemaphore(2); + +function safeFileName(value: string): string { + return value.replace(/[\\/:*?"<>|\u0000-\u001F]/g, "_").slice(0, 180); +} + +function extensionFromMimeType(mimeType?: string): string { + switch ((mimeType || "").toLowerCase()) { + case "audio/ogg": + case "audio/opus": + return ".ogg"; + case "audio/mpeg": + case "audio/mp3": + return ".mp3"; + case "audio/mp4": + case "audio/x-m4a": + return ".m4a"; + case "audio/wav": + case "audio/wave": + case "audio/x-wav": + return ".wav"; + case "audio/webm": + return ".webm"; + case "image/jpeg": + return ".jpg"; + case "image/png": + return ".png"; + case "image/webp": + return ".webp"; + case "application/pdf": + return ".pdf"; + case "text/plain": + return ".txt"; + case "application/zip": + case "application/x-zip": + case "application/x-zip-compressed": + return ".zip"; + case "application/x-tar": + case "application/tar": + return ".tar"; + case "application/gzip": + case "application/x-gzip": + case "application/gzip-compressed": + return ".gz"; + case "video/mp4": + return ".mp4"; + default: + return ""; + } +} + +function fileNameWithExtension(fileName: string, mimeType?: string, telegramFilePath?: string): string { + if (path.extname(fileName)) return fileName; + + const telegramExt = telegramFilePath ? path.extname(telegramFilePath) : ""; + const ext = telegramExt || extensionFromMimeType(mimeType); + return ext ? `${fileName}${ext}` : fileName; +} + +function cacheDirFor(kind: StoredAttachmentKind): string { + const dirName = kind === "image" ? "photo" : kind; + return path.join(Environment.DATA_PATH, "cache", dirName); +} + +function cachePathFor(kind: StoredAttachmentKind, fileUniqueId: string | undefined, fileId: string, fileName: string): string { + const base = safeFileName(fileUniqueId || fileId); + const ext = path.extname(fileName); + return path.join(cacheDirFor(kind), `${base}${ext || ""}`); +} + +async function downloadToCache(kind: StoredAttachmentKind, fileId: string, fileName: string, mimeType?: string, fileUniqueId?: string): Promise { + const file = await bot.getFile({file_id: fileId}); + const finalFileName = fileNameWithExtension(fileName, mimeType, file.file_path); + const location = cachePathFor(kind, fileUniqueId, fileId, finalFileName); + + await cachePathLocks.runExclusive(location, async () => { + if (fs.existsSync(location)) return; + + const buffer = await downloadTelegramFile(file.file_path); + if (!buffer) return; + + const tempLocation = `${location}.${process.pid}.${Date.now()}.tmp`; + fs.mkdirSync(path.dirname(location), {recursive: true}); + fs.writeFileSync(tempLocation, buffer); + fs.renameSync(tempLocation, location); + }); + + return {kind, fileId, fileUniqueId, fileName: finalFileName, mimeType, cachePath: location}; +} + +async function convertAudioToWav(input: string, output: string, noVideo = false): Promise { + await cachePathLocks.runExclusive(output, async () => { + if (fs.existsSync(output)) return; + + await ffmpegSemaphore.runExclusive(async () => { + if (fs.existsSync(output)) return; + + const tempOutput = `${output}.${process.pid}.${Date.now()}.tmp.wav`; + try { + await performFFmpeg(() => { + const command = ffmpeg(input); + if (noVideo) command.noVideo(); + return command + .toFormat("wav") + .save(tempOutput) + .on("progress", (progress) => { + console.log("progress", progress); + }); + }); + fs.renameSync(tempOutput, output); + } catch (e) { + if (fs.existsSync(tempOutput)) { + fs.rmSync(tempOutput, {force: true}); + } + throw e; + } + }); + }); +} + +export async function cacheMessageAttachments(msg: Message): Promise { + const result: StoredAttachment[] = []; + + try { + if (msg.photo?.length) { + const size = msg.photo[msg.photo.length - 1]!; + const file = await downloadToCache("image", size.file_id, `${size.file_unique_id || size.file_id}.jpg`, "image/jpeg", size.file_unique_id); + if (file) result.push(file); + } + + if (msg.document) { + const doc = msg.document; + const kind: StoredAttachmentKind = doc.mime_type?.startsWith("image/") + ? "image" + : doc.mime_type?.startsWith("audio/") + ? "audio" + : "document"; + const file = await downloadToCache(kind, doc.file_id, doc.file_name || `${doc.file_unique_id || doc.file_id}`, doc.mime_type, doc.file_unique_id); + if (file) result.push(file); + } + + if (msg.voice) { + const file = await downloadToCache("audio", msg.voice.file_id, `${msg.voice.file_unique_id || msg.voice.file_id}.ogg`, msg.voice.mime_type || "audio/ogg", msg.voice.file_unique_id); + if (file) { + const output = cachePathFor("audio", msg.voice.file_unique_id, msg.voice.file_id, `${msg.voice.file_unique_id || msg.voice.file_id}.wav`); + try { + await convertAudioToWav(file.cachePath, output); + file.cachePath = output; + file.fileName = file?.fileName?.replace(".ogg", ".wav"); + file.mimeType = "audio/wav"; + } catch (e) { + logError(e); + } + } + + if (file) result.push(file); + } + + if (msg.audio) { + const file = await downloadToCache("audio", msg.audio.file_id, msg.audio.file_name || `${msg.audio.file_unique_id || msg.audio.file_id}.mp3`, msg.audio.mime_type, msg.audio.file_unique_id); + if (file) result.push(file); + } + + if (msg.video_note) { + const file = await downloadToCache("video-note", msg.video_note.file_id, `${msg.video_note.file_unique_id || msg.video_note.file_id}.mp4`, "video/mp4", msg.video_note.file_unique_id); + if (file) { + const output = cachePathFor("audio", msg.video_note.file_unique_id, msg.video_note.file_id, `${msg.video_note.file_unique_id || msg.video_note.file_id}.wav`); + try { + await convertAudioToWav(file.cachePath, output, true); + file.cachePath = output; + file.fileName = file?.fileName?.replace(".mp4", ".wav"); + file.mimeType = "audio/wav"; + } catch (e) { + logError(e); + } + } + + if (file) result.push(file); + } + } catch (e) { + logError(e); + } + + return result; +} + +export function attachmentsToDownloadedFiles(attachments: StoredAttachment[]): AiDownloadedFile[] { + return attachments + .filter(attachment => fs.existsSync(attachment.cachePath)) + .map(attachment => ({ + kind: attachment.kind, + fileId: attachment.fileId, + fileName: attachment.fileName, + mimeType: attachment.mimeType, + buffer: fs.readFileSync(attachment.cachePath), + path: attachment.cachePath, + })); +} + +export function cleanupDownloads(files: AiDownloadedFile[]): void { + // Files stay on disk in the message cache; drop in-memory buffers eagerly. + for (const file of files) { + file.buffer = Buffer.alloc(0); + } + files.length = 0; +} diff --git a/src/ai/telegram-stream-message.ts b/src/ai/telegram-stream-message.ts new file mode 100644 index 0000000..8dc5c6b --- /dev/null +++ b/src/ai/telegram-stream-message.ts @@ -0,0 +1,541 @@ +import {FileOptions, InlineKeyboardMarkup, Message} from "typescript-telegram-bot-api"; +import {bot} from "../index"; +import {buildCancelledGenerationText, logError, replyToMessage} from "../util/utils"; +import {Environment} from "../common/environment"; +import {MessageStore} from "../common/message-store"; +import {createQueuedFunction} from "../util/async-lock"; +import {enqueueTelegramApiCall} from "../util/telegram-api-queue"; +import fs from "node:fs"; +import {StoredAttachment, StoredAttachmentKind} from "../model/stored-attachment"; +import {StoredMessage} from "../model/stored-message"; +import {prepareTelegramMarkdownV2} from "../util/markdown-v2-renderer"; +import {AiProvider} from "../model/ai-provider"; + +const TELEGRAM_LIMIT = 4096; +const TELEGRAM_CAPTION_LIMIT = 1024; +const TELEGRAM_FILE_LIMIT_BYTES = 50 * 1024 * 1024; +const TELEGRAM_PHOTO_LIMIT_BYTES = 10 * 1024 * 1024; +const EDIT_INTERVAL_MS = 4500; + +export type TelegramArtifactFile = { + kind: "image" | "file"; + path: string; + fileName: string; + mimeType?: string; + sizeBytes: number; +}; + +export class TelegramStreamMessage { + private waitMessage: Message | null = null; + private timer: NodeJS.Timeout | null = null; + private lastSent = ""; + private text = ""; + private status = ""; + private mediaMode = false; + private cancelled = false; + private cancelledProvider = ""; + private readonly startedAt = Date.now(); + private readonly enqueueEdit = createQueuedFunction(); + + constructor( + private readonly sourceMessage: Message, + private readonly cancelRequestId: string, + private readonly stream: boolean, + private readonly regenerateCallbackData?: string, + private readonly targetMessage?: Message, + private readonly cancelProvider?: AiProvider, + private readonly isGuest?: boolean, + ) { + } + + keyboard(): InlineKeyboardMarkup { + return { + inline_keyboard: [[{ + text: Environment.cancelText, + callback_data: this.cancelProvider + ? `/cancel_ai ${this.cancelRequestId} ${this.cancelProvider}` + : `/cancel_ai ${this.cancelRequestId}`, + }]], + }; + } + + emptyKeyboard(): InlineKeyboardMarkup { + return {inline_keyboard: []}; + } + + regenerateKeyboard(): InlineKeyboardMarkup | null { + if (!this.regenerateCallbackData) return null; + + return { + inline_keyboard: [[{ + text: Environment.regenerateText, + callback_data: this.regenerateCallbackData, + }]], + }; + } + + private isMessageNotModified(error: unknown): boolean { + const textToLookUp = "message is not modified"; + + if (error && error instanceof Error) { + return String(error.message).includes(textToLookUp); + } + + if (error && error instanceof String) { + return error.includes(textToLookUp); + } + + return false; + } + + private async updateKeyboard(replyMarkup: InlineKeyboardMarkup): Promise { + if (!this.waitMessage) return; + + try { + await enqueueTelegramApiCall( + () => bot.editMessageReplyMarkup({ + chat_id: this.waitMessage!.chat.id, + message_id: this.waitMessage!.message_id, + reply_markup: replyMarkup, + }), + { + method: "editMessageReplyMarkup", + chatId: this.waitMessage.chat.id, + chatType: this.waitMessage.chat.type, + } + ); + } catch (e) { + if (!this.isMessageNotModified(e)) logError(e); + } + } + + private async removeKeyboard(): Promise { + await this.updateKeyboard(this.emptyKeyboard()); + } + + private startFlushTimer(): void { + if (this.timer) clearInterval(this.timer); + this.timer = setInterval(() => this.flush().catch(logError), EDIT_INTERVAL_MS); + } + + private visibleText(): string { + const parts = [this.text, this.status].filter(v => v && v.trim().length); + let value = parts.join("\n\n").trim() || Environment.waitThinkText; + if (value.length > TELEGRAM_LIMIT) { + value = value.substring(0, TELEGRAM_LIMIT - 1); + } + return value; + } + + private visibleCaption(): string { + let value = this.visibleText(); + if (value.length > TELEGRAM_CAPTION_LIMIT) { + value = value.substring(0, TELEGRAM_CAPTION_LIMIT - 1); + } + return value; + } + + async start(initialStatus: string): Promise { + this.status = initialStatus; + const rawText = this.visibleText(); + const formatted = prepareTelegramMarkdownV2(rawText, {mode: "draft"}); + + if (this.targetMessage) { + this.waitMessage = this.targetMessage; + + try { + await MessageStore.put(this.targetMessage).catch(logError); + const result = await enqueueTelegramApiCall( + () => bot.editMessageText({ + chat_id: this.targetMessage!.chat.id, + message_id: this.targetMessage!.message_id, + text: formatted, + parse_mode: "MarkdownV2", + reply_markup: this.keyboard(), + }), + { + method: "editMessageText", + chatId: this.targetMessage.chat.id, + chatType: this.targetMessage.chat.type, + } + ); + if (result && result !== true) this.waitMessage = result; + this.mediaMode = false; + this.lastSent = rawText; + await this.store(); + this.startFlushTimer(); + return this.waitMessage; + } catch (e) { + if (this.isMessageNotModified(e)) { + this.lastSent = rawText; + await this.updateKeyboard(this.keyboard()); + await this.store(); + this.startFlushTimer(); + return this.waitMessage; + } + + logError(e); + this.waitMessage = null; + this.mediaMode = false; + } + } + + this.waitMessage = await replyToMessage({ + message: this.sourceMessage, + text: formatted, + reply_markup: this.keyboard(), + parse_mode: "MarkdownV2" + }); + this.lastSent = rawText; + this.startFlushTimer(); + return this.waitMessage; + } + + setStatus(status: string): void { + if (this.cancelled) return; + this.status = status; + } + + getStatus(): string { + return this.status; + } + + clearStatus(): void { + if (this.cancelled) return; + this.status = ""; + } + + append(delta: string): void { + if (this.cancelled) return; + if (!delta) return; + this.text += delta; + } + + replaceText(text: string): void { + if (this.cancelled) return; + this.text = text; + } + + getText(): string { + return this.text; + } + + async flush(replyMarkup: InlineKeyboardMarkup | null = this.keyboard(), end?: boolean): Promise { + return this.enqueueEdit(() => this.flushUnsafe(replyMarkup, end)); + } + + private async flushUnsafe(replyMarkup: InlineKeyboardMarkup | null = this.keyboard(), end?: boolean): Promise { + if (!this.waitMessage && this.stream) return; + + const next = this.mediaMode ? this.visibleCaption() : this.visibleText(); + const shouldRemoveKeyboard = replyMarkup === null; + if (next === this.lastSent && shouldRemoveKeyboard) { + await this.removeKeyboard(); + return; + } + + const formatted = prepareTelegramMarkdownV2(next, {mode: end ? "final" : "draft"}); + + if (next === this.lastSent && replyMarkup !== null) { + if (end) await this.updateKeyboard(replyMarkup); + return; + } + + try { + if (!this.stream && end && !this.waitMessage) { + if (this.isGuest) { + // await enqueueTelegramApiCall(() => bot.answerGuestQuery({ + // guest_query_id: this.sourceMessage.guest_query_id ?? "", + // result: {} + // }), + // {}); + } else { + await replyToMessage({ + message: this.sourceMessage, + text: formatted, + parse_mode: "MarkdownV2", + }); + } + } else { + if (this.waitMessage) { + const result = this.mediaMode + ? await enqueueTelegramApiCall( + () => bot.editMessageCaption({ + chat_id: this.waitMessage!.chat.id, + message_id: this.waitMessage!.message_id, + caption: formatted, + parse_mode: "MarkdownV2", + reply_markup: replyMarkup ?? this.emptyKeyboard(), + }), + { + method: "editMessageCaption", + chatId: this.waitMessage.chat.id, + chatType: this.waitMessage.chat.type, + } + ) + : await enqueueTelegramApiCall( + () => bot.editMessageText({ + chat_id: this.waitMessage!.chat.id, + message_id: this.waitMessage!.message_id, + text: formatted, + parse_mode: "MarkdownV2", + reply_markup: replyMarkup ?? this.emptyKeyboard(), + }), + { + method: "editMessageText", + chatId: this.waitMessage.chat.id, + chatType: this.waitMessage.chat.type, + } + ); + if (result && result !== true) this.waitMessage = result; + } + } + if (shouldRemoveKeyboard) await this.removeKeyboard(); + this.lastSent = next; + } catch (e: any) { + if (shouldRemoveKeyboard && this.isMessageNotModified(e)) { + await this.removeKeyboard(); + this.lastSent = next; + return; + } + if (!this.isMessageNotModified(e)) logError(e); + } + } + + async cancel(provider: string): Promise { + if (this.timer) clearInterval(this.timer); + this.timer = null; + this.cancelled = true; + this.cancelledProvider = provider; + this.status = ""; + this.text = buildCancelledGenerationText(this.text, this.cancelledProvider, this.mediaMode ? TELEGRAM_CAPTION_LIMIT : TELEGRAM_LIMIT); + await this.flush(this.regenerateKeyboard(), true); + await this.store(); + } + + async showImage(image: Buffer): Promise { + return this.enqueueEdit(() => this.showImageUnsafe(image)); + } + + async sendArtifact(file: TelegramArtifactFile): Promise { + return this.enqueueEdit(() => this.sendArtifactUnsafe(file)); + } + + private async showImageUnsafe(image: Buffer): Promise { + if (this.cancelled) return; + const next = this.visibleCaption(); + + if (!this.waitMessage) { + if (this.stream) return; + + this.waitMessage = await enqueueTelegramApiCall( + () => bot.sendPhoto({ + chat_id: this.sourceMessage.chat.id, + photo: image, + caption: prepareTelegramMarkdownV2(next, {mode: "final"}), + parse_mode: "MarkdownV2", + reply_parameters: {message_id: this.sourceMessage.message_id}, + }), + { + method: "sendPhoto", + chatId: this.sourceMessage.chat.id, + chatType: this.sourceMessage.chat.type, + } + ); + this.mediaMode = true; + this.lastSent = next; + return; + } + + try { + const result = await enqueueTelegramApiCall( + () => bot.editMessageMedia({ + chat_id: this.waitMessage!.chat.id, + message_id: this.waitMessage!.message_id, + media: { + type: "photo", + media: image, + caption: prepareTelegramMarkdownV2(next, {mode: "final"}), + parse_mode: "MarkdownV2", + }, + reply_markup: this.keyboard(), + }), + { + method: "editMessageMedia", + chatId: this.waitMessage.chat.id, + chatType: this.waitMessage.chat.type, + } + ); + if (result && result !== true) this.waitMessage = result; + this.mediaMode = true; + this.lastSent = next; + } catch (e: any) { + if (!String(e?.message ?? e).includes("message is not modified")) logError(e); + } + } + + private async sendArtifactUnsafe(file: TelegramArtifactFile): Promise { + if (this.cancelled) return null; + + if (file.sizeBytes > TELEGRAM_FILE_LIMIT_BYTES) { + throw new Error(Environment.getTelegramFileTooLargeText( + file.fileName, + TELEGRAM_FILE_LIMIT_BYTES / 1024 / 1024, + )); + } + + const caption = file.fileName.slice(0, TELEGRAM_CAPTION_LIMIT); + const isPhoto = this.isPhotoArtifact(file); + + await enqueueTelegramApiCall( + () => bot.sendChatAction({ + chat_id: this.sourceMessage.chat.id, + action: isPhoto ? "upload_photo" : "upload_document", + }), + { + method: "sendChatAction", + chatId: this.sourceMessage.chat.id, + chatType: this.sourceMessage.chat.type, + } + ).catch(logError); + + let sent: Message; + if (isPhoto) { + try { + sent = await enqueueTelegramApiCall( + async () => { + const upload = this.createArtifactUpload(file); + try { + return await bot.sendPhoto({ + chat_id: this.sourceMessage.chat.id, + photo: upload, + caption, + reply_parameters: {message_id: this.sourceMessage.message_id}, + }); + } finally { + this.destroyUpload(upload); + } + }, + { + method: "sendPhoto", + chatId: this.sourceMessage.chat.id, + chatType: this.sourceMessage.chat.type, + } + ); + } catch (e) { + logError(e); + sent = await this.sendArtifactAsDocument(file, caption); + } + } else { + sent = await this.sendArtifactAsDocument(file, caption); + } + + await this.storeArtifactMessage(sent, file); + return sent; + } + + private isPhotoArtifact(file: TelegramArtifactFile): boolean { + return file.kind === "image" + && file.sizeBytes <= TELEGRAM_PHOTO_LIMIT_BYTES + && ["image/jpeg", "image/png", "image/webp"].includes((file.mimeType || "").toLowerCase()); + } + + private createArtifactUpload(file: TelegramArtifactFile): FileOptions { + return new FileOptions(fs.createReadStream(file.path), { + filename: file.fileName, + contentType: file.mimeType || "application/octet-stream", + }); + } + + private destroyUpload(upload: FileOptions): void { + if ("destroy" in upload.file && typeof upload.file.destroy === "function") { + upload.file.destroy(); + } + } + + private async sendArtifactAsDocument(file: TelegramArtifactFile, caption: string): Promise { + return enqueueTelegramApiCall( + async () => { + const upload = this.createArtifactUpload(file); + try { + return await bot.sendDocument({ + chat_id: this.sourceMessage.chat.id, + document: upload, + caption, + reply_parameters: {message_id: this.sourceMessage.message_id}, + }); + } finally { + this.destroyUpload(upload); + } + }, + { + method: "sendDocument", + chatId: this.sourceMessage.chat.id, + chatType: this.sourceMessage.chat.type, + } + ); + } + + private async storeArtifactMessage(sent: Message, file: TelegramArtifactFile): Promise { + const photo = sent.photo?.[sent.photo.length - 1]; + const attachmentKind: StoredAttachmentKind = file.kind === "image" ? "image" : "document"; + const attachment: StoredAttachment = { + kind: attachmentKind, + fileId: sent.document?.file_id ?? photo?.file_id ?? file.path, + fileUniqueId: sent.document?.file_unique_id ?? photo?.file_unique_id, + fileName: file.fileName, + mimeType: file.mimeType, + cachePath: file.path, + }; + + const stored: StoredMessage = { + chatId: sent.chat.id, + id: sent.message_id, + replyToMessageId: sent.reply_to_message?.message_id ?? this.sourceMessage.message_id, + fromId: sent.from?.id ?? 0, + text: sent.caption ?? file.fileName, + date: sent.date ?? Math.floor(Date.now() / 1000), + attachments: [attachment], + }; + + await MessageStore.put(stored); + } + + async finish(removeKeyboard = true): Promise { + if (this.timer) clearInterval(this.timer); + this.timer = null; + + if (this.cancelled) { + await this.flush(removeKeyboard ? this.regenerateKeyboard() : this.keyboard(), true); + await this.store(); + return; + } + + if (Environment.SEND_TIME_TOOK) { + const diff = Date.now() - this.startedAt; + if (this.text.length + 32 < TELEGRAM_LIMIT) this.text += `\n\n⏱️ ${diff}ms`; + } + + this.clearStatus(); + await this.flush(removeKeyboard ? this.regenerateKeyboard() : this.keyboard(), true); + + await this.store(); + } + + async fail(error: unknown): Promise { + if (this.timer) clearInterval(this.timer); + this.timer = null; + this.status = ""; + this.text = `${Environment.errorText}\n${error instanceof Error ? error.message : String(error)}`; + await this.flush(this.regenerateKeyboard(), true); + } + + private async store(): Promise { + if (!this.waitMessage) return; + try { + await MessageStore.put({...this.waitMessage, text: this.visibleText()} as Message); + } catch (e) { + logError(e); + } + } +} diff --git a/src/ai/unified-ai-runner.ts b/src/ai/unified-ai-runner.ts new file mode 100644 index 0000000..0bbfd81 --- /dev/null +++ b/src/ai/unified-ai-runner.ts @@ -0,0 +1,2155 @@ +import {Message} from "typescript-telegram-bot-api"; +import fs, {openAsBlob} from "node:fs"; +import path from "node:path"; +import {AiProvider} from "../model/ai-provider"; +import {Environment} from "../common/environment"; +import {bot, photoGenDir} from "../index"; +import {clamp, collectReplyChainText, delay, ifTrue, logError, replyToMessage} from "../util/utils"; +import {MessageStore} from "../common/message-store"; +import { + AiProviderName, + getGeminiTools, + getMistralTools, + getOllamaTools, + getOpenAIResponsesTools, + getOpenAITools +} from "./tool-mappers"; +import {createAiCancelRequest, finishAiRequest, setAiCancelMessageId} from "./cancel-registry"; +import {TelegramArtifactFile, TelegramStreamMessage} from "./telegram-stream-message"; +import {AiDownloadedFile, attachmentsToDownloadedFiles, cleanupDownloads} from "./telegram-attachments"; +import {getModelCapabilities, getRuntimeCapabilities} from "./provider-model-runtime"; +import {StoredAttachment} from "../model/stored-attachment"; +import {AiChatMessage, ChatMessage} from "./chat-messages-types"; +import {ChatRequest, ListResponse, Tool} from "ollama"; +import {executeToolCall, ToolRuntimeContext} from "./tools/runtime"; +import {MessageImagePart, MessagePart} from "../common/message-part"; +import {enqueueTelegramApiCall} from "../util/telegram-api-queue"; +import {KeyedAsyncLock} from "../util/async-lock"; +import {getCurrentDateTimeTool} from "./tools/datetime"; +import {getMarketRatesTool} from "./tools/market-rates"; +import {getWeatherTool} from "./tools/weather"; +import {aiProviderRequestQueue, type AiRequestQueueTarget} from "./provider-request-queue"; +import {loadOllamaModel, unloadAllOllamaModels} from "./tools/utils"; +import {prepareOllamaDocumentRag} from "./ollama-rag"; +import {PYTHON_INTERPRETER_TOOL_NAME, pythonInterpreterToolPrompt} from "./tools/python-interpretator"; +import { + AI_VOICE_MODE_TRANSCRIPT, + DEFAULT_AI_RESPONSE_LANGUAGE, + getResponseLanguageInstruction, + resolveAiContextSizeForUser, + resolveAiResponseLanguageForUser, + resolveAiVoiceModeForUser, + UserAiResponseLanguage, + UserAiVoiceMode, +} from "../common/user-ai-settings"; +import { + isTranscribableAudioDownload, + resolveSpeechToTextProviderForUser, + transcribeSpeechDownloads, +} from "./speech-to-text"; +import { + isTextToSpeechConfigured, + resolveTextToSpeechProviderForUser, + sendSynthesizedSpeech, + synthesizeSpeech, +} from "./text-to-speech"; +import {OpenAIChatMessage} from "./openai-chat-message"; +import {ResponseInputMessageContentList} from "openai/resources/responses/responses.js"; +import {MistralChatMessage} from "./mistral-chat-message"; +import {OllamaChatMessage} from "./ollama-chat-message"; +import {GeminiMessage} from "./gemini-chat-message"; +import {buildAiRegenerateCallbackData} from "./regenerate-callback"; +import {prepareTelegramMarkdownV2} from "../util/markdown-v2-renderer"; +import { + AiRuntimeTarget, + createGeminiOpenAiClient, + createGoogleGenAiClient, + createMistralClient, + createOllamaClient, + createOpenAiClient, + getGeminiApiMode, + resolveAiRuntimeTarget, +} from "./ai-runtime-target"; + +const TELEGRAM_LIMIT = 4096; +const MAX_TOOL_ROUNDS = 20; +const OPENAI_IMAGE_PARTIALS = 3; +const AI_REQUEST_TIMEOUT_MS = 10 * 60 * 1000; +const DEFAULT_OLLAMA_CONTEXT_SIZE = 256000; +const MIN_OLLAMA_CONTEXT_SIZE = 4096; +const toolResourceLocks = new KeyedAsyncLock(); + +type UnifiedRunOptions = { + provider: AiProvider; + msg: Message; + isGuestMsg?: boolean; + text: string; + stream?: boolean; + think?: Think; + responseLanguage?: UserAiResponseLanguage; + contextSize?: number; + voiceMode?: UserAiVoiceMode; + targetMessage?: Message; +}; + +export type ToolCallData = { + id: string; + name: string; + argumentsText: string; +}; + +type OpenAiResponsesFunctionCall = { + callId: string; + name: string; + argumentsText: string; +}; + +// type OpenAiRunnerMessage = { +// role: "system" | "user" | "assistant"; +// content: string | Array>; +// }; + +// type MistralRunnerMessage = { +// role: "system" | "user" | "assistant" | "tool"; +// content?: string | Array>; +// name?: string; +// toolCallId?: string; +// toolCalls?: Array<{ +// id: string; +// function: { +// name: string; +// arguments: string; +// }; +// }>; +// }; + +// type GeminiRunnerInput = Array> +// type RunnerMessage = ChatMessage | OpenAiRunnerMessage | MistralRunnerMessage | GeminiRunnerInput; + +type AttachmentKind = "image" | "document" | "audio" | "video" | "video-note"; + +type Think = boolean | "high" | "medium" | "low"; + +type RuntimeConfigSnapshot = { + useNamesInPrompt: boolean; + useSystemPrompt: boolean; + systemPrompt?: string; + ollamaModel: string; + ollamaImageModel: string; + ollamaThinkModel: string; + ollamaAudioModel: string; + ollamaEmbeddingModel: string; + ollamaChatTarget: AiRuntimeTarget; + ollamaVisionTarget: AiRuntimeTarget; + ollamaThinkingTarget: AiRuntimeTarget; + ollamaAudioTarget: AiRuntimeTarget; + ollamaDocumentsTarget: AiRuntimeTarget; + ollamaRagChunkSize: number; + ollamaRagChunkOverlap: number; + ollamaRagTopK: number; + ollamaRagMaxContextChars: number; + ollamaRagMinScore: number; + ollamaRagMaxArchiveFiles: number; + ollamaRagMaxArchiveBytes: number; + ollamaRagMaxArchiveDepth: number; + geminiModel: string; + geminiImageModel: string; + geminiTranscriptionModel: string; + geminiChatTarget: AiRuntimeTarget; + mistralModel: string; + mistralTranscriptionModel: string; + mistralChatTarget: AiRuntimeTarget; + openAiModel: string; + openAiImageModel: string; + openAiTranscriptionModel: string; + openAiChatTarget: AiRuntimeTarget; + openAiImageTarget: AiRuntimeTarget; +}; + +function snapshotRuntimeConfig(): RuntimeConfigSnapshot { + return { + useNamesInPrompt: Environment.USE_NAMES_IN_PROMPT, + useSystemPrompt: Environment.USE_SYSTEM_PROMPT, + systemPrompt: Environment.SYSTEM_PROMPT, + ollamaModel: Environment.OLLAMA_CHAT_MODEL, + ollamaImageModel: Environment.OLLAMA_IMAGE_MODEL, + ollamaThinkModel: Environment.OLLAMA_THINK_MODEL, + ollamaAudioModel: Environment.OLLAMA_AUDIO_MODEL, + ollamaEmbeddingModel: Environment.OLLAMA_EMBEDDING_MODEL, + ollamaChatTarget: resolveAiRuntimeTarget(AiProvider.OLLAMA, "chat"), + ollamaVisionTarget: resolveAiRuntimeTarget(AiProvider.OLLAMA, "vision"), + ollamaThinkingTarget: resolveAiRuntimeTarget(AiProvider.OLLAMA, "thinking"), + ollamaAudioTarget: resolveAiRuntimeTarget(AiProvider.OLLAMA, "audio"), + ollamaDocumentsTarget: resolveAiRuntimeTarget(AiProvider.OLLAMA, "documents"), + ollamaRagChunkSize: Environment.OLLAMA_RAG_CHUNK_SIZE, + ollamaRagChunkOverlap: Environment.OLLAMA_RAG_CHUNK_OVERLAP, + ollamaRagTopK: Environment.OLLAMA_RAG_TOP_K, + ollamaRagMaxContextChars: Environment.OLLAMA_RAG_MAX_CONTEXT_CHARS, + ollamaRagMinScore: Environment.OLLAMA_RAG_MIN_SCORE, + ollamaRagMaxArchiveFiles: Environment.OLLAMA_RAG_MAX_ARCHIVE_FILES, + ollamaRagMaxArchiveBytes: Environment.OLLAMA_RAG_MAX_ARCHIVE_BYTES, + ollamaRagMaxArchiveDepth: Environment.OLLAMA_RAG_MAX_ARCHIVE_DEPTH, + geminiModel: Environment.GEMINI_MODEL, + geminiImageModel: Environment.GEMINI_IMAGE_MODEL, + geminiTranscriptionModel: Environment.GEMINI_TRANSCRIPTION_MODEL, + geminiChatTarget: resolveAiRuntimeTarget(AiProvider.GEMINI, "chat"), + mistralModel: Environment.MISTRAL_MODEL, + mistralTranscriptionModel: Environment.MISTRAL_TRANSCRIPTION_MODEL, + mistralChatTarget: resolveAiRuntimeTarget(AiProvider.MISTRAL, "chat"), + openAiModel: Environment.OPENAI_MODEL, + openAiImageModel: Environment.OPENAI_IMAGE_MODEL, + openAiTranscriptionModel: Environment.OPENAI_TRANSCRIPTION_MODEL, + openAiChatTarget: resolveAiRuntimeTarget(AiProvider.OPENAI, "chat"), + openAiImageTarget: resolveAiRuntimeTarget(AiProvider.OPENAI, "outputImages"), + }; +} + +function getMessageImageParts(part: MessagePart): MessageImagePart[] { + if (part.imageParts?.length) return part.imageParts; + return (part.images ?? []).map(data => ({data, mimeType: "image/jpeg"})); +} + +function openAiImageDataUrl(image: MessageImagePart): string { + return `data:${image.mimeType || "image/jpeg"};base64,${image.data}`; +} + +function geminiAudioMimeType(mimeType: string | undefined): string { + const normalized = mimeType?.toLowerCase(); + switch (normalized) { + case "audio/wav": + case "audio/mp3": + case "audio/aiff": + case "audio/aac": + case "audio/ogg": + case "audio/flac": + case "audio/mpeg": + case "audio/m4a": + case "audio/l16": + case "audio/opus": + case "audio/alaw": + case "audio/mulaw": + return normalized; + default: + return "audio/wav"; + } +} + +function snapshotModel(provider: AiProvider, config: RuntimeConfigSnapshot): string { + switch (provider) { + case AiProvider.OLLAMA: + return config.ollamaChatTarget.model; + case AiProvider.GEMINI: + return config.geminiModel; + case AiProvider.MISTRAL: + return config.mistralChatTarget.model; + case AiProvider.OPENAI: + return config.openAiChatTarget.model; + } +} + +function providerName(provider: AiProvider): AiProviderName { + switch (provider) { + case AiProvider.OLLAMA: + return "ollama"; + case AiProvider.GEMINI: + return "gemini"; + case AiProvider.MISTRAL: + return "mistral"; + case AiProvider.OPENAI: + return "openai"; + } +} + +function buildSystemInstruction( + config: RuntimeConfigSnapshot, + responseLanguage: UserAiResponseLanguage, + includePythonToolPrompt: boolean, +): string { + return [ + getResponseLanguageInstruction(responseLanguage), + config.systemPrompt && config.useSystemPrompt ? config.systemPrompt : null, + includePythonToolPrompt ? pythonInterpreterToolPrompt : null, + ].filter(Boolean).join("\n\n"); +} + +function initialStatus(downloads: AiDownloadedFile[], messagePartsImages: number): string { + const documents = downloads.filter(d => d.kind === "document"); + const images = downloads.filter(d => d.kind === "image").length + messagePartsImages; + const audio = downloads.filter(isTranscribableAudioDownload).length; + + if (documents.length) return prepareTelegramMarkdownV2(Environment.getAnalyzingDocumentText(documents.map(d => d.fileName))); + if (audio) return Environment.transcribingAudioText; + if (images > 1) return Environment.analyzingPicturesText; + if (images === 1) return Environment.analyzingPictureText; + return Environment.waitThinkText; +} + +function hasAudioAttachmentKind(kinds: Set): boolean { + return kinds.has("audio") || kinds.has("video-note"); +} + +function resolveAiRequestQueueTarget( + options: Pick, + config: RuntimeConfigSnapshot, + requestedAttachmentKinds: Set, +): AiRequestQueueTarget { + switch (options.provider) { + case AiProvider.OLLAMA: + if (hasAudioAttachmentKind(requestedAttachmentKinds)) return config.ollamaAudioTarget; + if (requestedAttachmentKinds.has("image")) return config.ollamaVisionTarget; + return options.think ? config.ollamaThinkingTarget : config.ollamaChatTarget; + case AiProvider.GEMINI: + return config.geminiChatTarget; + case AiProvider.MISTRAL: + return config.mistralChatTarget; + case AiProvider.OPENAI: + return config.openAiChatTarget; + } +} + +function roundStatus(round: number, firstRoundStatus: string, content?: string, toolCalls?: ToolCallData[], thinking?: boolean): string | null { + if (content?.length && !toolCalls?.length && !thinking) { + // console.log("ROUND_STATUS", "null"); + return null; + } + + const status = toolCalls?.length ? Environment.getUseToolText(toolCalls) + : thinking ? Environment.reasoningText + : round === 0 ? firstRoundStatus + : Environment.waitThinkText; + + // console.log("ROUND_STATUS", status); + + return status; +} + +function isPlainTextDocument(doc: AiDownloadedFile): boolean { + const ext = path.extname(doc.fileName).toLowerCase(); + const mime = (doc.mimeType ?? "").toLowerCase(); + + return mime.startsWith("text/") + || mime === "application/json" + || mime === "application/xml" + || [ + ".txt", + ".md", + ".markdown", + ".csv", + ".json", + ".jsonl", + ".xml", + ".yaml", + ".yml", + ".ini", + ".env", + ".log", + ".ps1", + ".sh", + ".bat", + ".cmd", + ".js", + ".jsx", + ".ts", + ".tsx", + ".py", + ".rb", + ".go", + ".java", + ".c", + ".cc", + ".cpp", + ".h", + ".hpp", + ".php", + ".sql", + ].includes(ext); +} + +function decodeTextDocument(doc: AiDownloadedFile): string { + return doc.buffer.toString("utf8").replace(/\u0000/g, ""); +} + +export function ollamaModelNames(response: ListResponse): string[] { + return (response?.models ?? []) + .flatMap((model) => [model?.model, model?.name]) + .filter((value): value is string => typeof value === "string" && value.length > 0); +} + +async function isOllamaModelActive(target: AiRuntimeTarget): Promise { + const active = await createOllamaClient(target).ps(); + return ollamaModelNames(active).includes(target.model); +} + +function addMessageAttachmentKinds(msg: Message | undefined, kinds: Set): void { + if (!msg) return; + + if (msg.photo?.length) kinds.add("image"); + if (msg.document) { + const mimeType = msg.document.mime_type; + kinds.add(mimeType?.startsWith("image/") ? "image" : mimeType?.startsWith("audio/") ? "audio" : "document"); + } + if (msg.voice || msg.audio || msg.video_note) kinds.add("audio"); +} + +async function collectStoredReplyChainAttachments(msg: Message): Promise { + const attachments: StoredAttachment[] = []; + const seen = new Set(); + let current = await MessageStore.get(msg.chat.id, msg.message_id); + + for (let i = 0; current && i < 40; i++) { + for (const attachment of current.attachments ?? []) { + const key = [ + attachment.kind, + attachment.fileUniqueId || attachment.fileId, + attachment.cachePath, + ].join(":"); + if (seen.has(key)) continue; + seen.add(key); + attachments.push(attachment); + } + current = await MessageStore.get(current.chatId, current.replyToMessageId); + } + + return attachments; +} + +async function hasStoredReplyChainImage(msg: Message): Promise { + const attachments = await collectStoredReplyChainAttachments(msg); + if (attachments.some(attachment => attachment.kind === "image")) return true; + + let current = await MessageStore.get(msg.chat.id, msg.message_id); + + for (let i = 0; current && i < 40; i++) { + if (current.photoMaxSizeFilePath?.length) return true; + current = await MessageStore.get(current.chatId, current.replyToMessageId); + } + + return false; +} + +async function collectRequestedAttachmentKinds(msg: Message): Promise> { + const kinds = new Set(); + + addMessageAttachmentKinds(msg, kinds); + addMessageAttachmentKinds(msg.reply_to_message, kinds); + + for (const attachment of await collectStoredReplyChainAttachments(msg)) { + kinds.add(attachment.kind); + } + + if (!kinds.has("image") && await hasStoredReplyChainImage(msg)) { + kinds.add("image"); + } + + return kinds; +} + +function unsupportedAttachmentText(provider: AiProvider, model: string, kind: AttachmentKind): string { + const providerName = provider.toLowerCase(); + + switch (kind) { + case "audio": + return Environment.getCurrentModelUnsupportedInputText(model, providerName, "voice or audio messages"); + case "image": + return Environment.getCurrentModelUnsupportedInputText(model, providerName, "images"); + case "document": + return Environment.getCurrentModelUnsupportedInputText(model, providerName, "documents"); + case "video": + return Environment.getCurrentModelUnsupportedInputText(model, providerName, "video"); + case "video-note": + return Environment.getCurrentModelUnsupportedInputText(model, providerName, "video notes"); + } +} + +async function rejectUnsupportedAttachments( + provider: AiProvider, + model: string, + msg: Message, + config: RuntimeConfigSnapshot, + requestedAttachmentKinds?: Set, +): Promise { + const kinds = requestedAttachmentKinds ?? await collectRequestedAttachmentKinds(msg); + let effectiveModel = model || snapshotModel(provider, config); + const hasAudio = hasAudioAttachmentKind(kinds); + + if (provider === AiProvider.OLLAMA) { + effectiveModel = hasAudio ? config.ollamaAudioTarget.model + : kinds.has("image") ? config.ollamaVisionTarget.model + : config.ollamaChatTarget.model; + } + + const caps = await getRuntimeCapabilities(provider, effectiveModel); + + let speechToTextSupported = !hasAudio; + if (hasAudio && msg.from?.id) { + speechToTextSupported = await resolveSpeechToTextProviderForUser(msg.from.id, provider) + .then(() => true) + .catch(() => false); + } + + const unsupported = + (hasAudio && !speechToTextSupported ? "audio" : null) ?? + (kinds.has("image") && !caps.vision?.supported ? "image" : null) ?? + (kinds.has("document") && !caps.documents?.supported ? "document" : null); + + if (!unsupported) return false; + + if (!kinds.has("audio")) { + await replyToMessage({ + message: msg, + text: unsupportedAttachmentText(provider, effectiveModel, unsupported), + }).catch(logError); + } + + return true; +} + +async function collectCachedMessageAttachments(msg: Message): Promise<{ + attachments: StoredAttachment[]; + missing: StoredAttachment[] +}> { + const attachments = await collectStoredReplyChainAttachments(msg); + return { + attachments, + missing: attachments.filter(attachment => !fs.existsSync(attachment.cachePath)), + }; +} + +function safeJsonParseObject(value?: string): Record { + if (!value?.trim()) return {}; + try { + const parsed = JSON.parse(value); + return typeof parsed === "object" && parsed && !Array.isArray(parsed) ? parsed : {}; + } catch { + return {}; + } +} + +function toolRuntimeContextFromDownloads(downloads: AiDownloadedFile[]): ToolRuntimeContext { + if (!downloads.length) return {}; + + return { + pythonInputFiles: downloads.map(download => ({ + kind: download.kind, + path: download.path, + fileName: download.fileName, + mimeType: download.mimeType, + })), + }; +} + +function extractToolArtifacts(toolName: string, result: string): TelegramArtifactFile[] { + if (toolName !== PYTHON_INTERPRETER_TOOL_NAME) return []; + + try { + const parsed = JSON.parse(result) as Record; + const artifacts = Array.isArray(parsed.artifacts) ? parsed.artifacts : []; + + return artifacts + .map(artifact => artifact as Partial) + .filter((artifact): artifact is TelegramArtifactFile => { + return (artifact.kind === "image" || artifact.kind === "file") + && typeof artifact.path === "string" + && typeof artifact.fileName === "string" + && Number.isSafeInteger(artifact.sizeBytes); + }); + } catch { + return []; + } +} + +async function sendToolArtifacts(toolCall: ToolCallData, result: string, message: TelegramStreamMessage): Promise { + const artifacts = extractToolArtifacts(toolCall.name, result); + for (const artifact of artifacts) { + await message.sendArtifact(artifact); + } +} + +function normalizeMistralToolCalls(calls: any[] = []): ToolCallData[] { + return calls.map((call, i) => ({ + id: call.id || `call_${Date.now()}_${i}`, + name: call.function?.name || call.name || "", + argumentsText: typeof call.function?.arguments === "string" ? call.function.arguments : JSON.stringify(call.function?.arguments ?? call.arguments ?? {}), + })).filter(c => c.name); +} + +function contentFromMistralDelta(delta: any): string { + if (!delta?.content) return ""; + if (typeof delta.content === "string") return delta.content; + if (Array.isArray(delta.content)) return delta.content.map((c: any) => c.text ?? "").join(""); + return ""; +} + +function buildOpenAiResponseMessage(part: MessagePart, getContent: (part: MessagePart) => string): OpenAIChatMessage { + const content: ResponseInputMessageContentList = [{ + type: "input_text", + text: getContent(part), + }]; + + if (!part.bot) { + for (const image of getMessageImageParts(part)) { + content.push({type: "input_image", image_url: openAiImageDataUrl(image), detail: "auto"}); + } + } + + return {role: part.bot ? "assistant" : "user", content, type: "message"}; +} + +function buildGeminiMessage(part: MessagePart, getContent: (part: MessagePart) => string): GeminiMessage { + const parts: GeminiMessage["parts"] = [{text: getContent(part)}]; + + if (!part.bot) { + for (const image of getMessageImageParts(part)) { + parts.push({ + inlineData: { + data: image.data, + mimeType: image.mimeType || "image/jpeg", + }, + }); + } + + const audioParts = part.audioParts?.length + ? part.audioParts + : (part.audios ?? []).map(data => ({data, mimeType: "audio/wav"})); + + for (const audio of audioParts) { + parts.push({ + inlineData: { + data: audio.data, + mimeType: geminiAudioMimeType(audio.mimeType), + }, + }); + } + + for (const videoNote of part.videoNotes ?? []) { + parts.push({ + inlineData: { + data: videoNote, + mimeType: "audio/wav", + }, + }); + } + } + + return { + role: part.bot ? "model" : "user", + parts, + }; +} + +async function collectTextMessages( + msg: Message, + textOverride: string, + provider: AiProvider, + downloads: AiDownloadedFile[], + config: RuntimeConfigSnapshot, + responseLanguage: UserAiResponseLanguage, +): Promise<{ + chatMessages: AiChatMessage[]; + imageCount: number +}> { + const storedMsg = await MessageStore.get(msg.chat.id, msg.message_id); + const messageParts = await collectReplyChainText({triggerMsg: storedMsg, downloads: downloads}); + + if (messageParts.length && textOverride?.trim()) { + // const latest = messageParts[0]; + // if (!latest.bot && textOverride?.trim()) latest.content = textOverride.trim(); + } + + const ordered = messageParts.reverse(); + const imageCount = ordered.reduce((sum, p) => sum + (p.imageParts?.length ?? p.images?.length ?? 0), 0); + const includePythonToolPrompt = Environment.ENABLE_PYTHON_INTERPRETER && msg.from?.id === Environment.CREATOR_ID; + const systemInstruction = buildSystemInstruction(config, responseLanguage, includePythonToolPrompt); + + const getContent = (part: MessagePart): string => { + if (part.bot) return part.content; + + const userInfo = [ + "[user_info]:", + `name: ${part.name}`, + `username: @${part.userName}`, + "" + ].join("\n"); + + const finalContent = [ + part.content + ]; + + if (Environment.USE_NAMES_IN_PROMPT) { + finalContent.unshift(userInfo); + } + + return finalContent.join("\n"); + }; + + if (provider === AiProvider.OPENAI) { + const messages: OpenAIChatMessage[] = ordered.map(part => buildOpenAiResponseMessage(part, getContent)); + + if (systemInstruction) { + messages.unshift({role: "system", content: systemInstruction, type: "message"}); + } + return {chatMessages: messages, imageCount}; + } + + if (provider === AiProvider.MISTRAL) { + const messages: MistralChatMessage[] = ordered.map(part => { + if (part.bot) { + return { + role: "assistant", + content: [{type: "text", text: getContent(part)}] + }; + } else { + return { + role: "user", + content: [ + {type: "text", text: getContent(part)}, + ...getMessageImageParts(part).map(p => { + return { + type: "image_url" as const, + imageUrl: `data:${p.mimeType || "image/jpeg"};base64,${p.data}` + }; + }) + ] + }; + } + }); + + if (systemInstruction) { + messages.unshift({role: "system", content: systemInstruction}); + } + return {chatMessages: messages, imageCount}; + } + + if (provider === AiProvider.OLLAMA) { + const messages: OllamaChatMessage[] = ordered.map(part => ({ + role: part.bot ? "assistant" : "user", + content: getContent(part), + images: part.bot ? undefined : part.images, + imageParts: part.imageParts, + audios: part.audios, + audioParts: part.audioParts, + videos: part.videos, + videoNotes: part.videoNotes + })); + + if (systemInstruction) { + /* + "[current_date]:\n" + new Date().toLocaleTimeString([], { + day: "2-digit", + month: "2-digit", + year: "numeric", + }) + "\n\n" + + */ + + messages.unshift({ + role: "system", + content: systemInstruction + }); + } + + return {chatMessages: messages, imageCount}; + } + + if (provider === AiProvider.GEMINI) { + if (getGeminiApiMode(config.geminiChatTarget) === "openai") { + const messages: OpenAIChatMessage[] = ordered.map(part => buildOpenAiResponseMessage(part, getContent)); + if (systemInstruction) { + messages.unshift({role: "system", content: systemInstruction, type: "message"}); + } + + return {chatMessages: messages, imageCount}; + } + + const messages: GeminiMessage[] = ordered.map(part => buildGeminiMessage(part, getContent)); + if (systemInstruction) { + messages.unshift({ + role: "user", + parts: [{text: systemInstruction}], + }); + } + + return {chatMessages: messages, imageCount}; + } + + // const messages: RunnerMessage[] = ordered.map(part => { + // return { + // role: part.bot ? "assistant" : "user", + // content: `${config.useNamesInPrompt && !part.bot ? `${part.name}:\n` : ""}${part.content}`, + // images: part.images, + // imageParts: part.imageParts, + // audios: part.audios, + // audioParts: part.audioParts, + // documents: part.documents, + // }; + // }); + // + // if (systemInstruction) { + // messages.unshift({role: "system", content: systemInstruction}); + // } + // + return {chatMessages: [], imageCount: -1}; +} + +// function collectGeminiOutputText(response: any): string { +// if (typeof response?.output_text === "string") return response.output_text; +// if (typeof response?.text === "string") return response.text; +// +// return (response?.outputs ?? []) +// .map((output: any) => { +// if (typeof output === "string") return output; +// if (output?.type === "text") return output.text ?? ""; +// if (typeof output?.text === "string") return output.text; +// return ""; +// }) +// .join(""); +// } + +// function collectGeminiOutputImages(response: any): string[] { +// return (response?.outputs ?? []) +// .filter((output: any) => output?.type === "image" && (output.data || output.image_base64)) +// .map((output: any) => output.data ?? output.image_base64); +// } + +// async function maybeGenerateGeminiImage(msg: Message, text: string, streamMessage: TelegramStreamMessage, signal: AbortSignal, config: RuntimeConfigSnapshot): Promise { +// if (signal.aborted) throw new Error("Aborted"); +// +// streamMessage.setStatus(Environment.genImageText); +// await streamMessage.flush(); +// +// const response = await geminiAi.interactions.create({ +// model: config.geminiImageModel, +// input: text, +// response_modalities: ["image"], +// }); +// +// if (signal.aborted) throw new Error("Aborted"); +// +// const images = collectGeminiOutputImages(response); +// const imageB64 = images[images.length - 1]; +// +// if (!imageB64) throw new Error("Gemini did not return image.output/image_base64."); +// const imageBuffer = Buffer.from(imageB64, "base64"); +// +// await enqueueTelegramApiCall( +// () => bot.sendPhoto({ +// chat_id: msg.chat.id, +// photo: imageBuffer, +// caption: `👨‍🎨 Done. Model: ${config.geminiImageModel}`, +// reply_parameters: {message_id: msg.message_id}, +// }), +// {method: "sendPhoto", chatId: msg.chat.id, chatType: msg.chat.type} +// ); +// +// streamMessage.clearStatus(); +// streamMessage.replaceText("👨‍🎨 Image generated."); +// await streamMessage.finish(); + +// return true; +// } + +async function transcribeAudioIfNeeded(provider: AiProvider, userId: number | undefined, downloads: AiDownloadedFile[], message: TelegramStreamMessage, signal: AbortSignal): Promise { + if (!downloads.some(isTranscribableAudioDownload)) return ""; + if (!userId) throw new Error(Environment.couldNotIdentifyUserForSpeechToTextText); + if (signal.aborted) throw new Error("Aborted"); + + message.setStatus(Environment.transcribingAudioText); + await message.flush(); + + const resolved = await resolveSpeechToTextProviderForUser(userId, provider); + const transcript = await transcribeSpeechDownloads(resolved.provider, downloads, signal); + if (!transcript.trim()) { + throw new Error(Environment.speechToTextEmptyResultText); + } + return transcript; +} + +function stripAudioFromRunnerMessages(parts: AiChatMessage[]): void { + for (const part of parts) { + if ("audios" in part) { + delete part.audios; + } + if ("audioParts" in part) { + delete part.audioParts; + } + + if ("videoNotes" in part) { + delete part.videoNotes; + } + + if ("parts" in part && Array.isArray(part.parts)) { + part.parts = part.parts.filter(geminiPart => { + if (!("inlineData" in geminiPart)) return true; + const mimeType = geminiPart.inlineData.mimeType.toLowerCase(); + return !mimeType.startsWith("audio/") && !mimeType.startsWith("video/"); + }); + } + } +} + +function appendTranscriptToChatMessages(chatMessages: AiChatMessage[], provider: AiProvider, transcript: string): void { + const lastUser = [...chatMessages].reverse().find((p) => "role" in p && p.role === "user"); + if (!lastUser) return; + + const text = transcript; + + if (provider === AiProvider.GEMINI && "parts" in lastUser && Array.isArray(lastUser.parts)) { + lastUser.parts.push({text}); + return; + } + + if (!("content" in lastUser)) return; + + if (typeof lastUser.content === "string") { + lastUser.content = [lastUser.content, text].filter((value: string) => value.trim()).join("\n\n"); + return; + } + + if (Array.isArray(lastUser.content)) { + lastUser.content.push({ + type: provider === AiProvider.OPENAI ? "input_text" : "text", + text: text, + } as any); + } +} + +async function sendVoiceResponseIfNeeded(options: UnifiedRunOptions, downloads: AiDownloadedFile[], text: string): Promise { + if (!downloads.some(isTranscribableAudioDownload)) return; + if (!options.msg.from?.id) return; + + const trimmed = text.trim(); + if (!trimmed) return; + + try { + const provider = isTextToSpeechConfigured(options.provider) + ? options.provider + : (await resolveTextToSpeechProviderForUser(options.msg.from.id)).provider; + const speech = await synthesizeSpeech({provider, text: trimmed}); + await sendSynthesizedSpeech(options.msg, speech); + } catch (e) { + logError(e); + } +} + +async function deleteMistralLibrary(libraryId: string | undefined, target: AiRuntimeTarget): Promise { + if (!libraryId) return; + + try { + const mistralAi = createMistralClient(target); + await mistralAi.beta.libraries.delete({libraryId}); + } catch (e) { + logError(e); + } +} + +async function appendMistralTextDocument(doc: AiDownloadedFile, messages: any[], message: TelegramStreamMessage): Promise { + message.setStatus(prepareTelegramMarkdownV2(Environment.getAnalyzingDocumentText([doc.fileName]))); + await message.flush(); + + const text = decodeTextDocument(doc).trim(); + if (!text) { + throw new Error(Environment.getDocumentIsEmptyText(doc.fileName)); + } + + messages.push({ + role: "user", + content: [ + { + type: "text", + text: Environment.getDocumentContentText(doc.fileName, text), + }, + ], + }); +} + +async function prepareMistralDocuments(downloads: AiDownloadedFile[], messages: any[], message: TelegramStreamMessage, target: AiRuntimeTarget, signal: AbortSignal): Promise<{ + documents: any[]; + libraryId?: string +}> { + const docs = downloads.filter(d => d.kind === "document"); + const result: any[] = []; + if (!docs.length) return {documents: result}; + + const mistralAi = createMistralClient(target); + const library: any = await mistralAi.beta.libraries.create({ + name: `tg-chat-bot-${Date.now()}`, + description: "Temporary library for document search", + }, {signal}); + const libraryId = library?.id; + if (!libraryId) { + throw new Error(Environment.mistralLibraryIdMissingText); + } + + try { + for (const doc of docs) { + if (signal.aborted) throw new Error("Aborted"); + + if (isPlainTextDocument(doc)) { + await appendMistralTextDocument(doc, messages, message); + continue; + } + + message.setStatus(prepareTelegramMarkdownV2(Environment.getAnalyzingDocumentText([doc.fileName]))); + await message.flush(); + + const uploaded: any = await mistralAi.beta.libraries.documents.upload({ + libraryId, + requestBody: { + file: await openAsBlob(doc.path), + }, + }, {signal}); + + const documentId = uploaded?.id ?? uploaded?.document_id; + if (!documentId) { + throw new Error(Environment.getMistralUploadedDocumentIdMissingText(doc.fileName)); + } + + let processed = false; + for (let i = 0; i < 90; i++) { + const info: any = await mistralAi.beta.libraries.documents.status({libraryId, documentId}, {signal}); + const status = info.status ?? info.process_status ?? info.processing_status; + + if (status === "processed" || status === "Completed" || status === "done" || status === "Done") { + processed = true; + break; + } + if (status === "failed" || status === "error" || status === "Failed" || status === "Error" || status === "missing_content") { + throw new Error(Environment.getMistralDocumentProcessingFailedText(doc.fileName, status)); + } + await delay(2000, signal); + } + + if (!processed) { + throw new Error(Environment.getMistralDocumentProcessingTimedOutText(doc.fileName)); + } + + result.push({type: "file", id: libraryId}); + } + + return {documents: result, libraryId}; + } catch (e) { + await deleteMistralLibrary(libraryId, target); + throw e; + } +} + +async function executeTool( + toolCall: ToolCallData, + message: TelegramStreamMessage, + context: ToolRuntimeContext, +): Promise { + await message.flush(); + const result = await executeToolCall(toolCall.name, safeJsonParseObject(toolCall.argumentsText), context); + await sendToolArtifacts(toolCall, result, message); + return result; +} + +function toolResourceKeys(toolCall: ToolCallData): string[] { + const args = safeJsonParseObject(toolCall.argumentsText); + const pathValue = typeof args.path === "string" ? args.path : undefined; + const sourcePath = typeof args.sourcePath === "string" ? args.sourcePath : undefined; + const targetPath = typeof args.targetPath === "string" ? args.targetPath : undefined; + + switch (toolCall.name) { + case "get_datetime": + case "web_search": + case "get_weather": + case "read_file": + case "list_directory": + return []; + case "create_file": + case "create_directory": + case "update_file": + case "delete_path": + return [`file:${pathValue ?? "*"}`]; + case "copy_path": + case "rename_path": + return [`file:${sourcePath ?? "*"}`, `file:${targetPath ?? "*"}`]; + case "shell_execute": + return ["shell:*"]; + default: + return [`tool:${toolCall.name}`]; + } +} + +async function runWithToolLocks(keys: string[], task: () => Promise): Promise { + const uniqueKeys = [...new Set(keys)].sort(); + const run = (index: number): Promise => { + const key = uniqueKeys[index]; + if (!key) return task(); + return toolResourceLocks.runExclusive(key, () => run(index + 1)); + }; + + return run(0); +} + +async function executeScheduledTool( + toolCall: ToolCallData, + message: TelegramStreamMessage, + context: ToolRuntimeContext, +): Promise { + const keys = toolResourceKeys(toolCall); + if (!keys.length) return executeTool(toolCall, message, context); + return runWithToolLocks(keys, () => executeTool(toolCall, message, context)); +} + +async function executeToolBatch( + toolCalls: ToolCallData[], + message: TelegramStreamMessage, + context: ToolRuntimeContext, +): Promise { + if (!toolCalls.length) return []; + + message.setStatus(Environment.getUseToolText(toolCalls)); + await message.flush(); + + const result = await Promise.all(toolCalls.map(async toolCall => { + message.setStatus(Environment.getUseToolText(toolCalls)); + await message.flush(); + const result = await executeScheduledTool(toolCall, message, context); + message.setStatus(Environment.getUseToolText(toolCalls)); + await message.flush(); + return stringifyToolExecutionResult(result); + })); + message.setStatus(Environment.getUseToolText(toolCalls)); + await message.flush(); + + return result; +} + +function appendOllamaToolResults(messages: ChatMessage[], calls: ToolCallData[], results: string[]): void { + for (const [index, call] of calls.entries()) { + messages.push({ + role: "tool", + content: results[index] ?? "", + tool_name: call.name + }); + } +} + +function stringifyToolExecutionResult(result: unknown): string { + return typeof result === "string" ? result : JSON.stringify(result); +} + +async function appendMistralToolResult( + messages: MistralChatMessage[], + call: ToolCallData, + streamMessage: TelegramStreamMessage, + context: ToolRuntimeContext, +): Promise { + const result = await executeTool(call, streamMessage, context); + messages.push({ + role: "tool", + name: call.name, + toolCallId: call.id, + content: stringifyToolExecutionResult(result), + }); +} + +function getOpenAIResponsesToolsWithImage(config: RuntimeConfigSnapshot): any[] { + return [ + ...getOpenAIResponsesTools(), + { + type: "image_generation", + model: config.openAiImageTarget.model, + size: "auto", + moderation: "low", + output_format: "png", + partial_images: OPENAI_IMAGE_PARTIALS, + }, + ]; +} + +function collectOpenAiResponseText(response: any): string { + if (typeof response?.output_text === "string") return response.output_text; + + return (response?.output ?? []) + .filter((item: any) => item?.type === "message") + .flatMap((item: any) => item.content ?? []) + .map((content: any) => content?.text ?? content?.refusal ?? "") + .join(""); +} + +function collectOpenAiResponseFunctionCalls(response: any): OpenAiResponsesFunctionCall[] { + return (response?.output ?? []) + .filter((item: any) => item?.type === "function_call" && item.call_id && item.name) + .map((item: any) => ({ + callId: item.call_id, + name: item.name, + argumentsText: item.arguments ?? "{}", + })); +} + +function collectOpenAiResponseImages(response: any): string[] { + return (response?.output ?? []) + .filter((item: any) => item?.type === "image_generation_call" && item.result) + .map((item: any) => item.result); +} + +function writeOpenAiGeneratedImage(sourceMessage: Message, b64: string, label: string): Buffer { + const imageBuffer = Buffer.from(b64, "base64"); + const fileName = `${sourceMessage.chat.id}_${sourceMessage.message_id}_${Date.now()}_${label}.png`; + fs.writeFileSync(path.join(photoGenDir, fileName), imageBuffer); + return imageBuffer; +} + +async function showOpenAiGeneratedImage( + streamMessage: TelegramStreamMessage, + sourceMessage: Message, + b64: string, + label: string, + status: string, + final: boolean, +): Promise { + const imageBuffer = writeOpenAiGeneratedImage(sourceMessage, b64, label); + if (final && !streamMessage.getText().trim()) { + streamMessage.replaceText(status); + streamMessage.clearStatus(); + } else { + streamMessage.setStatus(status); + } + await streamMessage.showImage(imageBuffer); +} + +async function runOpenAi( + messages: OpenAIChatMessage[], + streamMessage: TelegramStreamMessage, + signal: AbortSignal, + stream: boolean, + firstRoundStatus: string, + sourceMessage: Message, + config: RuntimeConfigSnapshot, + toolContext: ToolRuntimeContext, +): Promise { + firstRoundStatus; + let responseInput = [...messages]; + const openAi = createOpenAiClient(config.openAiChatTarget); + + for (let round = 0; round < MAX_TOOL_ROUNDS; round++) { + // const status = roundStatus(round, firstRoundStatus); + // if (!status) { + // streamMessage.clearStatus(); + // } else { + // streamMessage.setStatus(status); + // } + + // streamMessage.setStatus(roundStatus(round, firstRoundStatus)); + // await streamMessage.flush(); + + if (!stream) { + const response = await openAi.responses.create({ + model: config.openAiChatTarget.model, + input: responseInput, + tools: getOpenAIResponsesToolsWithImage(config), + parallel_tool_calls: true, + // instructions: systemPrompt, + }, {signal}); + + streamMessage.append(collectOpenAiResponseText(response)); + const images = collectOpenAiResponseImages(response); + if (images.length) { + await showOpenAiGeneratedImage( + streamMessage, + sourceMessage, + images[images.length - 1], + `final_${round}`, + Environment.getImageGenDoneText(config.openAiImageTarget.model), + true, + ); + } + + const calls = collectOpenAiResponseFunctionCalls(response); + if (!calls.length) return; + + const toolOutputs = []; + for (const call of calls) { + const result = await executeTool({ + id: call.callId, + name: call.name, + argumentsText: call.argumentsText + }, streamMessage, toolContext); + toolOutputs.push({ + type: "function_call_output", + call_id: call.callId, + output: typeof result === "string" ? result : JSON.stringify(result), + }); + } + // @ts-ignore + messages = [...messages, ...(response.output ?? []), ...toolOutputs]; + responseInput = [...messages]; + continue; + } + + let completedResponse: any = null; + const response = await openAi.responses.create({ + model: config.openAiChatTarget.model, + input: responseInput, + stream: true, + tools: getOpenAIResponsesToolsWithImage(config), + parallel_tool_calls: true, + }, {signal}); + + + let localToolCalls: ToolCallData[] = []; + for await (const event of response) { + if (signal.aborted) throw new Error("Aborted"); + + switch (event.type) { + case "response.output_text.delta": + streamMessage.append(event.delta ?? ""); + break; + case "response.image_generation_call.in_progress": + streamMessage.setStatus(Environment.startingImageGenText); + await streamMessage.flush(); + break; + case "response.image_generation_call.generating": + streamMessage.setStatus(Environment.imageGenText); + await streamMessage.flush(); + break; + case "response.image_generation_call.partial_image": { + const iteration = (event.partial_image_index ?? 0) + 1; + await showOpenAiGeneratedImage( + streamMessage, + sourceMessage, + event.partial_image_b64, + `partial_${round}_${iteration}`, + Environment.getPartialImageGenText(iteration, OPENAI_IMAGE_PARTIALS), + false, + ); + break; + } + case "response.image_generation_call.completed": + streamMessage.setStatus(Environment.finalizingImageGenText); + await streamMessage.flush(); + break; + case "response.output_item.added": + if (event.item.type === "function_call" && event.item.name) { + localToolCalls.push({ + id: event.item.id, + name: event.item.name, + argumentsText: event.item.arguments, + }); + + streamMessage.setStatus(Environment.getUseToolText(localToolCalls)); + await streamMessage.flush(); + } + break; + case "response.output_item.done": + if (event.item.type === "function_call" && event.item.name) { + const index = localToolCalls.findIndex(c => c.id === event.item.id); + if (index !== -1) { + localToolCalls.splice(index, 1); + if (localToolCalls.length === 0) { + streamMessage.clearStatus(); + } else { + streamMessage.setStatus(Environment.getUseToolText(localToolCalls)); + } + await streamMessage.flush(); + } + } + break; + case "response.function_call_arguments.delta": + break; + case "response.function_call_arguments.done": + break; + + case "response.completed": + completedResponse = event.response; + break; + case "response.failed": + throw new Error(event.response?.error?.message ?? "OpenAI response failed"); + case "error": + throw new Error(event.message ?? event?.message ?? "OpenAI stream error"); + } + } + + if (!completedResponse) throw new Error("OpenAI did not return the final response.completed event."); + + const images = collectOpenAiResponseImages(completedResponse); + if (images.length) { + await showOpenAiGeneratedImage( + streamMessage, + sourceMessage, + images[images.length - 1], + `final_${round}`, + Environment.getImageGenDoneText(config.openAiImageTarget.model), + true, + ); + } + + const calls = collectOpenAiResponseFunctionCalls(completedResponse); + if (!calls.length) return; + + const toolOutputs = []; + for (const call of calls) { + const result = await executeTool({ + id: call.callId, + name: call.name, + argumentsText: call.argumentsText + }, streamMessage, toolContext); + toolOutputs.push({ + type: "function_call_output", + call_id: call.callId, + output: typeof result === "string" ? result : JSON.stringify(result), + }); + } + responseInput = [...responseInput, ...(completedResponse.output ?? []), ...toolOutputs]; + } +} + +async function runOllama( + msg: Message, + messages: ChatMessage[], + streamMessage: TelegramStreamMessage, + signal: AbortSignal, + stream: boolean, + think: Think, + firstRoundStatus: string, + config: RuntimeConfigSnapshot, + toolContext: ToolRuntimeContext, + contextSize?: number, +): Promise { + const fromId = msg.from?.id; + + const audioCount = messages.filter(m => m.audios?.length).flatMap(m => m.audios ?? []).length; + const videoNoteCount = messages.filter(m => m.videoNotes?.length).flatMap(m => m.videoNotes ?? []).length; + const imageCount = messages.filter(m => m.images?.length).flatMap(m => m.images ?? []).length; + + const target = (audioCount || videoNoteCount) ? config.ollamaAudioTarget : + imageCount ? config.ollamaVisionTarget : + think ? config.ollamaThinkingTarget : config.ollamaChatTarget; + const model = target.model; + const ollama = createOllamaClient(target); + const modelInfo = await ollama.show({model: model}); + const contextKey = Object.keys(modelInfo.model_info).find(k => k.endsWith(".context_length")); + // @ts-ignore + const maxContextLength = contextKey ? modelInfo?.model_info?.[contextKey] : 32768; + const context = clamp(contextSize ?? DEFAULT_OLLAMA_CONTEXT_SIZE, MIN_OLLAMA_CONTEXT_SIZE, maxContextLength ?? 32768); + + console.log("MESSAGES", messages); + + await unloadAllOllamaModels(ollama, [model, config.ollamaEmbeddingModel]); + + if (!(await isOllamaModelActive(target))) { + const currentStatus = streamMessage.getStatus(); + streamMessage.setStatus(Environment.getLoadingModelText(model)); + await streamMessage.flush(); + if (await loadOllamaModel(model, ollama, context)) { + streamMessage.setStatus(currentStatus ?? Environment.waitThinkText); + await streamMessage.flush(); + } + } + + // console.log("STREAM_MESSAGE", JSON.stringify(streamMessage)); + + let interval: NodeJS.Timeout | null = null; + + if (!stream) { + let typingInFlight = false; + const applyTyping = async () => { + if (typingInFlight) return; + typingInFlight = true; + try { + await enqueueTelegramApiCall( + () => bot.sendChatAction({chat_id: msg.chat.id, action: "typing"}), + {method: "sendChatAction", chatId: msg.chat.id, chatType: msg.chat.type} + ).catch(logError); + } finally { + typingInFlight = false; + } + }; + + await applyTyping(); + interval = setInterval(() => { + applyTyping().catch(logError); + }, 5000); + } + + try { + for (let round = 0; round < MAX_TOOL_ROUNDS; round++) { + // if (round === 0) { + // streamMessage.setStatus(Environment.weightsLoadingText); + // await streamMessage.flush(); + // } else { + // streamMessage.setStatus(roundStatus(round, firstRoundStatus)); + // await streamMessage.flush(); + // } + + console.log("MAX_CONTENT_LENGTH", context); + + const request: ChatRequest = { + model: model, + messages: messages, + think: audioCount ? false : think, + options: { + temperature: messages.length <= 2 ? 0 : 0.6, + num_ctx: context, + } + }; + + if ((await getModelCapabilities(AiProvider.OLLAMA, model, "tools"))?.tools?.supported) { + if (fromId !== Environment.CREATOR_ID) { + request.tools = [ + getCurrentDateTimeTool, + getMarketRatesTool, + getWeatherTool + ]; + } else { + request.tools = getOllamaTools() as Tool[]; + } + } + + if (!stream) { + const response = await ollama.chat({ + ...request, + stream: false + }); + + const message = response.message; + + streamMessage.append(message?.content ?? ""); + + const calls = (message?.tool_calls ?? []).map((c: any, i: number) => ({ + id: `ollama_${round}_${i}`, + name: c.function?.name, + argumentsText: JSON.stringify(c.function?.arguments ?? {}) + })); + + if (!calls.length) break; + messages.push({ + role: "assistant", + content: message?.content ?? "", + tool_calls: calls.map(c => ({ + function: { + name: c.name, + arguments: safeJsonParseObject(c.argumentsText), + }, + })), + }); + + appendOllamaToolResults(messages, calls, await executeToolBatch(calls, streamMessage, toolContext)); + continue; + } + + const response = await ollama.chat({ + ...request, + stream: true + }); + + const calls: ToolCallData[] = []; + const abortOllamaResponse = () => response.abort?.(); + signal.addEventListener("abort", abortOllamaResponse, {once: true}); + if (signal.aborted) abortOllamaResponse(); + try { + for await (const chunk of response) { + const localToolCalls: ToolCallData[] = []; + + if (chunk.message.tool_calls) { + for (const [i, call] of (chunk.message?.tool_calls ?? []).entries()) { + localToolCalls.push({ + id: `ollama_${round}_${i}`, + name: call.function?.name, + argumentsText: JSON.stringify(call.function?.arguments ?? {}) + }); + } + } + + const newStatus = roundStatus(round, firstRoundStatus, chunk.message.content, localToolCalls, !!chunk.message.thinking); + const previousStatus = streamMessage.getStatus(); + if (newStatus && newStatus !== Environment.waitThinkText) { + streamMessage.setStatus(newStatus); + } else { + streamMessage.clearStatus(); + } + + if (streamMessage.getStatus() !== previousStatus && previousStatus && newStatus !== Environment.waitThinkText) { + await streamMessage.flush(); + } + + // console.log("OLLAMA_CHUNK", JSON.stringify(chunk)); + + if (signal.aborted) { + response.abort?.(); + throw new Error("Aborted"); + } + + if (chunk.message?.thinking && streamMessage.getStatus() !== Environment.reasoningText) { + // streamMessage.setStatus(Environment.reasoningText); + // await streamMessage.flush(); + } else { + // if (!streamMessage.getText().trim().length) { + // if (streamMessage.getStatus() === Environment.reasoningText) { + // streamMessage.clearStatus(); + // await streamMessage.flush(); + // } else if (streamMessage.getStatus() !== Environment.waitThinkText) { + // streamMessage.setStatus(Environment.waitThinkText); + // await streamMessage.flush(); + // } + // } else { + // if (streamMessage.getStatus().length) { + // streamMessage.clearStatus(); + // await streamMessage.flush(); + // } + // } + + streamMessage.append(chunk.message?.content ?? ""); + } + + for (const [i, call] of (chunk.message?.tool_calls ?? []).entries()) { + calls.push({ + id: `ollama_${round}_${i}`, + name: call.function?.name, + argumentsText: JSON.stringify(call.function?.arguments ?? {}) + }); + } + + if (chunk.done) { + await streamMessage.flush(streamMessage.regenerateKeyboard(), true); + } + } + } finally { + signal.removeEventListener("abort", abortOllamaResponse); + } + if (!calls.length) break; + + messages.push({ + role: "assistant", + content: streamMessage.getText(), + tool_calls: calls.map(c => ({ + function: { + name: c.name, + arguments: safeJsonParseObject(c.argumentsText) + } + })) + }); + + appendOllamaToolResults(messages, calls, await executeToolBatch(calls, streamMessage, toolContext)); + } + } finally { + if (interval) clearInterval(interval); + } +} + +async function runMistral( + messages: MistralChatMessage[], + documents: any[], + streamMessage: TelegramStreamMessage, + signal: AbortSignal, + stream: boolean, + firstRoundStatus: string, + config: RuntimeConfigSnapshot, + toolContext: ToolRuntimeContext, +): Promise { + documents; + const mistralAi = createMistralClient(config.mistralChatTarget); + for (let round = 0; round < MAX_TOOL_ROUNDS; round++) { + if (signal.aborted) throw new Error("Aborted"); + + streamMessage.setStatus(roundStatus(round, firstRoundStatus) ?? ""); + await streamMessage.flush(); + + if (!stream) { + const response = await mistralAi.chat.complete({ + model: config.mistralChatTarget.model, + messages: messages as any, + tools: getMistralTools() as any, + // documents: documents as any + }, {signal}); + const msg = response.choices?.[0]?.message; + streamMessage.append(typeof msg?.content === "string" ? msg.content : JSON.stringify(msg?.content ?? "")); + const calls = normalizeMistralToolCalls(msg?.toolCalls ?? []); + if (!calls.length) return; + messages.push({ + role: "assistant", + content: (msg?.content ?? "") as any, + toolCalls: msg?.toolCalls ?? [], + }); + for (const call of calls) await appendMistralToolResult(messages, call, streamMessage, toolContext); + continue; + } + + const streamResponse = await mistralAi.chat.stream({ + model: config.mistralChatTarget.model, + messages: messages as any, + tools: getMistralTools() as any, + // documents: documents as any + }, {signal}); + let calls: ToolCallData[] = []; + for await (const event of streamResponse) { + if (signal.aborted) throw new Error("Aborted"); + const choice = event.data?.choices?.[0]; + const delta = choice?.delta; + streamMessage.append(contentFromMistralDelta(delta)); + const deltaCalls = normalizeMistralToolCalls(delta?.toolCalls ?? []); + if (deltaCalls.length) calls = deltaCalls; + } + if (!calls.length) return; + messages.push({ + role: "assistant", + content: streamMessage.getText(), + toolCalls: calls.map(c => ({id: c.id, function: {name: c.name, arguments: c.argumentsText}})) + }); + for (const call of calls) await appendMistralToolResult(messages, call, streamMessage, toolContext); + } +} + +function openAiResponseContentToText(content: unknown): string { + if (typeof content === "string") return content; + if (!Array.isArray(content)) return ""; + return content.map((part: any) => part?.text ?? part?.content ?? part?.refusal ?? "").join(""); +} + +function openAiResponseMessagesToChatCompletions(messages: OpenAIChatMessage[]): any[] { + return messages.map((message: any) => { + if (message.role === "system" || message.role === "assistant") { + return { + role: message.role, + content: openAiResponseContentToText(message.content), + }; + } + + const content = Array.isArray(message.content) + ? message.content.map((part: any) => { + if (part.type === "input_image") { + return { + type: "image_url", + image_url: {url: part.image_url}, + }; + } + + return { + type: "text", + text: part.text ?? "", + }; + }) + : message.content; + + return {role: "user", content}; + }); +} + +function normalizeOpenAiChatToolCalls(toolCalls: any[] = []): ToolCallData[] { + return toolCalls.map((call, i) => ({ + id: call.id || `openai_chat_${Date.now()}_${i}`, + name: call.function?.name || call.name || "", + argumentsText: typeof call.function?.arguments === "string" + ? call.function.arguments + : JSON.stringify(call.function?.arguments ?? call.arguments ?? {}), + })).filter(call => call.name); +} + +function collectOpenAiChatStreamToolCalls(toolCalls: any[] = []): ToolCallData[] { + const byIndex = new Map(); + + for (const item of toolCalls) { + const index = item.index ?? byIndex.size; + const existing = byIndex.get(index) ?? { + id: item.id || `openai_chat_stream_${Date.now()}_${index}`, + name: "", + argumentsText: "", + }; + + if (item.id) existing.id = item.id; + if (item.function?.name) existing.name = item.function.name; + if (item.function?.arguments) existing.argumentsText += item.function.arguments; + byIndex.set(index, existing); + } + + return [...byIndex.values()].filter(call => call.name); +} + +async function appendOpenAiChatToolResults( + messages: any[], + calls: ToolCallData[], + results: string[], +): Promise { + for (const [index, call] of calls.entries()) { + messages.push({ + role: "tool", + tool_call_id: call.id, + content: results[index] ?? "", + }); + } +} + +async function runOpenAiCompatibleChat( + messages: OpenAIChatMessage[], + streamMessage: TelegramStreamMessage, + signal: AbortSignal, + stream: boolean, + firstRoundStatus: string, + config: RuntimeConfigSnapshot, + toolContext: ToolRuntimeContext, +): Promise { + const geminiOpenAi = createGeminiOpenAiClient(config.geminiChatTarget); + const chatMessages = openAiResponseMessagesToChatCompletions(messages); + + for (let round = 0; round < MAX_TOOL_ROUNDS; round++) { + streamMessage.setStatus(roundStatus(round, firstRoundStatus) ?? ""); + await streamMessage.flush(); + + if (!stream) { + const response: any = await geminiOpenAi.chat.completions.create({ + model: config.geminiChatTarget.model, + messages: chatMessages, + tools: getOpenAITools() as any, + parallel_tool_calls: true, + temperature: chatMessages.length <= 2 ? 0 : 0.6, + }, {signal}); + const message = response.choices?.[0]?.message; + streamMessage.append(message?.content ?? ""); + const calls = normalizeOpenAiChatToolCalls(message?.tool_calls ?? []); + if (!calls.length) return; + + chatMessages.push({ + role: "assistant", + content: message?.content ?? "", + tool_calls: message?.tool_calls ?? [], + }); + await appendOpenAiChatToolResults(chatMessages, calls, await executeToolBatch(calls, streamMessage, toolContext)); + continue; + } + + const response: any = await geminiOpenAi.chat.completions.create({ + model: config.geminiChatTarget.model, + messages: chatMessages, + tools: getOpenAITools() as any, + parallel_tool_calls: true, + temperature: chatMessages.length <= 2 ? 0 : 0.6, + stream: true, + }, {signal}); + + const streamToolCalls: any[] = []; + for await (const chunk of response) { + if (signal.aborted) throw new Error("Aborted"); + const delta = chunk.choices?.[0]?.delta; + streamMessage.append(delta?.content ?? ""); + if (delta?.tool_calls?.length) { + streamToolCalls.push(...delta.tool_calls); + } + } + + const calls = collectOpenAiChatStreamToolCalls(streamToolCalls); + if (!calls.length) return; + + chatMessages.push({ + role: "assistant", + content: streamMessage.getText(), + tool_calls: calls.map(call => ({ + id: call.id, + type: "function", + function: { + name: call.name, + arguments: call.argumentsText, + }, + })), + }); + await appendOpenAiChatToolResults(chatMessages, calls, await executeToolBatch(calls, streamMessage, toolContext)); + } +} + +function collectGeminiResponseText(response: any): string { + if (typeof response?.text === "string") return response.text; + + return (response?.candidates ?? []) + .flatMap((candidate: any) => candidate?.content?.parts ?? []) + .map((part: any) => part?.text ?? "") + .join(""); +} + +function collectGeminiFunctionCalls(response: any): ToolCallData[] { + const calls = response?.functionCalls + ?? (response?.candidates ?? []).flatMap((candidate: any) => { + return (candidate?.content?.parts ?? []) + .map((part: any) => part?.functionCall) + .filter(Boolean); + }); + + return (calls ?? []).map((call: any, index: number) => ({ + id: call.id || `gemini_${Date.now()}_${index}`, + name: call.name ?? "", + argumentsText: JSON.stringify(call.args ?? call.arguments ?? {}), + })).filter((call: ToolCallData) => call.name); +} + +function mergeGeminiFunctionCalls(existing: ToolCallData[], next: ToolCallData[]): ToolCallData[] { + const merged = [...existing]; + for (const call of next) { + const index = merged.findIndex(item => item.id === call.id); + if (index === -1) { + merged.push(call); + } else { + merged[index] = call; + } + } + return merged; +} + +function appendGeminiToolRound(messages: GeminiMessage[], calls: ToolCallData[], results: string[]): void { + messages.push({ + role: "model", + parts: calls.map(call => ({ + functionCall: { + id: call.id, + name: call.name, + args: safeJsonParseObject(call.argumentsText), + }, + })), + }); + + messages.push({ + role: "user", + parts: calls.map((call, index) => ({ + functionResponse: { + id: call.id, + name: call.name, + response: {result: results[index] ?? ""}, + }, + })), + }); +} + +async function runGemini( + messages: GeminiMessage[], + streamMessage: TelegramStreamMessage, + signal: AbortSignal, + stream: boolean, + firstRoundStatus: string, + config: RuntimeConfigSnapshot, + toolContext: ToolRuntimeContext, +): Promise { + const geminiAi = createGoogleGenAiClient(config.geminiChatTarget); + + for (let round = 0; round < MAX_TOOL_ROUNDS; round++) { + if (signal.aborted) throw new Error("Aborted"); + + streamMessage.setStatus(roundStatus(round, firstRoundStatus) ?? ""); + await streamMessage.flush(); + + const request = { + model: config.geminiChatTarget.model, + contents: messages as any, + config: { + tools: getGeminiTools() as any, + temperature: messages.length <= 2 ? 0 : 0.6, + abortSignal: signal, + }, + }; + + if (!stream) { + const response: any = await geminiAi.models.generateContent(request); + streamMessage.append(collectGeminiResponseText(response)); + const calls = collectGeminiFunctionCalls(response); + if (!calls.length) return; + + appendGeminiToolRound(messages, calls, await executeToolBatch(calls, streamMessage, toolContext)); + continue; + } + + const response: any = await geminiAi.models.generateContentStream(request); + let calls: ToolCallData[] = []; + for await (const chunk of response) { + if (signal.aborted) throw new Error("Aborted"); + streamMessage.append(collectGeminiResponseText(chunk)); + calls = mergeGeminiFunctionCalls(calls, collectGeminiFunctionCalls(chunk)); + } + + if (!calls.length) return; + appendGeminiToolRound(messages, calls, await executeToolBatch(calls, streamMessage, toolContext)); + } +} + +async function executeUnifiedAiRequest( + options: UnifiedRunOptions, + config: RuntimeConfigSnapshot, + downloads: AiDownloadedFile[], + controller: AbortController, + streamMessage: TelegramStreamMessage, +): Promise<{ mistralLibraryId?: string }> { + const { + chatMessages, + imageCount + } = await collectTextMessages( + options.msg, + options.text, + options.provider, + downloads, + config, + options.responseLanguage ?? DEFAULT_AI_RESPONSE_LANGUAGE, + ); + const firstRoundStatus = initialStatus(downloads, imageCount); + const toolContext = toolRuntimeContextFromDownloads(downloads); + + streamMessage.setStatus(firstRoundStatus); + await streamMessage.flush(); + + const hasDocument = downloads.some(d => d.kind === "document"); + if (hasDocument && options.provider !== AiProvider.MISTRAL && options.provider !== AiProvider.OLLAMA) { + throw new Error(Environment.documentsUnifiedRunnerUnsupportedText); + } + + let mistralLibraryId: string | undefined; + + const transcript = await transcribeAudioIfNeeded(options.provider, options.msg.from?.id, downloads, streamMessage, controller.signal).catch(e => { + if (downloads.some(isTranscribableAudioDownload)) throw e; + return ""; + }); + + if (transcript.trim()) { + if (options.voiceMode === AI_VOICE_MODE_TRANSCRIPT) { + streamMessage.replaceText(transcript.trim()); + await streamMessage.finish(); + return {mistralLibraryId}; + } + + appendTranscriptToChatMessages(chatMessages, options.provider, transcript); + stripAudioFromRunnerMessages(chatMessages); + } + + try { + const preparedMistral = options.provider === AiProvider.MISTRAL + ? await prepareMistralDocuments(downloads, chatMessages, streamMessage, config.mistralChatTarget, controller.signal) + : {documents: []}; + const documents = preparedMistral.documents; + mistralLibraryId = preparedMistral.libraryId; + + if (options.provider === AiProvider.OLLAMA) { + await prepareOllamaDocumentRag({ + downloads, + messages: chatMessages as OllamaChatMessage[], + userQuery: options.text, + message: streamMessage, + config: { + embeddingModel: config.ollamaDocumentsTarget.model, + embeddingClient: createOllamaClient(config.ollamaDocumentsTarget), + chunkSize: config.ollamaRagChunkSize, + chunkOverlap: config.ollamaRagChunkOverlap, + topK: config.ollamaRagTopK, + maxContextChars: config.ollamaRagMaxContextChars, + minScore: config.ollamaRagMinScore, + maxArchiveFiles: config.ollamaRagMaxArchiveFiles, + maxArchiveBytes: config.ollamaRagMaxArchiveBytes, + maxArchiveDepth: config.ollamaRagMaxArchiveDepth, + }, + }); + } + + switch (options.provider) { + case AiProvider.OPENAI: + await runOpenAi(chatMessages as OpenAIChatMessage[], streamMessage, controller.signal, options.stream ?? true, firstRoundStatus, options.msg, config, toolContext); + break; + case AiProvider.OLLAMA: + const currentModel = config.ollamaChatTarget.model; + if (currentModel?.includes("gpt-oss")) { + if (options.think) { + options.think = "high"; + } + } + + await runOllama(options.msg, chatMessages as ChatMessage[], streamMessage, controller.signal, ifTrue(options.stream), options.think ?? false, firstRoundStatus, config, toolContext, options.contextSize); + break; + case AiProvider.MISTRAL: + await runMistral(chatMessages as MistralChatMessage[], documents, streamMessage, controller.signal, options.stream ?? true, firstRoundStatus, config, toolContext); + break; + case AiProvider.GEMINI: + if (getGeminiApiMode(config.geminiChatTarget) === "openai") { + await runOpenAiCompatibleChat(chatMessages as OpenAIChatMessage[], streamMessage, controller.signal, options.stream ?? true, firstRoundStatus, config, toolContext); + } else { + await runGemini(chatMessages as GeminiMessage[], streamMessage, controller.signal, options.stream ?? true, firstRoundStatus, config, toolContext); + } + break; + } + + if (streamMessage.getText().length > TELEGRAM_LIMIT) { + streamMessage.replaceText(streamMessage.getText().slice(0, TELEGRAM_LIMIT - 3) + "..."); + } + await streamMessage.finish(); + await sendVoiceResponseIfNeeded(options, downloads, streamMessage.getText()); + + return {mistralLibraryId}; + } catch (e) { + if (mistralLibraryId) { + await deleteMistralLibrary(mistralLibraryId, config.mistralChatTarget); + } + throw e; + } +} + +export async function runUnifiedAi(options: UnifiedRunOptions): Promise { + const config = snapshotRuntimeConfig(); + options.responseLanguage ??= await resolveAiResponseLanguageForUser(options.msg.from?.id); + options.contextSize ??= await resolveAiContextSizeForUser(options.msg.from?.id); + options.voiceMode ??= await resolveAiVoiceModeForUser(options.msg.from?.id); + const requestedAttachmentKinds = await collectRequestedAttachmentKinds(options.msg); + + if (await rejectUnsupportedAttachments(options.provider, snapshotModel(options.provider, config), options.msg, config, requestedAttachmentKinds)) { + return; + } + + const cached = await collectCachedMessageAttachments(options.msg); + if (cached.missing.length) { + await replyToMessage({ + message: options.msg, + text: Environment.getAttachmentMissingFromCacheText(cached.missing[0].fileName), + }).catch(logError); + return; + } + + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), AI_REQUEST_TIMEOUT_MS); + const cancel = createAiCancelRequest({ + chatId: options.msg.chat.id, + fromId: options.msg.from?.id ?? 0, + provider: providerName(options.provider), + controller + }); + const streamMessage = new TelegramStreamMessage( + options.msg, + cancel.id, + ifTrue(options.stream), + options.voiceMode === AI_VOICE_MODE_TRANSCRIPT && hasAudioAttachmentKind(requestedAttachmentKinds) + ? undefined + : buildAiRegenerateCallbackData(options.provider, !!options.think), + options.targetMessage, + options.provider, + options.isGuestMsg + ); + cancel.onCancel = () => streamMessage.cancel(cancel.provider); + let mistralLibraryId: string | undefined; + const queueTarget = resolveAiRequestQueueTarget(options, config, requestedAttachmentKinds); + + try { + const queueMessage = await streamMessage.start(Environment.getAiQueueText(options.provider, 0)); + setAiCancelMessageId(cancel.id, queueMessage.message_id); + + await aiProviderRequestQueue.enqueue(queueTarget, { + signal: controller.signal, + onPositionChange: async requestsBefore => { + streamMessage.setStatus(Environment.getAiQueueText(options.provider, requestsBefore)); + await streamMessage.flush(); + }, + run: async () => { + const downloads = attachmentsToDownloadedFiles(cached.attachments); + try { + const result = await executeUnifiedAiRequest(options, config, downloads, controller, streamMessage); + mistralLibraryId = result.mistralLibraryId; + } finally { + cleanupDownloads(downloads); + } + }, + }); + } catch (e: any) { + if (controller.signal.aborted || String(e?.message ?? e).includes("Aborted")) { + streamMessage.replaceText(streamMessage.getText()); + await streamMessage.finish(); + } else { + await streamMessage.fail(e); + logError(e); + } + } finally { + clearTimeout(timeout); + finishAiRequest(cancel.id); + if (mistralLibraryId) { + await deleteMistralLibrary(mistralLibraryId, config.mistralChatTarget); + } + } +} diff --git a/src/model/ai-capability-info.ts b/src/model/ai-capability-info.ts index 345928a..291374a 100644 --- a/src/model/ai-capability-info.ts +++ b/src/model/ai-capability-info.ts @@ -1,5 +1,14 @@ +import {AiProvider} from "./ai-provider"; + +export type AiEndpointInfo = { + provider?: AiProvider; + baseUrl?: string; + external?: boolean; +}; + export type AiCapabilityInfo = { supported?: boolean, external?: boolean, - model?: string -}; \ No newline at end of file + model?: string, + endpoint?: AiEndpointInfo, +}; diff --git a/src/model/ai-model-capabilities.ts b/src/model/ai-model-capabilities.ts index 40ab9e6..4b8aadd 100644 --- a/src/model/ai-model-capabilities.ts +++ b/src/model/ai-model-capabilities.ts @@ -1,9 +1,14 @@ import {AiCapabilityInfo} from "./ai-capability-info"; export class AiModelCapabilities { - vision?: AiCapabilityInfo; - ocr?: AiCapabilityInfo; - thinking?: AiCapabilityInfo; - tools?: AiCapabilityInfo; - audio?: AiCapabilityInfo; -} \ No newline at end of file + vision: AiCapabilityInfo | undefined; + ocr: AiCapabilityInfo | undefined; + thinking: AiCapabilityInfo | undefined; + extendedThinking: AiCapabilityInfo | undefined; + tools: AiCapabilityInfo | undefined; + audio: AiCapabilityInfo | undefined; + documents: AiCapabilityInfo | undefined; + outputImages: AiCapabilityInfo | undefined; + speechToText: AiCapabilityInfo | undefined; + textToSpeech: AiCapabilityInfo | undefined; +}