ai: add unified runtime and provider adapters

This commit is contained in:
2026-05-10 22:52:35 +03:00
parent 4c2a5471df
commit 32c35f54aa
15 changed files with 4038 additions and 8 deletions
+213
View File
@@ -0,0 +1,213 @@
import {Mistral} from "@mistralai/mistralai";
import {GoogleGenAI} from "@google/genai";
import {Ollama} from "ollama";
import {OpenAI} from "openai";
import {Environment} from "../common/environment";
import {AiModelCapabilities} from "../model/ai-model-capabilities";
import {AiProvider} from "../model/ai-provider";
/** Name of one capability flag: any key of AiModelCapabilities. */
export type AiCapabilityName = keyof AiModelCapabilities;
/** What a resolved target is used for: a specific capability or plain "chat". */
export type AiRuntimePurpose = AiCapabilityName | "chat";
/** Provider, model, endpoint and credential selected for one purpose. */
export type AiRuntimeTarget = {
provider: AiProvider;
purpose: AiRuntimePurpose;
model: string;
// Both are optional: they stay undefined when nothing is configured for the provider/purpose.
baseUrl?: string;
apiKey?: string;
};
/** API flavour used to talk to Gemini. */
export type GeminiApiMode = "google" | "openai";
// Base URL used for Gemini when it is accessed in "openai" mode and no URL is configured.
const GEMINI_OPENAI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/";
/**
 * Environment-variable suffixes tried for each purpose, in priority order.
 * endpointEnvNames / apiKeyEnvNames / modelEnvNames combine them with the provider prefix,
 * so later entries act as fallbacks (e.g. "ocr" falls back to the VISION and IMAGE variables).
 */
const PURPOSE_SUFFIXES: Record<AiRuntimePurpose, string[]> = {
chat: ["CHAT"],
vision: ["VISION", "IMAGE"],
ocr: ["OCR", "VISION", "IMAGE"],
thinking: ["THINKING", "THINK"],
extendedThinking: ["EXTENDED_THINKING", "THINKING", "THINK"],
tools: ["TOOLS", "CHAT"],
audio: ["AUDIO"],
documents: ["DOCUMENTS", "RAG", "EMBEDDING"],
outputImages: ["OUTPUT_IMAGES", "IMAGE"],
speechToText: ["SPEECH_TO_TEXT", "TRANSCRIPTION", "STT", "AUDIO"],
textToSpeech: ["TEXT_TO_SPEECH", "TTS"],
};
/** Environment-variable prefix for a provider: simply the provider's string form. */
function providerPrefix(provider: AiProvider): string {
    const prefix = provider.toString();
    return prefix;
}
/**
 * Looks up an optional configuration value by name.
 * Delegates to Environment.getOptionalConfigValue and returns its result unchanged.
 */
function env(name: string): string | undefined {
    const configured = Environment.getOptionalConfigValue(name);
    return configured;
}
/**
 * Returns the first non-empty configuration value among `names`, in order.
 * Lookup stops at the first hit; undefined when none of the names has a truthy value.
 */
function firstEnv(names: string[]): string | undefined {
    let found: string | undefined;
    names.some(name => {
        const candidate = env(name);
        if (!candidate) return false;
        found = candidate;
        return true;
    });
    return found;
}
/**
 * Lists the endpoint variable names to try for a provider/purpose pair.
 * For every purpose suffix (in priority order) three spellings are produced:
 * `_BASE_URL`, `_ENDPOINT` and `_ADDRESS`.
 */
function endpointEnvNames(provider: AiProvider, purpose: AiRuntimePurpose): string[] {
    const prefix = providerPrefix(provider);
    const names: string[] = [];
    for (const suffix of PURPOSE_SUFFIXES[purpose]) {
        for (const kind of ["BASE_URL", "ENDPOINT", "ADDRESS"]) {
            names.push(`${prefix}_${suffix}_${kind}`);
        }
    }
    return names;
}
/** Lists the `<PREFIX>_<SUFFIX>_API_KEY` variable names to try for a provider/purpose pair. */
function apiKeyEnvNames(provider: AiProvider, purpose: AiRuntimePurpose): string[] {
    const prefix = providerPrefix(provider);
    const names: string[] = [];
    for (const suffix of PURPOSE_SUFFIXES[purpose]) {
        names.push(`${prefix}_${suffix}_API_KEY`);
    }
    return names;
}
/** Lists the `<PREFIX>_<SUFFIX>_MODEL` variable names to try for a provider/purpose pair. */
function modelEnvNames(provider: AiProvider, purpose: AiRuntimePurpose): string[] {
    const prefix = providerPrefix(provider);
    const names: string[] = [];
    for (const suffix of PURPOSE_SUFFIXES[purpose]) {
        names.push(`${prefix}_${suffix}_MODEL`);
    }
    return names;
}
/**
 * Returns the provider-wide base URL from configuration, if any.
 *
 * Gemini additionally falls back to the OpenAI-compatible endpoint when
 * Environment.GEMINI_API_MODE is "openai". Unknown providers yield undefined.
 */
export function getProviderBaseUrl(provider: AiProvider): string | undefined {
    if (provider === AiProvider.OLLAMA) {
        return env("OLLAMA_ENDPOINT");
    }
    if (provider === AiProvider.GEMINI) {
        const configured = env("GEMINI_BASE_URL") ?? env("GEMINI_ENDPOINT");
        const openAiFallback = Environment.GEMINI_API_MODE === "openai" ? GEMINI_OPENAI_BASE_URL : undefined;
        return configured ?? openAiFallback;
    }
    if (provider === AiProvider.MISTRAL) {
        return env("MISTRAL_BASE_URL") ?? env("MISTRAL_ENDPOINT");
    }
    if (provider === AiProvider.OPENAI) {
        return env("OPENAI_BASE_URL") ?? env("OPENAI_ENDPOINT");
    }
    return undefined;
}
/**
 * Returns the provider-wide API key held by Environment.
 * Unknown providers yield undefined.
 */
export function getProviderApiKey(provider: AiProvider): string | undefined {
    if (provider === AiProvider.OLLAMA) return Environment.OLLAMA_API_KEY;
    if (provider === AiProvider.GEMINI) return Environment.GEMINI_API_KEY;
    if (provider === AiProvider.MISTRAL) return Environment.MISTRAL_API_KEY;
    if (provider === AiProvider.OPENAI) return Environment.OPENAI_API_KEY;
    return undefined;
}
/**
 * Picks the default model for a provider/purpose pair from Environment.
 *
 * Each provider maps a handful of purposes to dedicated models and sends every
 * other purpose to its general chat model.
 */
export function getDefaultModelForPurpose(provider: AiProvider, purpose: AiRuntimePurpose): string {
    switch (provider) {
        case AiProvider.OLLAMA: {
            if (purpose === "vision" || purpose === "ocr" || purpose === "outputImages") {
                return Environment.OLLAMA_IMAGE_MODEL;
            }
            if (purpose === "thinking" || purpose === "extendedThinking") {
                return Environment.OLLAMA_THINK_MODEL;
            }
            if (purpose === "audio" || purpose === "speechToText") {
                return Environment.OLLAMA_AUDIO_MODEL;
            }
            if (purpose === "documents") {
                return Environment.OLLAMA_EMBEDDING_MODEL;
            }
            return Environment.OLLAMA_CHAT_MODEL;
        }
        case AiProvider.GEMINI: {
            if (purpose === "outputImages") return Environment.GEMINI_IMAGE_MODEL;
            if (purpose === "speechToText") return Environment.GEMINI_TRANSCRIPTION_MODEL;
            if (purpose === "textToSpeech") return Environment.GEMINI_TTS_MODEL;
            return Environment.GEMINI_MODEL;
        }
        case AiProvider.MISTRAL: {
            if (purpose === "speechToText") return Environment.MISTRAL_TRANSCRIPTION_MODEL;
            // An empty TTS model falls back to the general Mistral model.
            if (purpose === "textToSpeech") return Environment.MISTRAL_TTS_MODEL || Environment.MISTRAL_MODEL;
            return Environment.MISTRAL_MODEL;
        }
        case AiProvider.OPENAI: {
            if (purpose === "outputImages") return Environment.OPENAI_IMAGE_MODEL;
            if (purpose === "speechToText") return Environment.OPENAI_TRANSCRIPTION_MODEL;
            if (purpose === "textToSpeech") return Environment.OPENAI_TTS_MODEL;
            return Environment.OPENAI_MODEL;
        }
    }
}
/**
 * Resolves the model, endpoint and API key to use for a provider/purpose pair.
 *
 * Model precedence: explicit override, purpose-specific variable, provider default.
 * Endpoint and key precedence: purpose-specific variable, then the provider-wide value.
 */
export function resolveAiRuntimeTarget(
    provider: AiProvider,
    purpose: AiRuntimePurpose,
    modelOverride?: string,
): AiRuntimeTarget {
    let model = modelOverride;
    if (model == null) model = firstEnv(modelEnvNames(provider, purpose));
    if (model == null) model = getDefaultModelForPurpose(provider, purpose);

    let baseUrl = firstEnv(endpointEnvNames(provider, purpose));
    if (baseUrl == null) baseUrl = getProviderBaseUrl(provider);

    let apiKey = firstEnv(apiKeyEnvNames(provider, purpose));
    if (apiKey == null) apiKey = getProviderApiKey(provider);

    return {provider, purpose, model, baseUrl, apiKey};
}
/**
 * Tells whether two targets talk to the same endpoint: same provider, base URL and API key.
 * A missing base URL or key is treated like an empty string; the model is not compared.
 */
export function sameRuntimeEndpoint(left: AiRuntimeTarget, right: AiRuntimeTarget): boolean {
    const orEmpty = (value: string | undefined): string => value ?? "";
    if (left.provider !== right.provider) return false;
    if (orEmpty(left.baseUrl) !== orEmpty(right.baseUrl)) return false;
    return orEmpty(left.apiKey) === orEmpty(right.apiKey);
}
/** Builds an OpenAI SDK client from the target's API key and base URL. */
export function createOpenAiClient(target: AiRuntimeTarget): OpenAI {
    const {apiKey, baseUrl: baseURL} = target;
    return new OpenAI({apiKey, baseURL});
}
/**
 * Decides which Gemini API flavour to use.
 *
 * An explicit Environment.GEMINI_API_MODE of "openai" or "google" wins; otherwise the
 * mode is "openai" when the target's base URL contains "/openai", else "google".
 */
export function getGeminiApiMode(target?: AiRuntimeTarget): GeminiApiMode {
    const configured = Environment.GEMINI_API_MODE;
    if (configured === "openai" || configured === "google") return configured;
    const baseUrl = target?.baseUrl ?? "";
    return baseUrl.includes("/openai") ? "openai" : "google";
}
/**
 * Builds an OpenAI-compatible client for Gemini.
 * When the target carries no base URL, Gemini's OpenAI-compatible endpoint is used.
 */
export function createGeminiOpenAiClient(target: AiRuntimeTarget): OpenAI {
    const baseUrl = target.baseUrl ?? GEMINI_OPENAI_BASE_URL;
    return createOpenAiClient({...target, baseUrl});
}
/**
 * Builds a Google GenAI SDK client from the target's API key.
 * NOTE(review): target.baseUrl is not forwarded to the SDK here — confirm that is intended.
 */
export function createGoogleGenAiClient(target: AiRuntimeTarget): GoogleGenAI {
    const {apiKey} = target;
    return new GoogleGenAI({apiKey});
}
/** Builds a Mistral SDK client; the target's base URL is passed as `serverURL`. */
export function createMistralClient(target: AiRuntimeTarget): Mistral {
    const {apiKey, baseUrl: serverURL} = target;
    return new Mistral({apiKey, serverURL});
}
/**
 * Normalises a configured Ollama base URL into a host string.
 *
 * - Missing/blank input yields undefined so the SDK can apply its own default host.
 * - Trailing slashes are dropped.
 * - A URL that already names a port is returned as is.
 * - Otherwise port 11434 is inserted right after the host part (before any path).
 */
function resolveOllamaHost(baseUrl: string | undefined): string | undefined {
    const trimmed = baseUrl?.trim().replace(/\/+$/, "");
    if (!trimmed) return undefined;

    // Split into optional "scheme://", authority (up to the first "/") and the remainder.
    const parts = /^([a-z][a-z0-9+.-]*:\/\/)?([^/]*)(.*)$/i.exec(trimmed);
    const scheme = parts?.[1] ?? "";
    const authority = parts?.[2] ?? trimmed;
    const rest = parts?.[3] ?? "";

    // Anchoring at the end keeps "user:pass@host" and "[::1]" from looking like a port.
    if (/:\d+$/.test(authority)) return trimmed;
    return `${scheme}${authority}:11434${rest}`;
}

/**
 * Builds an Ollama client for the target.
 *
 * The host defaults to port 11434 when the base URL names no port. Previously an
 * undefined base URL produced the host "undefined:11434", and URLs with a trailing
 * slash, a path or a different port had ":11434" appended after them.
 * An API key, when present, is sent as a Bearer Authorization header.
 */
export function createOllamaClient(target: AiRuntimeTarget): Ollama {
    return new Ollama({
        host: resolveOllamaHost(target.baseUrl),
        headers: target.apiKey ? {"Authorization": `Bearer ${target.apiKey}`} : undefined,
    });
}
+55
View File
@@ -0,0 +1,55 @@
import {randomUUID} from "node:crypto";
/** A cancellable in-flight AI request, tracked by a generated id. */
export type AiCancelRequest = {
id: string;
chatId: number;
// Optional; can be filled in later through setAiCancelMessageId.
messageId?: number;
fromId: number;
provider: string;
// Aborted by abortAiRequest.
controller: AbortController;
// Optional hook awaited by abortAiRequest after the controller is aborted.
onCancel?: () => Promise<void> | void;
};
// Module-level registry of tracked requests, keyed by request id.
const requests = new Map<string, AiCancelRequest>();
/**
 * Registers a new cancellable request and returns it.
 *
 * A random UUID is assigned as the id; a fresh AbortController is created unless the
 * caller supplies one.
 */
export function createAiCancelRequest(params: Omit<AiCancelRequest, "id" | "controller"> & { controller?: AbortController }): AiCancelRequest {
    const {chatId, messageId, fromId, provider, onCancel} = params;
    const id = randomUUID();
    const controller = params.controller ?? new AbortController();
    const request: AiCancelRequest = {id, controller, chatId, messageId, fromId, provider, onCancel};
    requests.set(id, request);
    return request;
}
/** Stores the message id on a tracked request; does nothing when the id is unknown. */
export function setAiCancelMessageId(id: string, messageId: number): void {
    const tracked = requests.get(id);
    if (!tracked) return;
    tracked.messageId = messageId;
}
/** Looks up a tracked request by id; undefined when it is not (or no longer) registered. */
export function getAiCancelRequest(id: string): AiCancelRequest | undefined {
    const tracked = requests.get(id);
    return tracked;
}
/**
 * Aborts a tracked request: signals its AbortController, then awaits its onCancel hook.
 *
 * The request is removed from the registry before anything is awaited, so a second
 * abort for the same id (for example a repeated cancel click while onCancel is still
 * running) finds nothing and does not run onCancel again.
 *
 * @returns true when this call performed the abort, false when the id is not registered.
 * @throws whatever onCancel throws; the request is already unregistered at that point.
 */
export async function abortAiRequest(id: string): Promise<boolean> {
    const request = requests.get(id);
    if (!request) return false;
    // Unregister first: this is what makes concurrent aborts run the hook only once.
    requests.delete(id);
    request.controller.abort();
    await request.onCancel?.();
    return true;
}
/** Unregisters a request without aborting it; unknown ids are ignored. */
export function finishAiRequest(id: string): void {
    if (requests.has(id)) {
        requests.delete(id);
    }
}
+90
View File
@@ -0,0 +1,90 @@
import {AiToolCall} from "./tool-types";
import {OllamaChatMessage} from "./ollama-chat-message";
import {GeminiMessage} from "./gemini-chat-message";
import {MistralChatMessage} from "./mistral-chat-message";
import {MessageAudioPart, MessageImagePart} from "../common/message-part";
import {OpenAIChatMessage} from "./openai-chat-message";
/**
 * Provider-neutral chat message.
 * The as*ChatMessage helpers in this file convert it to provider-specific shapes.
 */
export type ChatMessage = {
role: "system" | "user" | "assistant" | "tool";
content: string;
// Attachments. NOTE(review): the string encoding (base64 vs. path/URL) is not visible here — confirm.
images?: string[];
imageParts?: MessageImagePart[];
documents?: string[];
audios?: string[];
audioParts?: MessageAudioPart[];
videos?: string[];
videoNotes?: string[];
thinking?: string;
tool_calls?: AiToolCall[];
tool_name?: string;
}
/**
 * Converts a ChatMessage to the Ollama message shape.
 * Only role, content, thinking, images, tool_calls and tool_name are carried over.
 */
export function asOllamaChatMessage(message: ChatMessage): OllamaChatMessage {
    const {role, content, thinking, images, tool_calls, tool_name} = message;
    return {role, content, thinking, images, tool_calls, tool_name};
}
// export function asGeminiChatMessage(message: ChatMessage): GeminiMessage {
// if (message.images) {
// return {
// role: message.role,
// content: message.images.map(() => {
// return {
// type: "image",
// };
// })
// };
// }
//
// return {
// role: message.role,
// content: {
// type: "text",
// text: message.content,
// },
// };
// }
/**
 * Converts a ChatMessage to the Mistral message shape.
 * Only role and textual content are carried over; attachments and tool data are dropped.
 */
export function asMistralChatMessage(message: ChatMessage): MistralChatMessage {
    const {role, content} = message;
    return {role, content};
}
// export function asOpenAIChatMessage(message: ChatMessage): OpenAIChatMessage {
// return {
//
// }
// }
/*
const messages: any[] = ordered.map(part => {
const content: any[] = [{
type: "input_text",
text: (Environment.USE_NAMES_IN_PROMPT && !part.bot ? `MESSAGE FROM USER \"${part.name}\":\n` : "") + part.content,
}];
if (!part.bot) {
for (const image of part.images ?? []) {
content.push({type: "input_image", image_url: `data:image/jpeg;base64,${image}`, detail: "auto"});
}
}
return {role: part.bot ? "assistant" : "user", content};
});
if (Environment.SYSTEM_PROMPT && Environment.USE_SYSTEM_PROMPT) {
messages.unshift({role: "system", content: Environment.SYSTEM_PROMPT});
}
return {parts: messages, imageCount};
*/
/** Any provider-specific chat message shape (OpenAI, Ollama, Mistral or Gemini). */
export type AiChatMessage = | OpenAIChatMessage | OllamaChatMessage | MistralChatMessage | GeminiMessage;
+84
View File
@@ -0,0 +1,84 @@
// Step types: a union discriminated by the literal `type` field.
// NOTE(review): presumably these mirror a Gemini API's step schema — confirm against the SDK.
/** Step carrying user-provided content. */
export type GeminiUserInputStep = {
type: "user_input";
content?: Array<GeminiContent>;
}
/** Step carrying content produced by the model. */
export type GeminiModelOutputStep = {
type: "model_output";
content?: Array<GeminiContent>;
}
/** Step describing a function call: its id, name and arguments. */
export type GeminiFunctionCallStep = {
id: string;
arguments: {
[key: string]: unknown;
};
name: string;
type: "function_call";
signature?: string;
}
/** Step carrying a function result; `call_id` refers to the call it answers. */
export type GeminiFunctionResultStep = {
call_id: string;
// The leading `unknown` member makes this union accept any value.
result: unknown | Array<GeminiTextContent | GeminiImageContent> | string;
type: "function_result";
is_error?: boolean;
name?: string;
signature?: string;
}
/** Any of the step variants above. */
export type GeminiStep =
| GeminiUserInputStep
| GeminiModelOutputStep
| GeminiFunctionCallStep
| GeminiFunctionResultStep;
/** Plain-text content part. */
export type GeminiTextContent = {
text: string;
}
/** Inline binary content part: data plus its MIME type. */
export type GeminiInlineContent = {
inlineData: {
// NOTE(review): presumably base64-encoded — confirm with the code that builds these parts.
data: string;
mimeType: string;
};
}
// Media-specific names for the same inline shape.
export type GeminiImageContent = GeminiInlineContent;
export type GeminiAudioContent = GeminiInlineContent;
export type GeminiDocumentContent = GeminiInlineContent;
export type GeminiVideoContent = GeminiInlineContent;
/** Content part in which a function call is expressed. */
export type GeminiFunctionCallContent = {
functionCall: {
id?: string;
name?: string;
args?: Record<string, unknown>;
};
}
/** Content part carrying the response to a function call. */
export type GeminiFunctionResponseContent = {
functionResponse: {
id?: string;
name?: string;
response: Record<string, unknown>;
};
}
/** Any content part variant. */
export type GeminiContent =
| GeminiTextContent
| GeminiInlineContent
| GeminiFunctionCallContent
| GeminiFunctionResponseContent;
/** A turn: one content part or a list of them, with an optional role. */
export type GeminiTurn = {
content?: Array<GeminiContent> | GeminiContent;
role?: string;
}
/** Accepted input forms: raw text, a list of steps, a list of parts, or a single part. */
export type GeminiInput = string | Array<GeminiStep> | Array<GeminiContent> | GeminiContent;
/** Chat message in Gemini's shape: a role ("user" or "model") plus its parts. */
export type GeminiMessage = {
role: "user" | "model";
parts: GeminiContent[];
};
+112
View File
@@ -0,0 +1,112 @@
/** Known image detail levels. */
export const MistralImageDetail = {
Low: "low",
Auto: "auto",
High: "high",
} as const;
/** One of the known detail values, or an unrecognized string (see OpenEnum). */
export type MistralImageDetail = OpenEnum<typeof MistralImageDetail>;
// Type-level brand only: `declare` emits no runtime value.
declare const __brand: unique symbol;
/** Brands a value type as "not one of the known enum members". */
export type Unrecognized<T> = T & { [__brand]: "unrecognized" };
/**
 * "Open" enum: the known values of `T`, plus a branded string or number
 * (matching the value kind of `T`) for values outside the known set.
 */
export type OpenEnum<T extends Readonly<Record<string, string | number>>> =
| T[keyof T]
| Unrecognized<T[keyof T] extends number ? number : string>;
/** Known built-in connector identifiers. */
export const BuiltInConnectors = {
WebSearch: "web_search",
WebSearchPremium: "web_search_premium",
CodeInterpreter: "code_interpreter",
ImageGeneration: "image_generation",
DocumentLibrary: "document_library",
} as const;
export type BuiltInConnectors = OpenEnum<typeof BuiltInConnectors>;
/** Text content chunk. */
export type MistralTextChunk = {
type: "text";
text: string;
};
/** Chunk referencing a tool: a built-in connector or any other tool name. */
export type MistralToolReferenceChunk = {
type: "tool_reference" | undefined;
tool: BuiltInConnectors | string;
title: string;
url?: string | null | undefined;
favicon?: string | null | undefined;
description?: string | null | undefined;
};
/** Thinking chunk: a list of text and tool-reference chunks. */
export type MistralThinkChunk = {
type: "thinking";
thinking: Array<MistralToolReferenceChunk | MistralTextChunk>;
signature?: string | null | undefined;
closed?: boolean | undefined;
};
/** Image chunk: either a bare URL string or an object with the URL and an optional detail level. */
export type MistralImageURLChunk = {
type: "image_url";
imageUrl: string | {
url: string;
detail?: MistralImageDetail | null | undefined;
};
}
/** Content chunk variants currently modelled here; the commented list below names further candidates. */
export type MistralContentChunk =
| MistralTextChunk
| MistralThinkChunk
| MistralImageURLChunk
/*
| (ImageURLChunk & { type: "image_url" })
| (DocumentURLChunk & { type: "document_url" })
| (TextChunk & { type: "text" })
| (ReferenceChunk & { type: "reference" })
| (FileChunk & { type: "file" })
| (ThinkChunk & { type: "thinking" })
| AudioChunk
*/
/** Function invocation: the name plus arguments, given as an object or as a string. */
export type MistralFunctionCall = {
name: string;
arguments: { [k: string]: any } | string;
};
/** Tool call wrapping a function call. */
export type MistralToolCall = {
id?: string | undefined;
type?: string | undefined;
function: MistralFunctionCall;
index?: number | undefined;
};
/** Assistant message: optional content and optional tool calls. */
export type MistralAssistantMessage = {
role: "assistant";
content?: string | Array<MistralContentChunk> | null | undefined;
toolCalls?: Array<MistralToolCall> | null | undefined;
prefix?: boolean | undefined;
}
// Not referenced by MistralSystemMessage below, whose content is a plain string.
export type MistralSystemMessageContentChunks =
| MistralTextChunk
| MistralThinkChunk;
/** System message with plain-text content. */
export type MistralSystemMessage = {
role: "system";
content: string;
}
/** Tool result message. */
export type MistralToolMessage = {
role: "tool";
content: string | Array<MistralContentChunk> | null;
toolCallId?: string | null | undefined;
name?: string | null | undefined;
};
/** User message. */
export type MistralUserMessage = {
role: "user";
content: string | Array<MistralContentChunk> | null;
};
/** Any Mistral chat message, discriminated by `role`. */
export type MistralChatMessage =
| MistralAssistantMessage
| MistralSystemMessage
| MistralToolMessage
| MistralUserMessage
+3
View File
@@ -0,0 +1,3 @@
import {Message} from "ollama";
/** Chat message in Ollama's shape: an alias of the SDK's `Message` type. */
export type OllamaChatMessage = Message;
+3
View File
@@ -0,0 +1,3 @@
import {ResponseInputItem} from "openai/resources/responses/responses";
/** Chat message in OpenAI's shape: an alias of the SDK's `ResponseInputItem` type. */
export type OpenAIChatMessage = ResponseInputItem
+325
View File
@@ -0,0 +1,325 @@
import {AiProvider} from "../model/ai-provider";
import {AiModelCapabilities} from "../model/ai-model-capabilities";
import {Environment} from "../common/environment";
import {logError} from "../util/utils";
import {AiCapabilityInfo} from "../model/ai-capability-info";
import {isOllamaSpeechToTextModel} from "./speech-to-text-models";
import {
AiCapabilityName,
AiRuntimeTarget,
createGeminiOpenAiClient,
createGoogleGenAiClient,
createMistralClient,
createOllamaClient,
createOpenAiClient,
getGeminiApiMode,
resolveAiRuntimeTarget,
sameRuntimeEndpoint,
} from "./ai-runtime-target";
/** A provider/model pair together with its capability flags. */
export type RuntimeModelInfo = {
provider: AiProvider;
model: string;
capabilities: AiModelCapabilities;
};
// Capability names that getRuntimeCapabilities iterates over, in this order.
const CAPABILITY_NAMES: AiCapabilityName[] = [
"vision",
"ocr",
"thinking",
"extendedThinking",
"tools",
"audio",
"documents",
"outputImages",
"speechToText",
"textToSpeech",
];
/** Returns the chat model currently configured in Environment for the provider. */
export function getRuntimeModel(provider: AiProvider): string {
    switch (provider) {
        case AiProvider.OLLAMA: {
            return Environment.OLLAMA_CHAT_MODEL;
        }
        case AiProvider.GEMINI: {
            return Environment.GEMINI_MODEL;
        }
        case AiProvider.MISTRAL: {
            return Environment.MISTRAL_MODEL;
        }
        case AiProvider.OPENAI: {
            return Environment.OPENAI_MODEL;
        }
    }
}
/**
 * Overwrites the provider's chat model in Environment.
 * Unknown providers are ignored.
 */
export function setRuntimeModel(provider: AiProvider, model: string): void {
    if (provider === AiProvider.OLLAMA) {
        Environment.OLLAMA_CHAT_MODEL = model;
    } else if (provider === AiProvider.GEMINI) {
        Environment.GEMINI_MODEL = model;
    } else if (provider === AiProvider.MISTRAL) {
        Environment.MISTRAL_MODEL = model;
    } else if (provider === AiProvider.OPENAI) {
        Environment.OPENAI_MODEL = model;
    }
}
/**
 * Builds the info record for one capability.
 *
 * @param supported whether the capability is available
 * @param target the target serving this capability; adds `model` (when non-empty) and `endpoint`
 * @param runtimeTarget the chat target to compare against; the record is marked
 *        `external` when the model or the endpoint differs from it
 */
function capability(supported: boolean, target?: AiRuntimeTarget, runtimeTarget?: AiRuntimeTarget): AiCapabilityInfo {
    const info: AiCapabilityInfo = {supported};
    if (!target) return info;

    if (target.model) info.model = target.model;

    const endpointDiffers = runtimeTarget ? !sameRuntimeEndpoint(target, runtimeTarget) : false;
    info.endpoint = {
        provider: target.provider,
        baseUrl: target.baseUrl,
        external: endpointDiffers,
    };

    if (runtimeTarget && (target.model !== runtimeTarget.model || endpointDiffers)) {
        info.external = true;
    }
    return info;
}
/**
 * Creates an AiModelCapabilities instance in which every capability is unsupported
 * except the ones supplied in `overrides`.
 */
function buildCapabilities(overrides: Partial<Record<AiCapabilityName, AiCapabilityInfo>>): AiModelCapabilities {
    const names = [
        "vision",
        "ocr",
        "thinking",
        "extendedThinking",
        "tools",
        "audio",
        "documents",
        "outputImages",
        "speechToText",
        "textToSpeech",
    ] as const;
    const defaults: Record<string, AiCapabilityInfo> = {};
    for (const name of names) {
        // A fresh object per capability so the entries never share a reference.
        defaults[name] = {supported: false};
    }
    return Object.assign(new AiModelCapabilities(), {...defaults, ...overrides});
}
/** Lower-cases a model name so the name heuristics can match case-insensitively. */
function lowerModelName(model: string): string {
    const lowered = model.toLowerCase();
    return lowered;
}
/**
 * Name-based guess whether an OpenAI model is a text/chat model.
 * Image, TTS, Whisper, embedding, moderation and transcription models are excluded;
 * the remaining name must start with one of the known chat-family prefixes.
 */
function isOpenAiTextModel(model: string): boolean {
    const name = model.toLowerCase();
    if (name === "") return false;
    const nonTextFamily = /^(gpt-image|dall-e|tts-|whisper|text-embedding|text-moderation|omni-moderation)/.test(name);
    if (nonTextFamily || name.includes("transcribe")) return false;
    return /^(gpt-|o\d|chatgpt-|codex-|computer-use)/.test(name);
}
/** Name-based guess whether an OpenAI model is a reasoning model: o-series or gpt-5 family. */
function isOpenAiReasoningModel(model: string): boolean {
    const name = model.toLowerCase();
    if (/^o\d/.test(name)) return true;
    return name.startsWith("gpt-5");
}
/**
 * Name-based guess whether an OpenAI model accepts images: it must be a text model
 * and must not be gpt-3.5, an audio-preview or a search-preview model.
 */
function isOpenAiVisionModel(model: string): boolean {
    if (!isOpenAiTextModel(model)) return false;
    const name = model.toLowerCase();
    const excluded = name.startsWith("gpt-3.5")
        || name.includes("audio-preview")
        || name.includes("search-preview");
    return !excluded;
}
/**
 * Name-based guess whether a Gemini-provider model is not a chat model:
 * Lyria, TTS and image-generation model names are matched.
 */
function isGeminiNonChatModel(model: string): boolean {
    const name = model.toLowerCase();
    if (name.includes("lyria")) return true;
    if (name.includes("-tts")) return true;
    if (name.includes("image-preview")) return true;
    return name.endsWith("-image");
}
/** Name-based guess whether a model accepts audio input: a `gemini-` chat model. */
function geminiSupportsAudioInput(model: string): boolean {
    const name = lowerModelName(model);
    if (!name.startsWith("gemini-")) return false;
    return !isGeminiNonChatModel(model);
}
/**
 * Determines the capabilities of `model` on `provider`.
 *
 * Ollama and Mistral are queried over the network (`show` / `models.retrieve`);
 * Gemini and OpenAI are classified from the model name alone. Capabilities served by a
 * separately resolved target (documents, images, speech) are reported with that target.
 *
 * @param provider provider the model belongs to
 * @param model model name; an empty name yields undefined
 * @param purpose purpose used to resolve the endpoint and key for `model` (default "chat")
 * @returns the capability set, or undefined when `model` is empty or any step throws
 *          (the error is passed to logError)
 */
export async function getModelCapabilities(
provider: AiProvider,
model: string,
purpose: AiCapabilityName | "chat" = "chat",
): Promise<AiModelCapabilities | undefined> {
if (!model) return undefined;
try {
// runtimeTarget: the provider's current chat model; used to flag capabilities as external.
const runtimeTarget = resolveAiRuntimeTarget(provider, "chat", getRuntimeModel(provider));
// target: the model under inspection, with the endpoint/key resolved for `purpose`.
const target = resolveAiRuntimeTarget(provider, purpose, model);
switch (provider) {
case AiProvider.OLLAMA: {
const ollama = createOllamaClient(target);
const info = await ollama.show({model});
const modelCapabilities = Array.isArray(info.capabilities) ? info.capabilities : [];
const has = (cap: string): boolean => modelCapabilities.includes(cap);
const audioSupported = isOllamaSpeechToTextModel(model);
const documentsTarget = resolveAiRuntimeTarget(provider, "documents");
return buildCapabilities({
vision: capability(has("vision"), target, runtimeTarget),
ocr: capability(has("ocr"), target, runtimeTarget),
thinking: capability(has("thinking"), target, runtimeTarget),
// Extended thinking additionally requires "gpt-oss" in the model name.
extendedThinking: capability(has("thinking") && model.includes("gpt-oss"), target, runtimeTarget),
tools: capability(has("tools"), target, runtimeTarget),
audio: capability(audioSupported, target, runtimeTarget),
// Documents are supported whenever a documents model resolves to a non-empty name.
documents: capability(!!documentsTarget.model, documentsTarget, runtimeTarget),
speechToText: capability(audioSupported, target, runtimeTarget),
});
}
case AiProvider.GEMINI: {
// No API call here: everything is derived from the model name and the configuration.
const chatLike = lowerModelName(model).startsWith("gemini-") && !isGeminiNonChatModel(model);
const reasoningModel = lowerModelName(model).includes("2.5") || lowerModelName(model).includes("thinking");
const imageTarget = resolveAiRuntimeTarget(provider, "outputImages");
const speechTarget = resolveAiRuntimeTarget(provider, "speechToText");
const ttsTarget = resolveAiRuntimeTarget(provider, "textToSpeech");
return buildCapabilities({
vision: capability(chatLike, target, runtimeTarget),
ocr: capability(chatLike, target, runtimeTarget),
thinking: capability(reasoningModel, target, runtimeTarget),
extendedThinking: capability(reasoningModel, target, runtimeTarget),
tools: capability(chatLike, target, runtimeTarget),
audio: capability(geminiSupportsAudioInput(model), target, runtimeTarget),
// The dedicated targets below additionally require an API key.
speechToText: capability(!!speechTarget.apiKey && geminiSupportsAudioInput(speechTarget.model), speechTarget, runtimeTarget),
outputImages: capability(!!imageTarget.apiKey && !!imageTarget.model, imageTarget, runtimeTarget),
textToSpeech: capability(!!ttsTarget.apiKey && !!ttsTarget.model, ttsTarget, runtimeTarget),
});
}
case AiProvider.MISTRAL: {
const mistral = createMistralClient(target);
const info = await mistral.models.retrieve({modelId: model});
// Capabilities are read only when the returned model type is not "UNKNOWN".
const caps = info.type !== "UNKNOWN" ? info.capabilities : undefined;
const speechTarget = resolveAiRuntimeTarget(provider, "speechToText");
const ttsTarget = resolveAiRuntimeTarget(provider, "textToSpeech");
return buildCapabilities({
vision: capability(!!caps?.vision, target, runtimeTarget),
ocr: capability(!!caps?.ocr, target, runtimeTarget),
thinking: capability(!!caps?.reasoning, target, runtimeTarget),
tools: capability(!!caps?.functionCalling, target, runtimeTarget),
audio: capability(!!caps?.audio, target, runtimeTarget),
// Documents are always reported as supported for Mistral.
documents: capability(true, target, runtimeTarget),
speechToText: capability(!!speechTarget.model || !!caps?.audioTranscription, speechTarget, runtimeTarget),
textToSpeech: capability(!!ttsTarget.apiKey && !!ttsTarget.model, ttsTarget, runtimeTarget),
});
}
case AiProvider.OPENAI: {
// No API call here: everything is derived from the model name and the configuration.
const textModel = isOpenAiTextModel(model);
const reasoningModel = isOpenAiReasoningModel(model);
const imageTarget = resolveAiRuntimeTarget(provider, "outputImages");
const speechTarget = resolveAiRuntimeTarget(provider, "speechToText");
const ttsTarget = resolveAiRuntimeTarget(provider, "textToSpeech");
return buildCapabilities({
vision: capability(isOpenAiVisionModel(model), target, runtimeTarget),
ocr: capability(isOpenAiVisionModel(model), target, runtimeTarget),
thinking: capability(reasoningModel, target, runtimeTarget),
extendedThinking: capability(reasoningModel, target, runtimeTarget),
tools: capability(textModel, target, runtimeTarget),
outputImages: capability(!!imageTarget.model, imageTarget, runtimeTarget),
speechToText: capability(!!speechTarget.model, speechTarget, runtimeTarget),
textToSpeech: capability(!!ttsTarget.apiKey && !!ttsTarget.model, ttsTarget, runtimeTarget),
});
}
}
} catch (e) {
// Any failure (network, unknown model, ...) is logged and reported as "unknown".
logError(e);
return undefined;
}
}
/**
 * Computes the effective capability set for a provider's chat model.
 *
 * Starts from the chat model's own capabilities (all-unsupported when they cannot be
 * determined). Every capability that resolves to a different model or endpoint is then
 * looked up on that target, and the chat model's entry is replaced when the lookup
 * returns an entry for it. Lookups run one after another.
 */
export async function getRuntimeCapabilities(
    provider: AiProvider = Environment.DEFAULT_AI_PROVIDER,
    model: string | undefined = getRuntimeModel(provider)
): Promise<AiModelCapabilities> {
    const chatModel = model ?? getRuntimeModel(provider);
    const runtimeTarget = resolveAiRuntimeTarget(provider, "chat", chatModel);
    const detected = await getModelCapabilities(provider, runtimeTarget.model, "chat");
    const result = detected ?? buildCapabilities({});

    for (const capabilityName of CAPABILITY_NAMES) {
        const target = resolveAiRuntimeTarget(provider, capabilityName);
        const servedByChatModel = target.model === runtimeTarget.model && sameRuntimeEndpoint(target, runtimeTarget);
        if (!servedByChatModel) {
            const dedicated = await getModelCapabilities(provider, target.model, capabilityName);
            const capabilityInfo = dedicated?.[capabilityName];
            if (capabilityInfo) {
                result[capabilityName] = capabilityInfo;
            }
        }
    }
    return result;
}
/**
 * Renders a text summary of a model's capabilities.
 *
 * Capabilities are computed through getRuntimeCapabilities unless `caps` is supplied.
 * Each capability becomes one line built by Environment.getRuntimeCapabilityLineText; a
 * line is flagged external when the capability says so, or, failing that, when it names
 * a model different from `model`.
 */
export async function formatRuntimeModelInfo(
    provider: AiProvider = Environment.DEFAULT_AI_PROVIDER,
    model: string | undefined = getRuntimeModel(provider),
    caps?: AiModelCapabilities
): Promise<string> {
    const resolved = caps ? caps : await getRuntimeCapabilities(provider, model);

    const describe = (title: string, value?: AiCapabilityInfo) => {
        const differsFromChatModel = !!value?.model && value.model !== model;
        return Environment.getRuntimeCapabilityLineText({
            state: value?.supported ? "✅" : "❌",
            title,
            model: value?.model,
            endpointBaseUrl: value?.endpoint?.baseUrl,
            external: value?.external ?? differsFromChatModel,
        });
    };

    const providerLabel = provider.toString().toLowerCase();
    const rows: Array<[string, AiCapabilityInfo | undefined]> = [
        [Environment.runtimeCapabilityVisionText, resolved.vision],
        [Environment.runtimeCapabilityOcrText, resolved.ocr],
        [Environment.runtimeCapabilityThinkingText, resolved.thinking],
        [Environment.runtimeCapabilityExtendedThinkingText, resolved.extendedThinking],
        [Environment.runtimeCapabilityToolsText, resolved.tools],
        [Environment.runtimeCapabilityAudioText, resolved.audio],
        [Environment.runtimeCapabilitySpeechToTextText, resolved.speechToText],
        [Environment.runtimeCapabilityTextToSpeechText, resolved.textToSpeech],
        [Environment.runtimeCapabilityDocumentsText, resolved.documents],
        [Environment.runtimeCapabilityOutputImagesText, resolved.outputImages],
    ];
    const lines = rows.map(([title, value]) => describe(title, value));
    return Environment.getRuntimeModelInfoText(providerLabel, model, lines);
}
/**
 * Lists the model names a provider reports, using the provider's chat endpoint.
 *
 * Gemini is listed through the OpenAI-compatible client or the Google SDK depending on
 * getGeminiApiMode; in the Google case a leading "models/" is stripped from each name.
 * Errors thrown by the SDK calls propagate to the caller.
 */
export async function listProviderModels(provider: AiProvider): Promise<string[]> {
    const target = resolveAiRuntimeTarget(provider, "chat", getRuntimeModel(provider));
    switch (provider) {
        case AiProvider.OLLAMA: {
            const listing: any = await createOllamaClient(target).list();
            const names: string[] = [];
            for (const entry of listing.models ?? []) {
                const name = entry.model || entry.name;
                if (name) names.push(name);
            }
            return names;
        }
        case AiProvider.GEMINI: {
            const models: string[] = [];
            if (getGeminiApiMode(target) !== "openai") {
                const page: any = await createGoogleGenAiClient(target).models.list();
                for await (const model of page) {
                    const rawName = String(model.name || model.id || String(model));
                    models.push(rawName.replace(/^models\//, ""));
                }
                return models;
            }
            const page: any = await createGeminiOpenAiClient(target).models.list();
            for await (const model of page) {
                models.push(model.name || model.id || String(model));
            }
            return models;
        }
        case AiProvider.MISTRAL: {
            const listing: any = await createMistralClient(target).models.list();
            // The list is taken from `data`, then `models`, then the response itself.
            const entries: any[] = listing.data ?? listing.models ?? listing ?? [];
            return entries.map((m: any) => m.id || m.name || String(m)).filter(Boolean);
        }
        case AiProvider.OPENAI: {
            const listing: any = await createOpenAiClient(target).models.list();
            const entries: any[] = listing.data ?? [];
            return entries.map((m: any) => m.id).filter(Boolean);
        }
    }
}
+184
View File
@@ -0,0 +1,184 @@
import {Environment} from "../common/environment";
import {AiProvider} from "../model/ai-provider";
/** Identifies a queue: requests with the same provider, base URL and model share one queue. */
export type AiRequestQueueTarget = {
provider: AiProvider;
model: string;
baseUrl?: string;
};
/** Internal bookkeeping for one enqueued request. */
type QueueEntry<T> = {
target: AiRequestQueueTarget;
// Key derived from the target; computed once when the entry is created.
queueKey: string;
run: () => Promise<T>;
// Settle the promise returned by enqueue().
resolve: (value: T | PromiseLike<T>) => void;
reject: (reason?: unknown) => void;
// Called with the number of requests ahead of this one while it is waiting.
onPositionChange: (requestsBefore: number) => Promise<void> | void;
signal?: AbortSignal;
// Listener registered on `signal`; kept so it can be removed again.
abortHandler?: () => void;
// True once the entry has left the waiting list and run() has been invoked.
started: boolean;
};
/** Options accepted by AiProviderRequestQueue.enqueue. */
type EnqueueOptions<T> = {
signal?: AbortSignal;
onPositionChange: (requestsBefore: number) => Promise<void> | void;
run: () => Promise<T>;
};
/**
 * Per-target request queue with a concurrency limit.
 *
 * Requests are grouped by provider, normalised base URL and model. Each group runs at
 * most Environment.getAiProviderMaxConcurrentRequests(provider) requests at once
 * (never fewer than one); the rest wait in FIFO order and are told their position
 * through `onPositionChange`. A waiting request can be cancelled through its
 * AbortSignal; a request that has already started is not interrupted by the queue.
 */
class AiProviderRequestQueue {
    /** Waiting (not yet started) entries per queue key. */
    private readonly waiting = new Map<string, Array<QueueEntry<any>>>();
    /** Number of currently running requests per queue key. */
    private readonly active = new Map<string, number>();

    /**
     * Queues `options.run` for the given target.
     *
     * @returns a promise that settles with the outcome of `run`, or rejects with
     *          `Error("Aborted")` when the signal fires before the request starts.
     */
    enqueue<T>(target: AiRequestQueueTarget, options: EnqueueOptions<T>): Promise<T> {
        if (options.signal?.aborted) {
            return Promise.reject(new Error("Aborted"));
        }

        return new Promise<T>((resolve, reject) => {
            const queueKey = this.queueKey(target);
            const entry: QueueEntry<T> = {
                target,
                queueKey,
                run: options.run,
                resolve,
                reject,
                onPositionChange: options.onPositionChange,
                signal: options.signal,
                started: false,
            };
            entry.abortHandler = () => {
                // Once started, cancellation is up to run() itself.
                if (entry.started) return;
                const removed = this.removeWaitingEntry(entry);
                if (!removed) return;
                reject(new Error("Aborted"));
                // The entries behind the removed one moved up: refresh their positions.
                this.schedule(target);
            };
            options.signal?.addEventListener("abort", entry.abortHandler, {once: true});
            this.getOrCreateQueue(queueKey).push(entry);
            this.schedule(target);
        });
    }

    private getQueue(queueKey: string): Array<QueueEntry<any>> | undefined {
        return this.waiting.get(queueKey);
    }

    private getOrCreateQueue(queueKey: string): Array<QueueEntry<any>> {
        let queue = this.waiting.get(queueKey);
        if (!queue) {
            queue = [];
            this.waiting.set(queueKey, queue);
        }
        return queue;
    }

    private activeCount(queueKey: string): number {
        return this.active.get(queueKey) ?? 0;
    }

    /** Stores the running count; a count of zero or less removes the map entry. */
    private setActiveCount(queueKey: string, count: number): void {
        if (count <= 0) {
            this.active.delete(queueKey);
            return;
        }
        this.active.set(queueKey, count);
    }

    /**
     * Concurrency limit for the target's provider, at least 1.
     * A NaN limit is treated as 1: `active < NaN` is never true, so it would
     * otherwise stall the queue forever.
     */
    private maxActive(target: AiRequestQueueTarget): number {
        const configured = Environment.getAiProviderMaxConcurrentRequests(target.provider);
        return Number.isNaN(configured) ? 1 : Math.max(1, configured);
    }

    /** Trims the base URL and drops trailing slashes so equivalent URLs share a queue. */
    private normalizeBaseUrl(baseUrl: string | undefined): string {
        return (baseUrl ?? "").trim().replace(/\/+$/, "");
    }

    private queueKey(target: AiRequestQueueTarget): string {
        return JSON.stringify([
            target.provider,
            this.normalizeBaseUrl(target.baseUrl),
            target.model.trim(),
        ]);
    }

    /** Removes a waiting entry and its abort listener; false when it is not waiting. */
    private removeWaitingEntry(entry: QueueEntry<any>): boolean {
        const queue = this.getQueue(entry.queueKey);
        if (!queue) return false;
        const index = queue.indexOf(entry);
        if (index < 0) return false;
        queue.splice(index, 1);
        if (entry.abortHandler) {
            entry.signal?.removeEventListener("abort", entry.abortHandler);
        }
        this.deleteQueueIfIdle(entry.queueKey, queue);
        return true;
    }

    /** Starts as many waiting entries as the limit allows, then refreshes positions. */
    private schedule(target: AiRequestQueueTarget): void {
        const queueKey = this.queueKey(target);
        const queue = this.getOrCreateQueue(queueKey);

        while (queue.length && this.activeCount(queueKey) < this.maxActive(target)) {
            const entry = queue.shift();
            if (!entry) continue;
            if (entry.abortHandler) {
                entry.signal?.removeEventListener("abort", entry.abortHandler);
            }
            if (entry.signal?.aborted) {
                entry.reject(new Error("Aborted"));
                continue;
            }
            entry.started = true;
            this.setActiveCount(queueKey, this.activeCount(queueKey) + 1);
            void this.runEntry(entry);
        }

        this.updateWaitingMessages(target);
        this.deleteQueueIfIdle(queueKey, queue);
    }

    /** Runs one entry, settles its promise and frees its slot for the next entry. */
    private async runEntry(entry: QueueEntry<any>): Promise<void> {
        try {
            entry.resolve(await entry.run());
        } catch (e) {
            entry.reject(e);
        } finally {
            this.setActiveCount(entry.queueKey, this.activeCount(entry.queueKey) - 1);
            this.schedule(entry.target);
        }
    }

    /**
     * Tells every waiting entry how many requests are ahead of it (running + queued).
     *
     * Callback failures are logged and never affect scheduling. Synchronous throws are
     * caught here as well: previously they escaped schedule(), which rejected an
     * unrelated enqueue() promise or surfaced as an unhandled rejection from runEntry().
     */
    private updateWaitingMessages(target: AiRequestQueueTarget): void {
        const queueKey = this.queueKey(target);
        const active = this.activeCount(queueKey);
        const snapshot = [...(this.getQueue(queueKey) ?? [])];

        const notifications = snapshot.map((entry, index) => {
            try {
                return Promise.resolve(entry.onPositionChange(active + index));
            } catch (e) {
                return Promise.reject(e);
            }
        });

        Promise.allSettled(notifications).then(results => {
            for (const result of results) {
                if (result.status === "rejected") {
                    console.error(result.reason);
                }
            }
        }).catch(console.error);
    }

    /** Drops the queue's map entry when nothing is waiting or running for it. */
    private deleteQueueIfIdle(queueKey: string, queue: Array<QueueEntry<any>>): void {
        if (!queue.length && this.activeCount(queueKey) <= 0) {
            this.waiting.delete(queueKey);
        }
    }
}

/** Shared queue instance. */
export const aiProviderRequestQueue = new AiProviderRequestQueue();
+24
View File
@@ -0,0 +1,24 @@
import {AiProvider} from "../model/ai-provider";
// Command token that starts every "regenerate" callback payload.
export const AI_REGENERATE_CALLBACK = "/regenerate_ai";
/** Parsed form of a regenerate callback payload. */
export type AiRegenerateCallbackData = {
provider: AiProvider;
// Encoded as "1" / "0" by buildAiRegenerateCallbackData.
think: boolean;
};
/**
 * Serialises a regenerate callback payload as "<command> <provider> <flag>",
 * where the flag is "1" when `think` is set and "0" otherwise.
 */
export function buildAiRegenerateCallbackData(provider: AiProvider, think = false): string {
    const thinkFlag = think ? "1" : "0";
    return [AI_REGENERATE_CALLBACK, provider, thinkFlag].join(" ");
}
/**
 * Parses a regenerate callback payload of the form "<command> <provider> [<flag>]".
 *
 * The first token must be exactly AI_REGENERATE_CALLBACK. A prefix check alone would
 * also accept other callbacks that merely start with the same text (for example
 * "/regenerate_ai_image ..."). The think flag is true for "1" or "true".
 *
 * @returns the parsed data, or null when the command or provider does not match.
 */
export function parseAiRegenerateCallbackData(data: string): AiRegenerateCallbackData | null {
    const [command, provider, think] = data.split(/\s+/);
    if (command !== AI_REGENERATE_CALLBACK) return null;
    if (!Object.values(AiProvider).includes(provider as AiProvider)) return null;

    return {
        provider: provider as AiProvider,
        think: think === "1" || think === "true",
    };
}
+227
View File
@@ -0,0 +1,227 @@
import {Message} from "typescript-telegram-bot-api";
import {bot} from "../index";
import {downloadTelegramFile, logError} from "../util/utils";
import fs from "node:fs";
import path from "node:path";
import {Environment} from "../common/environment";
import {StoredAttachment, StoredAttachmentKind} from "../model/stored-attachment";
import {performFFmpeg} from "../util/ffmpeg";
import ffmpeg from "fluent-ffmpeg";
import {AsyncSemaphore, KeyedAsyncLock} from "../util/async-lock";
/**
 * An attachment loaded into memory; attachmentsToDownloadedFiles builds these
 * from StoredAttachment records whose cache file exists.
 */
export type AiDownloadedFile = {
// Attachment kind copied from the stored attachment.
kind: StoredAttachmentKind;
// File id copied from the stored attachment.
fileId: string;
// File name copied from the stored attachment.
fileName: string;
// MIME type copied from the stored attachment, if any.
mimeType?: string;
// File contents read from `path`; cleanupDownloads replaces it with an empty buffer.
buffer: Buffer;
// Location of the cached file on disk (the attachment's cachePath).
path: string;
};
// Lock keyed by file path; cache writes and WAV conversions below run inside runExclusive(path, …).
const cachePathLocks = new KeyedAsyncLock();
// Semaphore constructed with 2; ffmpeg conversions run inside it
// (presumably at most two at a time — confirm against AsyncSemaphore).
const ffmpegSemaphore = new AsyncSemaphore(2);
/**
 * Makes a string usable as a file name: path separators, the characters
 * `: * ? " < > |` and ASCII control characters become "_", and the result
 * is cut to at most 180 characters.
 *
 * @param value raw name
 * @returns sanitised, length-limited name
 */
function safeFileName(value: string): string {
    const maxLength = 180;
    const sanitized = value.replace(/[\\/:*?"<>|\u0000-\u001F]/g, "_");
    return sanitized.slice(0, maxLength);
}
/** Lower-cased MIME type → file extension (with leading dot) for the types this module recognises. */
const MIME_TYPE_EXTENSIONS = new Map<string, string>([
    ["audio/ogg", ".ogg"],
    ["audio/opus", ".ogg"],
    ["audio/mpeg", ".mp3"],
    ["audio/mp3", ".mp3"],
    ["audio/mp4", ".m4a"],
    ["audio/x-m4a", ".m4a"],
    ["audio/wav", ".wav"],
    ["audio/wave", ".wav"],
    ["audio/x-wav", ".wav"],
    ["audio/webm", ".webm"],
    ["image/jpeg", ".jpg"],
    ["image/png", ".png"],
    ["image/webp", ".webp"],
    ["application/pdf", ".pdf"],
    ["text/plain", ".txt"],
    ["application/zip", ".zip"],
    ["application/x-zip", ".zip"],
    ["application/x-zip-compressed", ".zip"],
    ["application/x-tar", ".tar"],
    ["application/tar", ".tar"],
    ["application/gzip", ".gz"],
    ["application/x-gzip", ".gz"],
    ["application/gzip-compressed", ".gz"],
    ["video/mp4", ".mp4"],
]);

/**
 * Maps a MIME type to a file extension.
 *
 * The lookup is case-insensitive and exact: a MIME type carrying parameters
 * (e.g. "audio/ogg; codecs=opus") is not recognised.
 *
 * @param mimeType MIME type, may be missing or empty
 * @returns the extension including the leading dot, or "" when the type is unknown
 */
function extensionFromMimeType(mimeType?: string): string {
    const normalized = (mimeType || "").toLowerCase();
    return MIME_TYPE_EXTENSIONS.get(normalized) ?? "";
}
/**
 * Ensures a file name carries an extension.
 *
 * A name that already has an extension is returned unchanged. Otherwise the
 * extension of the Telegram file path is preferred, then the one derived from
 * the MIME type; if neither yields anything the name is returned as is.
 *
 * @param fileName         candidate file name
 * @param mimeType         MIME type used as a fallback source for the extension
 * @param telegramFilePath file path reported by Telegram, if known
 * @returns the file name, possibly with an extension appended
 */
function fileNameWithExtension(fileName: string, mimeType?: string, telegramFilePath?: string): string {
    if (path.extname(fileName) !== "") {
        return fileName;
    }
    let extension = "";
    if (telegramFilePath) {
        extension = path.extname(telegramFilePath);
    }
    if (!extension) {
        extension = extensionFromMimeType(mimeType);
    }
    if (!extension) {
        return fileName;
    }
    return `${fileName}${extension}`;
}
/**
 * Returns the cache directory for an attachment kind:
 * `<DATA_PATH>/cache/<kind>`, except that "image" is stored under "photo".
 *
 * @param kind attachment kind
 * @returns directory path (not created here)
 */
function cacheDirFor(kind: StoredAttachmentKind): string {
    let dirName: string = kind;
    if (kind === "image") {
        dirName = "photo";
    }
    return path.join(Environment.DATA_PATH, "cache", dirName);
}
/**
 * Builds the cache file path for an attachment.
 *
 * The file is named after the sanitised unique id (falling back to the file id)
 * and keeps the extension of `fileName`.
 *
 * @param kind         attachment kind, selects the cache directory
 * @param fileUniqueId Telegram unique file id, preferred as the base name
 * @param fileId       Telegram file id, used when no unique id is given
 * @param fileName     name whose extension is reused
 * @returns full path of the cache file
 */
function cachePathFor(kind: StoredAttachmentKind, fileUniqueId: string | undefined, fileId: string, fileName: string): string {
    const identifier = fileUniqueId || fileId;
    const cacheFileName = safeFileName(identifier) + path.extname(fileName);
    return path.join(cacheDirFor(kind), cacheFileName);
}
/**
 * Downloads a Telegram file into the on-disk cache unless it is already there.
 *
 * The download is serialised per cache path, and the file is written to a
 * temporary name and then renamed, so no reader sees a partially written file.
 *
 * @param kind         attachment kind, selects the cache directory
 * @param fileId       Telegram file id to download
 * @param fileName     preferred file name; an extension is added when missing
 * @param mimeType     MIME type, used to derive an extension if needed
 * @param fileUniqueId Telegram unique file id, preferred for the cache file name
 * @returns the stored-attachment record, or null when the file could not be
 *          downloaded and is therefore not present in the cache
 */
async function downloadToCache(kind: StoredAttachmentKind, fileId: string, fileName: string, mimeType?: string, fileUniqueId?: string): Promise<StoredAttachment | null> {
    const file = await bot.getFile({file_id: fileId});
    const finalFileName = fileNameWithExtension(fileName, mimeType, file.file_path);
    const location = cachePathFor(kind, fileUniqueId, fileId, finalFileName);
    await cachePathLocks.runExclusive(location, async () => {
        if (fs.existsSync(location)) return;
        const buffer = await downloadTelegramFile(file.file_path);
        if (!buffer) return;
        const tempLocation = `${location}.${process.pid}.${Date.now()}.tmp`;
        fs.mkdirSync(path.dirname(location), {recursive: true});
        try {
            fs.writeFileSync(tempLocation, buffer);
            fs.renameSync(tempLocation, location);
        } catch (e) {
            // Do not leave a stray temp file behind when the write or rename fails.
            fs.rmSync(tempLocation, {force: true});
            throw e;
        }
    });
    // Honour the declared `| null` contract: without a file on disk there is nothing to hand out.
    if (!fs.existsSync(location)) return null;
    return {kind, fileId, fileUniqueId, fileName: finalFileName, mimeType, cachePath: location};
}
/**
 * Converts a media file to WAV with ffmpeg unless the output already exists.
 *
 * The conversion is serialised per output path and runs inside the ffmpeg
 * semaphore. ffmpeg writes to a temporary file that is renamed on success and
 * removed on failure.
 *
 * @param input   path of the source media file
 * @param output  path of the WAV file to produce
 * @param noVideo when true, `noVideo()` is applied to the ffmpeg command
 * @throws whatever the ffmpeg run or the final rename throws
 */
async function convertAudioToWav(input: string, output: string, noVideo = false): Promise<void> {
    const transcode = async (): Promise<void> => {
        // Re-check after acquiring the semaphore.
        if (fs.existsSync(output)) return;
        const tempOutput = `${output}.${process.pid}.${Date.now()}.tmp.wav`;
        try {
            await performFFmpeg(() => {
                const command = ffmpeg(input);
                if (noVideo) command.noVideo();
                return command
                    .toFormat("wav")
                    .save(tempOutput)
                    .on("progress", (progress) => {
                        console.log("progress", progress);
                    });
            });
            fs.renameSync(tempOutput, output);
        } catch (error) {
            if (fs.existsSync(tempOutput)) {
                fs.rmSync(tempOutput, {force: true});
            }
            throw error;
        }
    };

    await cachePathLocks.runExclusive(output, async () => {
        if (fs.existsSync(output)) return;
        await ffmpegSemaphore.runExclusive(async () => {
            await transcode();
        });
    });
}
/**
 * Converts an already cached attachment to WAV and, on success, repoints the
 * record at the WAV file (cache path, file name and MIME type).
 * A failed conversion is logged and leaves the record untouched.
 */
async function convertCachedAttachmentToWav(file: StoredAttachment, fileUniqueId: string, fileId: string, sourceExtension: string, noVideo = false): Promise<void> {
    const wavPath = cachePathFor("audio", fileUniqueId, fileId, `${fileUniqueId || fileId}.wav`);
    try {
        await convertAudioToWav(file.cachePath, wavPath, noVideo);
        file.cachePath = wavPath;
        file.fileName = file.fileName?.replace(sourceExtension, ".wav");
        file.mimeType = "audio/wav";
    } catch (error) {
        logError(error);
    }
}

/**
 * Downloads every supported attachment of a message into the cache.
 *
 * Handles, in this order: the largest photo size, a document (classified as
 * image/audio/document by its MIME type), a voice message, an audio file and a
 * video note. Voice messages and video notes are additionally converted to WAV.
 * Any error is logged; attachments collected before the error are still returned.
 *
 * @param msg incoming Telegram message
 * @returns records of the attachments that were cached
 */
export async function cacheMessageAttachments(msg: Message): Promise<StoredAttachment[]> {
    const cached: StoredAttachment[] = [];
    try {
        if (msg.photo?.length) {
            // The last entry of the photo array is the one that gets cached.
            const largest = msg.photo[msg.photo.length - 1]!;
            const photoFile = await downloadToCache(
                "image",
                largest.file_id,
                `${largest.file_unique_id || largest.file_id}.jpg`,
                "image/jpeg",
                largest.file_unique_id,
            );
            if (photoFile) cached.push(photoFile);
        }

        if (msg.document) {
            const doc = msg.document;
            let kind: StoredAttachmentKind = "document";
            if (doc.mime_type?.startsWith("image/")) {
                kind = "image";
            } else if (doc.mime_type?.startsWith("audio/")) {
                kind = "audio";
            }
            const documentFile = await downloadToCache(
                kind,
                doc.file_id,
                doc.file_name || `${doc.file_unique_id || doc.file_id}`,
                doc.mime_type,
                doc.file_unique_id,
            );
            if (documentFile) cached.push(documentFile);
        }

        if (msg.voice) {
            const voice = msg.voice;
            const voiceFile = await downloadToCache(
                "audio",
                voice.file_id,
                `${voice.file_unique_id || voice.file_id}.ogg`,
                voice.mime_type || "audio/ogg",
                voice.file_unique_id,
            );
            if (voiceFile) {
                await convertCachedAttachmentToWav(voiceFile, voice.file_unique_id, voice.file_id, ".ogg");
                cached.push(voiceFile);
            }
        }

        if (msg.audio) {
            const audio = msg.audio;
            const audioFile = await downloadToCache(
                "audio",
                audio.file_id,
                audio.file_name || `${audio.file_unique_id || audio.file_id}.mp3`,
                audio.mime_type,
                audio.file_unique_id,
            );
            if (audioFile) cached.push(audioFile);
        }

        if (msg.video_note) {
            const note = msg.video_note;
            const noteFile = await downloadToCache(
                "video-note",
                note.file_id,
                `${note.file_unique_id || note.file_id}.mp4`,
                "video/mp4",
                note.file_unique_id,
            );
            if (noteFile) {
                // Only the audio track is kept for video notes.
                await convertCachedAttachmentToWav(noteFile, note.file_unique_id, note.file_id, ".mp4", true);
                cached.push(noteFile);
            }
        }
    } catch (error) {
        logError(error);
    }
    return cached;
}
/**
 * Loads the cached files of the given attachments into memory.
 *
 * Attachments whose cache file is missing are skipped silently. The file is
 * read directly instead of being checked with `existsSync` first, so a file
 * that disappears between check and read no longer throws and aborts the whole
 * list. Any other read failure is logged and that attachment is skipped.
 *
 * @param attachments stored attachment records
 * @returns one entry per attachment whose cache file could be read, in input order
 */
export function attachmentsToDownloadedFiles(attachments: StoredAttachment[]): AiDownloadedFile[] {
    const files: AiDownloadedFile[] = [];
    for (const attachment of attachments) {
        if (!attachment.cachePath) continue;
        let buffer: Buffer;
        try {
            buffer = fs.readFileSync(attachment.cachePath);
        } catch (e) {
            const code = e instanceof Error ? (e as NodeJS.ErrnoException).code : undefined;
            // A missing file is the expected "not cached" case; anything else is worth a log line.
            if (code !== "ENOENT") logError(e);
            continue;
        }
        files.push({
            kind: attachment.kind,
            fileId: attachment.fileId,
            fileName: attachment.fileName,
            mimeType: attachment.mimeType,
            buffer,
            path: attachment.cachePath,
        });
    }
    return files;
}
/**
 * Releases the in-memory contents of downloaded files.
 *
 * Each entry's buffer is replaced with an empty one and the array itself is
 * emptied in place. The cached files on disk are left alone.
 *
 * @param files downloaded files to release (mutated)
 */
export function cleanupDownloads(files: AiDownloadedFile[]): void {
    // Files stay on disk in the message cache; drop in-memory buffers eagerly.
    files.forEach(entry => {
        entry.buffer = Buffer.alloc(0);
    });
    files.length = 0;
}
+541
View File
@@ -0,0 +1,541 @@
import {FileOptions, InlineKeyboardMarkup, Message} from "typescript-telegram-bot-api";
import {bot} from "../index";
import {buildCancelledGenerationText, logError, replyToMessage} from "../util/utils";
import {Environment} from "../common/environment";
import {MessageStore} from "../common/message-store";
import {createQueuedFunction} from "../util/async-lock";
import {enqueueTelegramApiCall} from "../util/telegram-api-queue";
import fs from "node:fs";
import {StoredAttachment, StoredAttachmentKind} from "../model/stored-attachment";
import {StoredMessage} from "../model/stored-message";
import {prepareTelegramMarkdownV2} from "../util/markdown-v2-renderer";
import {AiProvider} from "../model/ai-provider";
// Maximum length to which the visible message text is truncated before sending.
const TELEGRAM_LIMIT = 4096;
// Maximum length used for media captions (and for artifact captions).
const TELEGRAM_CAPTION_LIMIT = 1024;
// Artifacts larger than this (50 MiB) are rejected before upload.
const TELEGRAM_FILE_LIMIT_BYTES = 50 * 1024 * 1024;
// Images larger than this (10 MiB) are sent as documents instead of photos.
const TELEGRAM_PHOTO_LIMIT_BYTES = 10 * 1024 * 1024;
// Period of the timer that pushes accumulated text to Telegram.
const EDIT_INTERVAL_MS = 4500;
/** A file on disk that TelegramStreamMessage.sendArtifact uploads to the chat. */
export type TelegramArtifactFile = {
// "image" artifacts may be sent as photos (subject to size and MIME type); "file" is sent as a document.
kind: "image" | "file";
// Location of the file on disk; it is streamed from here on upload.
path: string;
// Name given to the upload; also used as the caption.
fileName: string;
// MIME type of the upload; "application/octet-stream" is used when absent.
mimeType?: string;
// Size checked against the Telegram file and photo limits.
sizeBytes: number;
};
/**
 * Drives one Telegram message that shows the progress and the result of an AI generation.
 *
 * The instance accumulates answer text and a status line, periodically pushes the
 * combined text to Telegram by editing a "wait" message, and finally replaces the
 * cancel keyboard with an optional "regenerate" keyboard. All edits go through a
 * queued function (`enqueueEdit`).
 */
export class TelegramStreamMessage {
    /** Message that is edited in place; null until start() has sent or adopted one. */
    private waitMessage: Message | null = null;
    /** Periodic flush timer, active between start() and finish()/cancel()/fail(). */
    private timer: NodeJS.Timeout | null = null;
    /** Raw (unformatted) text of the last successful send, used to skip no-op edits. */
    private lastSent = "";
    /** Accumulated answer text. */
    private text = "";
    /** Transient status line shown below the text. */
    private status = "";
    /** True once the wait message carries a photo, i.e. edits must target the caption. */
    private mediaMode = false;
    /** Set by cancel(); afterwards text and status mutations are ignored. */
    private cancelled = false;
    private cancelledProvider = "";
    private readonly startedAt = Date.now();
    private readonly enqueueEdit = createQueuedFunction();

    /**
     * @param sourceMessage          user message that is being answered
     * @param cancelRequestId        id embedded into the cancel button callback data
     * @param stream                 whether progress is streamed into a wait message
     * @param regenerateCallbackData callback data for the "regenerate" button, if any
     * @param targetMessage          existing message to edit instead of sending a new reply
     * @param cancelProvider         provider appended to the cancel callback data, if any
     * @param isGuest                when true, the non-streaming final reply is not sent
     */
    constructor(
        private readonly sourceMessage: Message,
        private readonly cancelRequestId: string,
        private readonly stream: boolean,
        private readonly regenerateCallbackData?: string,
        private readonly targetMessage?: Message,
        private readonly cancelProvider?: AiProvider,
        private readonly isGuest?: boolean,
    ) {
    }

    /** Builds the inline keyboard with the single "cancel" button. */
    keyboard(): InlineKeyboardMarkup {
        return {
            inline_keyboard: [[{
                text: Environment.cancelText,
                callback_data: this.cancelProvider
                    ? `/cancel_ai ${this.cancelRequestId} ${this.cancelProvider}`
                    : `/cancel_ai ${this.cancelRequestId}`,
            }]],
        };
    }

    /** Keyboard without buttons; sending it removes the existing keyboard. */
    emptyKeyboard(): InlineKeyboardMarkup {
        return {inline_keyboard: []};
    }

    /** Builds the "regenerate" keyboard, or null when no regenerate callback data was given. */
    regenerateKeyboard(): InlineKeyboardMarkup | null {
        if (!this.regenerateCallbackData) return null;
        return {
            inline_keyboard: [[{
                text: Environment.regenerateText,
                callback_data: this.regenerateCallbackData,
            }]],
        };
    }

    /**
     * Tells whether an error is the "message is not modified" error.
     *
     * Accepts Error instances, strings (primitive or boxed) and plain objects that
     * carry a `message` property, so every call site can share this one check.
     */
    private isMessageNotModified(error: unknown): boolean {
        const textToLookUp = "message is not modified";
        if (error instanceof Error) {
            return String(error.message).includes(textToLookUp);
        }
        // `instanceof String` alone never matches primitive strings, hence the typeof check.
        if (typeof error === "string" || error instanceof String) {
            return String(error).includes(textToLookUp);
        }
        if (typeof error === "object" && error !== null && "message" in error) {
            return String((error as {message?: unknown}).message).includes(textToLookUp);
        }
        return false;
    }

    /** Replaces the keyboard of the wait message; "not modified" errors are ignored, others logged. */
    private async updateKeyboard(replyMarkup: InlineKeyboardMarkup): Promise<void> {
        if (!this.waitMessage) return;
        try {
            await enqueueTelegramApiCall(
                () => bot.editMessageReplyMarkup({
                    chat_id: this.waitMessage!.chat.id,
                    message_id: this.waitMessage!.message_id,
                    reply_markup: replyMarkup,
                }),
                {
                    method: "editMessageReplyMarkup",
                    chatId: this.waitMessage.chat.id,
                    chatType: this.waitMessage.chat.type,
                }
            );
        } catch (e) {
            if (!this.isMessageNotModified(e)) logError(e);
        }
    }

    private async removeKeyboard(): Promise<void> {
        await this.updateKeyboard(this.emptyKeyboard());
    }

    /** (Re)starts the periodic flush of accumulated text. */
    private startFlushTimer(): void {
        if (this.timer) clearInterval(this.timer);
        this.timer = setInterval(() => this.flush().catch(logError), EDIT_INTERVAL_MS);
    }

    /**
     * Text to display: answer text and status joined by a blank line, or the
     * "thinking" placeholder when both are empty; truncated to the message limit.
     */
    private visibleText(): string {
        const parts = [this.text, this.status].filter(v => v && v.trim().length);
        let value = parts.join("\n\n").trim() || Environment.waitThinkText;
        if (value.length > TELEGRAM_LIMIT) {
            value = value.substring(0, TELEGRAM_LIMIT - 1);
        }
        return value;
    }

    /** Same as visibleText(), additionally truncated to the caption limit. */
    private visibleCaption(): string {
        let value = this.visibleText();
        if (value.length > TELEGRAM_CAPTION_LIMIT) {
            value = value.substring(0, TELEGRAM_CAPTION_LIMIT - 1);
        }
        return value;
    }

    /**
     * Shows the initial status and starts the periodic flush.
     *
     * When a target message was supplied it is edited in place; if that edit fails
     * for any reason other than "not modified", a new reply to the source message
     * is sent instead.
     *
     * @param initialStatus status line to show first
     * @returns the message that will be edited from now on
     */
    async start(initialStatus: string): Promise<Message> {
        this.status = initialStatus;
        const rawText = this.visibleText();
        const formatted = prepareTelegramMarkdownV2(rawText, {mode: "draft"});
        if (this.targetMessage) {
            this.waitMessage = this.targetMessage;
            try {
                await MessageStore.put(this.targetMessage).catch(logError);
                const result = await enqueueTelegramApiCall(
                    () => bot.editMessageText({
                        chat_id: this.targetMessage!.chat.id,
                        message_id: this.targetMessage!.message_id,
                        text: formatted,
                        parse_mode: "MarkdownV2",
                        reply_markup: this.keyboard(),
                    }),
                    {
                        method: "editMessageText",
                        chatId: this.targetMessage.chat.id,
                        chatType: this.targetMessage.chat.type,
                    }
                );
                if (result && result !== true) this.waitMessage = result;
                this.mediaMode = false;
                this.lastSent = rawText;
                await this.store();
                this.startFlushTimer();
                return this.waitMessage;
            } catch (e) {
                if (this.isMessageNotModified(e)) {
                    // The text is already in place; only the keyboard needs to be set.
                    this.lastSent = rawText;
                    await this.updateKeyboard(this.keyboard());
                    await this.store();
                    this.startFlushTimer();
                    return this.waitMessage;
                }
                logError(e);
                // Fall through to sending a fresh reply below.
                this.waitMessage = null;
                this.mediaMode = false;
            }
        }
        this.waitMessage = await replyToMessage({
            message: this.sourceMessage,
            text: formatted,
            reply_markup: this.keyboard(),
            parse_mode: "MarkdownV2"
        });
        this.lastSent = rawText;
        this.startFlushTimer();
        return this.waitMessage;
    }

    /** Sets the status line (ignored after cancellation). */
    setStatus(status: string): void {
        if (this.cancelled) return;
        this.status = status;
    }

    getStatus(): string {
        return this.status;
    }

    /** Clears the status line (ignored after cancellation). */
    clearStatus(): void {
        if (this.cancelled) return;
        this.status = "";
    }

    /** Appends a chunk to the answer text (ignored after cancellation or for an empty chunk). */
    append(delta: string): void {
        if (this.cancelled) return;
        if (!delta) return;
        this.text += delta;
    }

    /** Replaces the whole answer text (ignored after cancellation). */
    replaceText(text: string): void {
        if (this.cancelled) return;
        this.text = text;
    }

    getText(): string {
        return this.text;
    }

    /**
     * Queues a push of the current visible text to Telegram.
     *
     * @param replyMarkup keyboard to attach; null removes the keyboard
     * @param end         true for the final flush (final formatting mode)
     */
    async flush(replyMarkup: InlineKeyboardMarkup | null = this.keyboard(), end?: boolean): Promise<void> {
        return this.enqueueEdit(() => this.flushUnsafe(replyMarkup, end));
    }

    /** Performs the actual flush; must only run through enqueueEdit. */
    private async flushUnsafe(replyMarkup: InlineKeyboardMarkup | null = this.keyboard(), end?: boolean): Promise<void> {
        if (!this.waitMessage && this.stream) return;
        const next = this.mediaMode ? this.visibleCaption() : this.visibleText();
        const shouldRemoveKeyboard = replyMarkup === null;
        if (next === this.lastSent) {
            // Nothing new to show: at most the keyboard needs adjusting, so skip formatting entirely.
            if (replyMarkup === null) {
                await this.removeKeyboard();
            } else if (end) {
                await this.updateKeyboard(replyMarkup);
            }
            return;
        }
        const formatted = prepareTelegramMarkdownV2(next, {mode: end ? "final" : "draft"});
        try {
            if (!this.stream && end && !this.waitMessage) {
                if (this.isGuest) {
                    // await enqueueTelegramApiCall(() => bot.answerGuestQuery({
                    //         guest_query_id: this.sourceMessage.guest_query_id ?? "",
                    //         result: {}
                    //     }),
                    //     {});
                } else {
                    await replyToMessage({
                        message: this.sourceMessage,
                        text: formatted,
                        parse_mode: "MarkdownV2",
                    });
                }
            } else {
                if (this.waitMessage) {
                    const result = this.mediaMode
                        ? await enqueueTelegramApiCall(
                            () => bot.editMessageCaption({
                                chat_id: this.waitMessage!.chat.id,
                                message_id: this.waitMessage!.message_id,
                                caption: formatted,
                                parse_mode: "MarkdownV2",
                                reply_markup: replyMarkup ?? this.emptyKeyboard(),
                            }),
                            {
                                method: "editMessageCaption",
                                chatId: this.waitMessage.chat.id,
                                chatType: this.waitMessage.chat.type,
                            }
                        )
                        : await enqueueTelegramApiCall(
                            () => bot.editMessageText({
                                chat_id: this.waitMessage!.chat.id,
                                message_id: this.waitMessage!.message_id,
                                text: formatted,
                                parse_mode: "MarkdownV2",
                                reply_markup: replyMarkup ?? this.emptyKeyboard(),
                            }),
                            {
                                method: "editMessageText",
                                chatId: this.waitMessage.chat.id,
                                chatType: this.waitMessage.chat.type,
                            }
                        );
                    if (result && result !== true) this.waitMessage = result;
                }
            }
            if (shouldRemoveKeyboard) await this.removeKeyboard();
            this.lastSent = next;
        } catch (e) {
            if (shouldRemoveKeyboard && this.isMessageNotModified(e)) {
                await this.removeKeyboard();
                this.lastSent = next;
                return;
            }
            if (!this.isMessageNotModified(e)) logError(e);
        }
    }

    /**
     * Stops streaming, rewrites the text into its "cancelled" form and flushes it
     * with the regenerate keyboard.
     *
     * @param provider provider name passed to the cancelled-generation text builder
     */
    async cancel(provider: string): Promise<void> {
        if (this.timer) clearInterval(this.timer);
        this.timer = null;
        this.cancelled = true;
        this.cancelledProvider = provider;
        this.status = "";
        this.text = buildCancelledGenerationText(this.text, this.cancelledProvider, this.mediaMode ? TELEGRAM_CAPTION_LIMIT : TELEGRAM_LIMIT);
        await this.flush(this.regenerateKeyboard(), true);
        await this.store();
    }

    /** Queues showing an image in the wait message (or sending it, in non-streaming mode). */
    async showImage(image: Buffer): Promise<void> {
        return this.enqueueEdit(() => this.showImageUnsafe(image));
    }

    /** Queues the upload of an artifact file as a separate reply. */
    async sendArtifact(file: TelegramArtifactFile): Promise<Message | null> {
        return this.enqueueEdit(() => this.sendArtifactUnsafe(file));
    }

    /** Performs the actual image display; must only run through enqueueEdit. */
    private async showImageUnsafe(image: Buffer): Promise<void> {
        if (this.cancelled) return;
        const next = this.visibleCaption();
        if (!this.waitMessage) {
            if (this.stream) return;
            // Non-streaming mode without a wait message: send the photo as a new reply.
            this.waitMessage = await enqueueTelegramApiCall(
                () => bot.sendPhoto({
                    chat_id: this.sourceMessage.chat.id,
                    photo: image,
                    caption: prepareTelegramMarkdownV2(next, {mode: "final"}),
                    parse_mode: "MarkdownV2",
                    reply_parameters: {message_id: this.sourceMessage.message_id},
                }),
                {
                    method: "sendPhoto",
                    chatId: this.sourceMessage.chat.id,
                    chatType: this.sourceMessage.chat.type,
                }
            );
            this.mediaMode = true;
            this.lastSent = next;
            return;
        }
        try {
            const result = await enqueueTelegramApiCall(
                () => bot.editMessageMedia({
                    chat_id: this.waitMessage!.chat.id,
                    message_id: this.waitMessage!.message_id,
                    media: {
                        type: "photo",
                        media: image,
                        caption: prepareTelegramMarkdownV2(next, {mode: "final"}),
                        parse_mode: "MarkdownV2",
                    },
                    reply_markup: this.keyboard(),
                }),
                {
                    method: "editMessageMedia",
                    chatId: this.waitMessage.chat.id,
                    chatType: this.waitMessage.chat.type,
                }
            );
            if (result && result !== true) this.waitMessage = result;
            this.mediaMode = true;
            this.lastSent = next;
        } catch (e) {
            if (!this.isMessageNotModified(e)) logError(e);
        }
    }

    /**
     * Uploads an artifact as a reply to the source message and records it in the message store.
     * Eligible images are sent as photos, falling back to a document when that fails.
     *
     * @returns the sent message, or null when the generation was cancelled
     * @throws Error when the file exceeds the Telegram file size limit
     */
    private async sendArtifactUnsafe(file: TelegramArtifactFile): Promise<Message | null> {
        if (this.cancelled) return null;
        if (file.sizeBytes > TELEGRAM_FILE_LIMIT_BYTES) {
            throw new Error(Environment.getTelegramFileTooLargeText(
                file.fileName,
                TELEGRAM_FILE_LIMIT_BYTES / 1024 / 1024,
            ));
        }
        const caption = file.fileName.slice(0, TELEGRAM_CAPTION_LIMIT);
        const isPhoto = this.isPhotoArtifact(file);
        // Best-effort "uploading…" indicator; a failure here must not block the upload.
        await enqueueTelegramApiCall(
            () => bot.sendChatAction({
                chat_id: this.sourceMessage.chat.id,
                action: isPhoto ? "upload_photo" : "upload_document",
            }),
            {
                method: "sendChatAction",
                chatId: this.sourceMessage.chat.id,
                chatType: this.sourceMessage.chat.type,
            }
        ).catch(logError);
        let sent: Message;
        if (isPhoto) {
            try {
                sent = await enqueueTelegramApiCall(
                    async () => {
                        const upload = this.createArtifactUpload(file);
                        try {
                            return await bot.sendPhoto({
                                chat_id: this.sourceMessage.chat.id,
                                photo: upload,
                                caption,
                                reply_parameters: {message_id: this.sourceMessage.message_id},
                            });
                        } finally {
                            this.destroyUpload(upload);
                        }
                    },
                    {
                        method: "sendPhoto",
                        chatId: this.sourceMessage.chat.id,
                        chatType: this.sourceMessage.chat.type,
                    }
                );
            } catch (e) {
                logError(e);
                sent = await this.sendArtifactAsDocument(file, caption);
            }
        } else {
            sent = await this.sendArtifactAsDocument(file, caption);
        }
        await this.storeArtifactMessage(sent, file);
        return sent;
    }

    /** An artifact is sent as a photo only if it is an image within the photo size limit and of type JPEG, PNG or WebP. */
    private isPhotoArtifact(file: TelegramArtifactFile): boolean {
        return file.kind === "image"
            && file.sizeBytes <= TELEGRAM_PHOTO_LIMIT_BYTES
            && ["image/jpeg", "image/png", "image/webp"].includes((file.mimeType || "").toLowerCase());
    }

    /** Wraps a read stream of the artifact into upload options. */
    private createArtifactUpload(file: TelegramArtifactFile): FileOptions {
        return new FileOptions(fs.createReadStream(file.path), {
            filename: file.fileName,
            contentType: file.mimeType || "application/octet-stream",
        });
    }

    /** Closes the underlying stream of an upload, if it exposes `destroy`. */
    private destroyUpload(upload: FileOptions): void {
        if ("destroy" in upload.file && typeof upload.file.destroy === "function") {
            upload.file.destroy();
        }
    }

    /** Uploads the artifact as a document reply to the source message. */
    private async sendArtifactAsDocument(file: TelegramArtifactFile, caption: string): Promise<Message> {
        return enqueueTelegramApiCall(
            async () => {
                const upload = this.createArtifactUpload(file);
                try {
                    return await bot.sendDocument({
                        chat_id: this.sourceMessage.chat.id,
                        document: upload,
                        caption,
                        reply_parameters: {message_id: this.sourceMessage.message_id},
                    });
                } finally {
                    this.destroyUpload(upload);
                }
            },
            {
                method: "sendDocument",
                chatId: this.sourceMessage.chat.id,
                chatType: this.sourceMessage.chat.type,
            }
        );
    }

    /** Records the sent artifact message, with the artifact as its attachment, in the message store. */
    private async storeArtifactMessage(sent: Message, file: TelegramArtifactFile): Promise<void> {
        const photo = sent.photo?.[sent.photo.length - 1];
        const attachmentKind: StoredAttachmentKind = file.kind === "image" ? "image" : "document";
        const attachment: StoredAttachment = {
            kind: attachmentKind,
            // Falls back to the local path when Telegram returned neither a document nor a photo.
            fileId: sent.document?.file_id ?? photo?.file_id ?? file.path,
            fileUniqueId: sent.document?.file_unique_id ?? photo?.file_unique_id,
            fileName: file.fileName,
            mimeType: file.mimeType,
            cachePath: file.path,
        };
        const stored: StoredMessage = {
            chatId: sent.chat.id,
            id: sent.message_id,
            replyToMessageId: sent.reply_to_message?.message_id ?? this.sourceMessage.message_id,
            fromId: sent.from?.id ?? 0,
            text: sent.caption ?? file.fileName,
            date: sent.date ?? Math.floor(Date.now() / 1000),
            attachments: [attachment],
        };
        await MessageStore.put(stored);
    }

    /**
     * Stops streaming and sends the final text.
     *
     * @param removeKeyboard when true (default) the cancel keyboard is replaced by the
     *                       regenerate keyboard (or removed if there is none);
     *                       when false the cancel keyboard stays
     */
    async finish(removeKeyboard = true): Promise<void> {
        if (this.timer) clearInterval(this.timer);
        this.timer = null;
        if (this.cancelled) {
            await this.flush(removeKeyboard ? this.regenerateKeyboard() : this.keyboard(), true);
            await this.store();
            return;
        }
        if (Environment.SEND_TIME_TOOK) {
            const diff = Date.now() - this.startedAt;
            // Only append the timing footer when there is room left for it.
            if (this.text.length + 32 < TELEGRAM_LIMIT) this.text += `\n\n⏱️ ${diff}ms`;
        }
        this.clearStatus();
        await this.flush(removeKeyboard ? this.regenerateKeyboard() : this.keyboard(), true);
        await this.store();
    }

    /** Stops streaming and replaces the text with the error message, offering regeneration. */
    async fail(error: unknown): Promise<void> {
        if (this.timer) clearInterval(this.timer);
        this.timer = null;
        this.status = "";
        this.text = `${Environment.errorText}\n${error instanceof Error ? error.message : String(error)}`;
        await this.flush(this.regenerateKeyboard(), true);
    }

    /** Saves the wait message with its current visible text to the message store; errors are logged. */
    private async store(): Promise<void> {
        if (!this.waitMessage) return;
        try {
            await MessageStore.put({...this.waitMessage, text: this.visibleText()} as Message);
        } catch (e) {
            logError(e);
        }
    }
}
File diff suppressed because it is too large Load Diff
+10 -1
View File
@@ -1,5 +1,14 @@
import {AiProvider} from "./ai-provider";
/** Endpoint details that can be attached to a capability entry; every field is optional. */
export type AiEndpointInfo = {
// Provider associated with the endpoint.
provider?: AiProvider;
// Base URL of the endpoint.
baseUrl?: string;
// NOTE(review): presumably marks an endpoint outside the main provider — confirm with the code that sets it.
external?: boolean;
};
export type AiCapabilityInfo = { export type AiCapabilityInfo = {
supported?: boolean, supported?: boolean,
external?: boolean, external?: boolean,
model?: string model?: string,
endpoint?: AiEndpointInfo,
}; };
+10 -5
View File
@@ -1,9 +1,14 @@
import {AiCapabilityInfo} from "./ai-capability-info"; import {AiCapabilityInfo} from "./ai-capability-info";
export class AiModelCapabilities { export class AiModelCapabilities {
vision?: AiCapabilityInfo; vision: AiCapabilityInfo | undefined;
ocr?: AiCapabilityInfo; ocr: AiCapabilityInfo | undefined;
thinking?: AiCapabilityInfo; thinking: AiCapabilityInfo | undefined;
tools?: AiCapabilityInfo; extendedThinking: AiCapabilityInfo | undefined;
audio?: AiCapabilityInfo; tools: AiCapabilityInfo | undefined;
audio: AiCapabilityInfo | undefined;
documents: AiCapabilityInfo | undefined;
outputImages: AiCapabilityInfo | undefined;
speechToText: AiCapabilityInfo | undefined;
textToSpeech: AiCapabilityInfo | undefined;
} }