ai: add unified runtime and provider adapters
This commit is contained in:
@@ -0,0 +1,213 @@
|
||||
import {Mistral} from "@mistralai/mistralai";
|
||||
import {GoogleGenAI} from "@google/genai";
|
||||
import {Ollama} from "ollama";
|
||||
import {OpenAI} from "openai";
|
||||
import {Environment} from "../common/environment";
|
||||
import {AiModelCapabilities} from "../model/ai-model-capabilities";
|
||||
import {AiProvider} from "../model/ai-provider";
|
||||
|
||||
/** Name of a single capability entry: any property key of `AiModelCapabilities`. */
export type AiCapabilityName = keyof AiModelCapabilities;
|
||||
/** What a runtime target is resolved for: one specific capability, or plain `"chat"`. */
export type AiRuntimePurpose = AiCapabilityName | "chat";
|
||||
|
||||
/**
 * A fully resolved "where and what to call" description for one purpose:
 * provider, model and (optionally) the endpoint and credentials to use.
 */
export type AiRuntimeTarget = {
    /** Provider that serves this target. */
    provider: AiProvider;
    /** Purpose the target was resolved for. */
    purpose: AiRuntimePurpose;
    /** Model identifier to request. */
    model: string;
    /** Endpoint base URL; absent when nothing is configured. */
    baseUrl?: string;
    /** API key; absent when nothing is configured. */
    apiKey?: string;
};
|
||||
|
||||
/** Which client flavour is used to talk to Gemini: the Google GenAI SDK or the OpenAI-compatible API. */
export type GeminiApiMode = "google" | "openai";
|
||||
|
||||
/** Fallback base URL for Gemini in "openai" mode when no explicit base URL is configured. */
const GEMINI_OPENAI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/";
|
||||
|
||||
/**
 * Environment-variable name fragments per purpose, listed in priority order.
 *
 * Each fragment is combined with a provider prefix and a kind suffix
 * (`_MODEL`, `_API_KEY`, `_BASE_URL`, ...) to form a candidate variable name;
 * later fragments act as fallbacks for earlier ones.
 */
const PURPOSE_SUFFIXES: Record<AiRuntimePurpose, string[]> = {
    chat: ["CHAT"],

    // Image understanding.
    vision: ["VISION", "IMAGE"],
    ocr: ["OCR", "VISION", "IMAGE"],

    // Reasoning.
    thinking: ["THINKING", "THINK"],
    extendedThinking: ["EXTENDED_THINKING", "THINKING", "THINK"],

    tools: ["TOOLS", "CHAT"],
    audio: ["AUDIO"],
    documents: ["DOCUMENTS", "RAG", "EMBEDDING"],
    outputImages: ["OUTPUT_IMAGES", "IMAGE"],

    // Speech.
    speechToText: ["SPEECH_TO_TEXT", "TRANSCRIPTION", "STT", "AUDIO"],
    textToSpeech: ["TEXT_TO_SPEECH", "TTS"],
};
|
||||
|
||||
/**
 * Returns the environment-variable prefix for a provider, which is simply
 * the provider's string form.
 */
function providerPrefix(provider: AiProvider): string {
    const prefix = provider.toString();
    return prefix;
}
|
||||
|
||||
/**
 * Reads an optional configuration value by name.
 *
 * @param name Configuration / environment variable name.
 * @returns Whatever `Environment.getOptionalConfigValue` yields for that name.
 */
function env(name: string): string | undefined {
    const configured = Environment.getOptionalConfigValue(name);
    return configured;
}
|
||||
|
||||
/**
 * Returns the first truthy configuration value among `names`, checking them
 * in order and stopping at the first hit.
 *
 * @param names Candidate variable names, highest priority first.
 * @returns The first non-empty value, or `undefined` when none is set.
 */
function firstEnv(names: string[]): string | undefined {
    for (const candidate of names) {
        const resolved = env(candidate);
        // Empty strings count as "not configured".
        if (!resolved) continue;
        return resolved;
    }

    return undefined;
}
|
||||
|
||||
/**
 * Builds the ordered list of variable names that may hold a purpose-specific
 * endpoint for a provider.
 *
 * For every purpose fragment three spellings are produced, in this order:
 * `<PREFIX>_<FRAGMENT>_BASE_URL`, `..._ENDPOINT`, `..._ADDRESS`.
 */
function endpointEnvNames(provider: AiProvider, purpose: AiRuntimePurpose): string[] {
    const stem = providerPrefix(provider);
    const kinds = ["BASE_URL", "ENDPOINT", "ADDRESS"];
    const names: string[] = [];

    for (const fragment of PURPOSE_SUFFIXES[purpose]) {
        for (const kind of kinds) {
            names.push(`${stem}_${fragment}_${kind}`);
        }
    }

    return names;
}
|
||||
|
||||
/**
 * Builds the ordered list of `<PREFIX>_<FRAGMENT>_API_KEY` variable names
 * for a provider and purpose.
 */
function apiKeyEnvNames(provider: AiProvider, purpose: AiRuntimePurpose): string[] {
    const stem = providerPrefix(provider);
    const names: string[] = [];

    for (const fragment of PURPOSE_SUFFIXES[purpose]) {
        names.push(`${stem}_${fragment}_API_KEY`);
    }

    return names;
}
|
||||
|
||||
/**
 * Builds the ordered list of `<PREFIX>_<FRAGMENT>_MODEL` variable names
 * for a provider and purpose.
 */
function modelEnvNames(provider: AiProvider, purpose: AiRuntimePurpose): string[] {
    const stem = providerPrefix(provider);
    const names: string[] = [];

    for (const fragment of PURPOSE_SUFFIXES[purpose]) {
        names.push(`${stem}_${fragment}_MODEL`);
    }

    return names;
}
|
||||
|
||||
/**
 * Returns the provider-wide (purpose-independent) base URL, if configured.
 *
 * - Ollama reads `OLLAMA_ENDPOINT` only.
 * - Gemini, Mistral and OpenAI read `<PROVIDER>_BASE_URL`, then `<PROVIDER>_ENDPOINT`.
 * - Gemini additionally falls back to the OpenAI-compatible URL when
 *   `Environment.GEMINI_API_MODE` is `"openai"`.
 *
 * @returns The configured URL, or `undefined` when nothing applies.
 */
export function getProviderBaseUrl(provider: AiProvider): string | undefined {
    if (provider === AiProvider.OLLAMA) {
        return env("OLLAMA_ENDPOINT");
    }

    if (provider === AiProvider.GEMINI) {
        const configured = env("GEMINI_BASE_URL") ?? env("GEMINI_ENDPOINT");
        if (configured != null) return configured;
        return Environment.GEMINI_API_MODE === "openai" ? GEMINI_OPENAI_BASE_URL : undefined;
    }

    if (provider === AiProvider.MISTRAL) {
        return env("MISTRAL_BASE_URL") ?? env("MISTRAL_ENDPOINT");
    }

    if (provider === AiProvider.OPENAI) {
        return env("OPENAI_BASE_URL") ?? env("OPENAI_ENDPOINT");
    }

    return undefined;
}
|
||||
|
||||
/**
 * Returns the provider-wide API key taken from `Environment`
 * (`<PROVIDER>_API_KEY`), or `undefined` for an unrecognised provider.
 */
export function getProviderApiKey(provider: AiProvider): string | undefined {
    if (provider === AiProvider.OLLAMA) return Environment.OLLAMA_API_KEY;
    if (provider === AiProvider.GEMINI) return Environment.GEMINI_API_KEY;
    if (provider === AiProvider.MISTRAL) return Environment.MISTRAL_API_KEY;
    if (provider === AiProvider.OPENAI) return Environment.OPENAI_API_KEY;

    return undefined;
}
|
||||
|
||||
/** Default Ollama model for a purpose; anything not listed uses the chat model. */
function ollamaDefaultModel(purpose: AiRuntimePurpose): string {
    if (purpose === "vision" || purpose === "ocr" || purpose === "outputImages") {
        return Environment.OLLAMA_IMAGE_MODEL;
    }
    if (purpose === "thinking" || purpose === "extendedThinking") {
        return Environment.OLLAMA_THINK_MODEL;
    }
    if (purpose === "audio" || purpose === "speechToText") {
        return Environment.OLLAMA_AUDIO_MODEL;
    }
    if (purpose === "documents") {
        return Environment.OLLAMA_EMBEDDING_MODEL;
    }
    return Environment.OLLAMA_CHAT_MODEL;
}

/** Default Gemini model for a purpose; anything not listed uses `GEMINI_MODEL`. */
function geminiDefaultModel(purpose: AiRuntimePurpose): string {
    if (purpose === "outputImages") return Environment.GEMINI_IMAGE_MODEL;
    if (purpose === "speechToText") return Environment.GEMINI_TRANSCRIPTION_MODEL;
    if (purpose === "textToSpeech") return Environment.GEMINI_TTS_MODEL;
    return Environment.GEMINI_MODEL;
}

/** Default Mistral model for a purpose; anything not listed uses `MISTRAL_MODEL`. */
function mistralDefaultModel(purpose: AiRuntimePurpose): string {
    if (purpose === "speechToText") return Environment.MISTRAL_TRANSCRIPTION_MODEL;
    // An empty/unset TTS model falls back to the general model.
    if (purpose === "textToSpeech") return Environment.MISTRAL_TTS_MODEL || Environment.MISTRAL_MODEL;
    return Environment.MISTRAL_MODEL;
}

/** Default OpenAI model for a purpose; anything not listed uses `OPENAI_MODEL`. */
function openAiDefaultModel(purpose: AiRuntimePurpose): string {
    if (purpose === "outputImages") return Environment.OPENAI_IMAGE_MODEL;
    if (purpose === "speechToText") return Environment.OPENAI_TRANSCRIPTION_MODEL;
    if (purpose === "textToSpeech") return Environment.OPENAI_TTS_MODEL;
    return Environment.OPENAI_MODEL;
}

/**
 * Returns the model configured in `Environment` for the given provider and
 * purpose. This is the last fallback when neither an explicit override nor a
 * purpose-specific variable supplies a model.
 *
 * @param provider Provider whose defaults are consulted.
 * @param purpose  Purpose the model is needed for.
 * @returns The configured model name for that provider/purpose combination.
 */
export function getDefaultModelForPurpose(provider: AiProvider, purpose: AiRuntimePurpose): string {
    switch (provider) {
        case AiProvider.OLLAMA:
            return ollamaDefaultModel(purpose);
        case AiProvider.GEMINI:
            return geminiDefaultModel(purpose);
        case AiProvider.MISTRAL:
            return mistralDefaultModel(purpose);
        case AiProvider.OPENAI:
            return openAiDefaultModel(purpose);
    }
}
|
||||
|
||||
/**
 * Resolves provider, model, endpoint and API key for one purpose.
 *
 * Resolution order for each field (first defined value wins):
 * - model:   `modelOverride` → purpose-specific `*_MODEL` variables → provider default for the purpose
 * - baseUrl: purpose-specific endpoint variables → provider-wide base URL
 * - apiKey:  purpose-specific `*_API_KEY` variables → provider-wide API key
 *
 * @param provider      Provider to resolve for.
 * @param purpose       Purpose the target will be used for.
 * @param modelOverride Optional explicit model; skips model lookup when not null/undefined.
 */
export function resolveAiRuntimeTarget(
    provider: AiProvider,
    purpose: AiRuntimePurpose,
    modelOverride?: string,
): AiRuntimeTarget {
    const resolved: AiRuntimeTarget = {
        provider,
        purpose,
        model: modelOverride
            ?? firstEnv(modelEnvNames(provider, purpose))
            ?? getDefaultModelForPurpose(provider, purpose),
        baseUrl: firstEnv(endpointEnvNames(provider, purpose)) ?? getProviderBaseUrl(provider),
        apiKey: firstEnv(apiKeyEnvNames(provider, purpose)) ?? getProviderApiKey(provider),
    };

    return resolved;
}
|
||||
|
||||
/**
 * Tells whether two targets hit the same endpoint: same provider, same base
 * URL and same API key. A missing URL/key is treated like an empty string.
 * The model is deliberately not part of the comparison.
 */
export function sameRuntimeEndpoint(left: AiRuntimeTarget, right: AiRuntimeTarget): boolean {
    const orEmpty = (value?: string): string => value ?? "";

    if (left.provider !== right.provider) return false;
    if (orEmpty(left.baseUrl) !== orEmpty(right.baseUrl)) return false;

    return orEmpty(left.apiKey) === orEmpty(right.apiKey);
}
|
||||
|
||||
/**
 * Creates an OpenAI SDK client for the target, forwarding its API key and
 * base URL (as the SDK's `baseURL` option).
 */
export function createOpenAiClient(target: AiRuntimeTarget): OpenAI {
    const {apiKey, baseUrl} = target;

    return new OpenAI({apiKey, baseURL: baseUrl});
}
|
||||
|
||||
/**
 * Decides which API flavour to use for Gemini.
 *
 * An explicit `Environment.GEMINI_API_MODE` of `"openai"` or `"google"` wins.
 * Otherwise the mode is inferred from the target's base URL: a URL containing
 * `/openai` selects `"openai"`, anything else (including no URL) `"google"`.
 */
export function getGeminiApiMode(target?: AiRuntimeTarget): GeminiApiMode {
    const configured = Environment.GEMINI_API_MODE;
    if (configured === "openai" || configured === "google") return configured;

    const url = target?.baseUrl ?? "";
    return url.includes("/openai") ? "openai" : "google";
}
|
||||
|
||||
/**
 * Creates an OpenAI SDK client pointed at Gemini's OpenAI-compatible API.
 * The target's own base URL is used when present, otherwise the built-in
 * Gemini OpenAI-compatible URL.
 */
export function createGeminiOpenAiClient(target: AiRuntimeTarget): OpenAI {
    const baseUrl = target.baseUrl ?? GEMINI_OPENAI_BASE_URL;

    return createOpenAiClient({...target, baseUrl});
}
|
||||
|
||||
/**
 * Creates a Google GenAI SDK client for the target.
 * Only the API key is forwarded; `target.baseUrl` is not passed to the SDK.
 */
export function createGoogleGenAiClient(target: AiRuntimeTarget): GoogleGenAI {
    const {apiKey} = target;

    return new GoogleGenAI({apiKey});
}
|
||||
|
||||
/**
 * Creates a Mistral SDK client for the target, forwarding its API key and
 * base URL (as the SDK's `serverURL` option).
 */
export function createMistralClient(target: AiRuntimeTarget): Mistral {
    const {apiKey, baseUrl} = target;

    return new Mistral({apiKey, serverURL: baseUrl});
}
|
||||
|
||||
/**
 * Ensures an Ollama base URL carries an explicit port, defaulting to 11434.
 *
 * - no / blank URL            → `undefined` (the client then uses its own default host)
 * - URL already has a port    → returned unchanged
 * - URL without a port        → `:11434` is inserted right after the host,
 *                               before any path, query or fragment
 */
function withDefaultOllamaPort(baseUrl?: string): string | undefined {
    const trimmed = baseUrl?.trim();
    if (!trimmed) return undefined;

    // Split into optional "scheme://", authority (host[:port]) and the rest.
    const parts = /^([a-z][a-z\d+.-]*:\/\/)?([^/?#]*)(.*)$/i.exec(trimmed);
    if (!parts) return trimmed;

    const scheme = parts[1] ?? "";
    const authority = parts[2] ?? "";
    const rest = parts[3] ?? "";

    // A trailing ":<digits>" on the authority is an explicit port; keep it.
    if (/:\d+$/.test(authority)) return trimmed;

    return `${scheme}${authority}:11434${rest}`;
}

/**
 * Creates an Ollama client for the target.
 *
 * The host is the target's base URL with the default Ollama port (11434)
 * added when the URL does not specify one. When the target has no base URL
 * the host is left undefined instead of producing the bogus host
 * `"undefined:11434"`. An API key, when present, is sent as a Bearer token in
 * the `Authorization` header.
 */
export function createOllamaClient(target: AiRuntimeTarget): Ollama {
    return new Ollama({
        host: withDefaultOllamaPort(target.baseUrl),
        headers: target.apiKey ? {"Authorization": `Bearer ${target.apiKey}`} : undefined,
    });
}
|
||||
@@ -0,0 +1,55 @@
|
||||
import {randomUUID} from "node:crypto";
|
||||
|
||||
/**
 * Bookkeeping record for one in-flight AI request that can be cancelled.
 */
export type AiCancelRequest = {
    /** Unique registry key for this request. */
    id: string;
    // NOTE(review): chatId / messageId / fromId look like chat, message and
    // sender identifiers supplied by the caller — confirm against call sites.
    chatId: number;
    /** May be filled in after creation. */
    messageId?: number;
    fromId: number;
    /** Provider label supplied by the caller. */
    provider: string;
    /** Controller whose `abort()` is invoked on cancellation. */
    controller: AbortController;
    /** Optional hook awaited right after the controller is aborted. */
    onCancel?: () => Promise<void> | void;
};
|
||||
|
||||
/** Registry of cancellable requests, keyed by request id. */
const requests = new Map<string, AiCancelRequest>();
|
||||
|
||||
/**
 * Creates a cancellable-request record, assigns it a fresh random UUID and
 * stores it in the registry.
 *
 * @param params Request details; `controller` is optional and a new
 *               `AbortController` is created when it is omitted.
 * @returns The registered record (including the generated `id`).
 */
export function createAiCancelRequest(params: Omit<AiCancelRequest, "id" | "controller"> & { controller?: AbortController }): AiCancelRequest {
    const {chatId, messageId, fromId, provider, onCancel} = params;
    const controller = params.controller ?? new AbortController();

    const created: AiCancelRequest = {
        id: randomUUID(),
        controller,
        chatId,
        messageId,
        fromId,
        provider,
        onCancel,
    };

    requests.set(created.id, created);
    return created;
}
|
||||
|
||||
/**
 * Attaches a message id to a registered request.
 * Does nothing when no request with that id is registered.
 */
export function setAiCancelMessageId(id: string, messageId: number): void {
    const entry = requests.get(id);
    if (!entry) return;

    entry.messageId = messageId;
}
|
||||
|
||||
/**
 * Looks up a registered request by id.
 *
 * @returns The record, or `undefined` when the id is not registered.
 */
export function getAiCancelRequest(id: string): AiCancelRequest | undefined {
    const entry = requests.get(id);
    return entry;
}
|
||||
|
||||
/**
 * Cancels a registered request: aborts its controller, awaits its optional
 * `onCancel` hook and removes it from the registry.
 *
 * The registry entry is removed even when `onCancel` throws; the error is
 * then propagated to the caller.
 *
 * @returns `false` when no request with that id is registered, otherwise `true`.
 */
export async function abortAiRequest(id: string): Promise<boolean> {
    const pending = requests.get(id);
    if (!pending) {
        return false;
    }

    pending.controller.abort();

    try {
        await pending.onCancel?.();
    } finally {
        requests.delete(id);
    }

    return true;
}
|
||||
|
||||
/**
 * Removes a request from the registry without aborting it.
 * Unknown ids are ignored.
 */
export function finishAiRequest(id: string): void {
    requests.delete(id);
}
|
||||
@@ -0,0 +1,90 @@
|
||||
import {AiToolCall} from "./tool-types";
|
||||
import {OllamaChatMessage} from "./ollama-chat-message";
|
||||
import {GeminiMessage} from "./gemini-chat-message";
|
||||
import {MistralChatMessage} from "./mistral-chat-message";
|
||||
import {MessageAudioPart, MessageImagePart} from "../common/message-part";
|
||||
import {OpenAIChatMessage} from "./openai-chat-message";
|
||||
|
||||
/**
 * Provider-neutral chat message. Provider-specific shapes are derived from
 * it by the `as*ChatMessage` converters.
 */
export type ChatMessage = {
    role: "system" | "user" | "assistant" | "tool";
    /** Text content of the message. */
    content: string;

    // Optional attachments. NOTE(review): the string-array fields presumably
    // carry encoded payloads or references — confirm the encoding with callers.
    images?: Array<string>;
    imageParts?: Array<MessageImagePart>;
    documents?: Array<string>;
    audios?: Array<string>;
    audioParts?: Array<MessageAudioPart>;
    videos?: Array<string>;
    videoNotes?: Array<string>;

    /** Reasoning text attached to the message, if any. */
    thinking?: string;

    // Tool-calling fields (snake_case is kept because they are copied as-is
    // into the Ollama message shape).
    tool_calls?: Array<AiToolCall>;
    tool_name?: string;
};
|
||||
|
||||
/**
 * Converts a neutral chat message into the Ollama message shape.
 * Copies role, content, thinking, images and the tool-call fields; all other
 * attachment fields are dropped.
 */
export function asOllamaChatMessage(message: ChatMessage): OllamaChatMessage {
    const {role, content, thinking, images, tool_calls, tool_name} = message;

    return {role, content, thinking, images, tool_calls, tool_name};
}
|
||||
|
||||
// export function asGeminiChatMessage(message: ChatMessage): GeminiMessage {
|
||||
// if (message.images) {
|
||||
// return {
|
||||
// role: message.role,
|
||||
// content: message.images.map(() => {
|
||||
// return {
|
||||
// type: "image",
|
||||
// };
|
||||
// })
|
||||
// };
|
||||
// }
|
||||
//
|
||||
// return {
|
||||
// role: message.role,
|
||||
// content: {
|
||||
// type: "text",
|
||||
// text: message.content,
|
||||
// },
|
||||
// };
|
||||
// }
|
||||
|
||||
/**
 * Converts a neutral chat message into the Mistral message shape.
 * Only role and text content are carried over.
 */
export function asMistralChatMessage(message: ChatMessage): MistralChatMessage {
    const {role, content} = message;

    return {role, content};
}
|
||||
|
||||
// export function asOpenAIChatMessage(message: ChatMessage): OpenAIChatMessage {
|
||||
// return {
|
||||
//
|
||||
// }
|
||||
// }
|
||||
|
||||
/*
|
||||
const messages: any[] = ordered.map(part => {
|
||||
const content: any[] = [{
|
||||
type: "input_text",
|
||||
text: (Environment.USE_NAMES_IN_PROMPT && !part.bot ? `MESSAGE FROM USER \"${part.name}\":\n` : "") + part.content,
|
||||
}];
|
||||
|
||||
if (!part.bot) {
|
||||
for (const image of part.images ?? []) {
|
||||
content.push({type: "input_image", image_url: `data:image/jpeg;base64,${image}`, detail: "auto"});
|
||||
}
|
||||
}
|
||||
|
||||
return {role: part.bot ? "assistant" : "user", content};
|
||||
});
|
||||
|
||||
if (Environment.SYSTEM_PROMPT && Environment.USE_SYSTEM_PROMPT) {
|
||||
messages.unshift({role: "system", content: Environment.SYSTEM_PROMPT});
|
||||
}
|
||||
return {parts: messages, imageCount};
|
||||
*/
|
||||
|
||||
/** Any of the provider-specific chat message shapes. */
export type AiChatMessage = | OpenAIChatMessage | OllamaChatMessage | MistralChatMessage | GeminiMessage;
|
||||
@@ -0,0 +1,84 @@
|
||||
/** Step carrying user-supplied content. */
export type GeminiUserInputStep = {
    type: "user_input";
    content?: GeminiContent[];
};

/** Step carrying model-produced content. */
export type GeminiModelOutputStep = {
    type: "model_output";
    content?: GeminiContent[];
};

/** Step describing a function call, with its id, name and arguments. */
export type GeminiFunctionCallStep = {
    id: string;
    arguments: Record<string, unknown>;
    name: string;
    type: "function_call";
    signature?: string;
};

/** Step describing the result of a function call, linked back through `call_id`. */
export type GeminiFunctionResultStep = {
    call_id: string;
    // The union documents the expected shapes; because it includes `unknown`
    // the type checker treats the whole thing as `unknown`.
    result: unknown | Array<GeminiTextContent | GeminiImageContent> | string;
    type: "function_result";
    is_error?: boolean;
    name?: string;
    signature?: string;
};

/** Any step kind, discriminated by its `type` field. */
export type GeminiStep =
    | GeminiUserInputStep
    | GeminiModelOutputStep
    | GeminiFunctionCallStep
    | GeminiFunctionResultStep;
|
||||
|
||||
/** Plain text part. */
export type GeminiTextContent = {
    text: string;
};

/** Inline binary part: payload plus its MIME type. */
export type GeminiInlineContent = {
    inlineData: {
        // NOTE(review): presumably base64-encoded — confirm with the code that builds these parts.
        data: string;
        mimeType: string;
    };
};

// Media-specific aliases; structurally they are all the same inline part.
export type GeminiImageContent = GeminiInlineContent;
export type GeminiAudioContent = GeminiInlineContent;
export type GeminiDocumentContent = GeminiInlineContent;
export type GeminiVideoContent = GeminiInlineContent;

/** Part describing a function call. */
export type GeminiFunctionCallContent = {
    functionCall: {
        id?: string;
        name?: string;
        args?: Record<string, unknown>;
    };
};

/** Part carrying the response to a function call. */
export type GeminiFunctionResponseContent = {
    functionResponse: {
        id?: string;
        name?: string;
        response: Record<string, unknown>;
    };
};

/** Any part that may appear inside a Gemini message. */
export type GeminiContent =
    | GeminiTextContent
    | GeminiInlineContent
    | GeminiFunctionCallContent
    | GeminiFunctionResponseContent;
|
||||
|
||||
/** One conversational turn: optional role plus one part or a list of parts. */
export type GeminiTurn = {
    content?: GeminiContent[] | GeminiContent;
    role?: string;
};

/** Accepted input shapes: raw text, a list of steps, a list of parts, or a single part. */
export type GeminiInput = string | GeminiStep[] | GeminiContent[] | GeminiContent;

/** Chat message in Gemini form: a `"user"` or `"model"` role with its parts. */
export type GeminiMessage = {
    role: "user" | "model";
    parts: Array<GeminiContent>;
};
|
||||
@@ -0,0 +1,112 @@
|
||||
/** Known image detail levels. */
export const MistralImageDetail = {
    Low: "low",
    Auto: "auto",
    High: "high",
} as const;

/** A known image detail level, or an unrecognized value. */
export type MistralImageDetail = OpenEnum<typeof MistralImageDetail>;

// Type-level-only brand (declared, never emitted) used to mark values that
// are not among the known members of an open enum.
declare const __brand: unique symbol;

/** `T` tagged as "not one of the known enum members". */
export type Unrecognized<T> = T & { [__brand]: "unrecognized" };

/**
 * Open enum: any known value of the constant object `T`, plus a branded
 * `string` (or `number`, for numeric enums) for values not known here.
 */
export type OpenEnum<T extends Readonly<Record<string, string | number>>> =
    | T[keyof T]
    | Unrecognized<T[keyof T] extends number ? number : string>;

/** Known built-in connector identifiers. */
export const BuiltInConnectors = {
    WebSearch: "web_search",
    WebSearchPremium: "web_search_premium",
    CodeInterpreter: "code_interpreter",
    ImageGeneration: "image_generation",
    DocumentLibrary: "document_library",
} as const;

/** A known built-in connector identifier, or an unrecognized value. */
export type BuiltInConnectors = OpenEnum<typeof BuiltInConnectors>;
|
||||
|
||||
/** Plain text chunk. */
export type MistralTextChunk = {
    type: "text";
    text: string;
};

/** Reference to a tool-provided source (built-in connector or custom tool name). */
export type MistralToolReferenceChunk = {
    type: "tool_reference" | undefined;
    tool: BuiltInConnectors | string;
    title: string;
    url?: string | null | undefined;
    favicon?: string | null | undefined;
    description?: string | null | undefined;
};

/** Reasoning chunk made of text and tool-reference pieces. */
export type MistralThinkChunk = {
    type: "thinking";
    thinking: (MistralToolReferenceChunk | MistralTextChunk)[];
    signature?: string | null | undefined;
    closed?: boolean | undefined;
};

/** Image chunk: either a bare URL string or a URL with an optional detail level. */
export type MistralImageURLChunk = {
    type: "image_url";
    imageUrl: string | {
        url: string;
        detail?: MistralImageDetail | null | undefined;
    };
};

/** Chunk kinds currently modelled for message content. */
export type MistralContentChunk =
    | MistralTextChunk
    | MistralThinkChunk
    | MistralImageURLChunk;
|
||||
|
||||
/*
|
||||
| (ImageURLChunk & { type: "image_url" })
|
||||
| (DocumentURLChunk & { type: "document_url" })
|
||||
| (TextChunk & { type: "text" })
|
||||
| (ReferenceChunk & { type: "reference" })
|
||||
| (FileChunk & { type: "file" })
|
||||
| (ThinkChunk & { type: "thinking" })
|
||||
| AudioChunk
|
||||
*/
|
||||
|
||||
/** Function invocation: name plus arguments, either as an object or as a string. */
export type MistralFunctionCall = {
    name: string;
    // `any` is kept as in the original shape; values are not validated here.
    arguments: { [k: string]: any } | string;
};

/** Tool call wrapper around a function invocation. */
export type MistralToolCall = {
    id?: string | undefined;
    type?: string | undefined;
    function: MistralFunctionCall;
    index?: number | undefined;
};
|
||||
|
||||
/** Assistant message: optional content and optional tool calls. */
export type MistralAssistantMessage = {
    role: "assistant";
    content?: string | MistralContentChunk[] | null | undefined;
    toolCalls?: MistralToolCall[] | null | undefined;
    prefix?: boolean | undefined;
};

/** Chunk kinds allowed in chunked system content. */
export type MistralSystemMessageContentChunks =
    | MistralTextChunk
    | MistralThinkChunk;

/** System message; content is modelled as plain text here. */
export type MistralSystemMessage = {
    role: "system";
    content: string;
};

/** Tool result message, optionally tied to a tool call id and tool name. */
export type MistralToolMessage = {
    role: "tool";
    content: string | MistralContentChunk[] | null;
    toolCallId?: string | null | undefined;
    name?: string | null | undefined;
};

/** User message with text or chunked content. */
export type MistralUserMessage = {
    role: "user";
    content: string | MistralContentChunk[] | null;
};

/** Any Mistral chat message, discriminated by `role`. */
export type MistralChatMessage =
    | MistralAssistantMessage
    | MistralSystemMessage
    | MistralToolMessage
    | MistralUserMessage;
|
||||
@@ -0,0 +1,3 @@
|
||||
import {Message} from "ollama";
|
||||
|
||||
/** Ollama chat message: alias of the `Message` type exported by the `ollama` package. */
export type OllamaChatMessage = Message;
|
||||
@@ -0,0 +1,3 @@
|
||||
import {ResponseInputItem} from "openai/resources/responses/responses";
|
||||
|
||||
/** OpenAI chat message: alias of the SDK's Responses API `ResponseInputItem` type. */
export type OpenAIChatMessage = ResponseInputItem
|
||||
@@ -0,0 +1,325 @@
|
||||
import {AiProvider} from "../model/ai-provider";
|
||||
import {AiModelCapabilities} from "../model/ai-model-capabilities";
|
||||
import {Environment} from "../common/environment";
|
||||
import {logError} from "../util/utils";
|
||||
import {AiCapabilityInfo} from "../model/ai-capability-info";
|
||||
import {isOllamaSpeechToTextModel} from "./speech-to-text-models";
|
||||
import {
|
||||
AiCapabilityName,
|
||||
AiRuntimeTarget,
|
||||
createGeminiOpenAiClient,
|
||||
createGoogleGenAiClient,
|
||||
createMistralClient,
|
||||
createOllamaClient,
|
||||
createOpenAiClient,
|
||||
getGeminiApiMode,
|
||||
resolveAiRuntimeTarget,
|
||||
sameRuntimeEndpoint,
|
||||
} from "./ai-runtime-target";
|
||||
|
||||
/** A provider/model pair together with its capability report. */
export type RuntimeModelInfo = {
    /** Provider serving the model. */
    provider: AiProvider;
    /** Model identifier. */
    model: string;
    /** Capability report for that model. */
    capabilities: AiModelCapabilities;
};
|
||||
|
||||
/** Every capability name, in the order in which capabilities are resolved. */
const CAPABILITY_NAMES: AiCapabilityName[] = [
    // Image understanding
    "vision",
    "ocr",
    // Reasoning
    "thinking",
    "extendedThinking",
    // Tool use and other inputs/outputs
    "tools",
    "audio",
    "documents",
    "outputImages",
    // Speech
    "speechToText",
    "textToSpeech",
];
|
||||
|
||||
/**
 * Returns the chat model currently configured in `Environment` for a provider.
 */
export function getRuntimeModel(provider: AiProvider): string {
    switch (provider) {
        case AiProvider.OLLAMA: {
            return Environment.OLLAMA_CHAT_MODEL;
        }
        case AiProvider.GEMINI: {
            return Environment.GEMINI_MODEL;
        }
        case AiProvider.MISTRAL: {
            return Environment.MISTRAL_MODEL;
        }
        case AiProvider.OPENAI: {
            return Environment.OPENAI_MODEL;
        }
    }
}
|
||||
|
||||
/**
 * Overwrites the chat model configured in `Environment` for a provider.
 * An unrecognised provider leaves `Environment` untouched.
 */
export function setRuntimeModel(provider: AiProvider, model: string): void {
    if (provider === AiProvider.OLLAMA) {
        Environment.OLLAMA_CHAT_MODEL = model;
    } else if (provider === AiProvider.GEMINI) {
        Environment.GEMINI_MODEL = model;
    } else if (provider === AiProvider.MISTRAL) {
        Environment.MISTRAL_MODEL = model;
    } else if (provider === AiProvider.OPENAI) {
        Environment.OPENAI_MODEL = model;
    }
}
|
||||
|
||||
/**
 * Builds the report entry for one capability.
 *
 * @param supported     Whether the capability is available.
 * @param target        Target that serves the capability; when given, its model
 *                      (if non-empty) and endpoint are recorded.
 * @param runtimeTarget The main chat target used as the reference point.
 * @returns An entry where `endpoint.external` is true when `target` uses a
 *          different endpoint than `runtimeTarget`, and `external` is set to
 *          true when either the model or the endpoint differs.
 */
function capability(supported: boolean, target?: AiRuntimeTarget, runtimeTarget?: AiRuntimeTarget): AiCapabilityInfo {
    const info: AiCapabilityInfo = {supported};
    if (!target) return info;

    if (target.model) info.model = target.model;

    const otherEndpoint = runtimeTarget ? !sameRuntimeEndpoint(target, runtimeTarget) : false;
    info.endpoint = {
        provider: target.provider,
        baseUrl: target.baseUrl,
        external: otherEndpoint,
    };

    const otherModel = runtimeTarget ? target.model !== runtimeTarget.model : false;
    if (otherModel || otherEndpoint) {
        info.external = true;
    }

    return info;
}
|
||||
|
||||
/**
 * Creates an `AiModelCapabilities` instance in which every capability starts
 * as unsupported and the given overrides replace the matching entries.
 */
function buildCapabilities(overrides: Partial<Record<AiCapabilityName, AiCapabilityInfo>>): AiModelCapabilities {
    const unsupported = (): AiCapabilityInfo => ({supported: false});

    const merged = {
        vision: unsupported(),
        ocr: unsupported(),
        thinking: unsupported(),
        extendedThinking: unsupported(),
        tools: unsupported(),
        audio: unsupported(),
        documents: unsupported(),
        outputImages: unsupported(),
        speechToText: unsupported(),
        textToSpeech: unsupported(),
        ...overrides,
    };

    return Object.assign(new AiModelCapabilities(), merged);
}
|
||||
|
||||
/** Lower-cases a model name so name checks are case-insensitive. */
function lowerModelName(model: string): string {
    const lowered = model.toLowerCase();
    return lowered;
}
|
||||
|
||||
/**
 * Name-based check for OpenAI text/chat models.
 *
 * Rejects empty names, image/TTS/Whisper/embedding/moderation families and
 * anything containing "transcribe"; accepts names starting with `gpt-`,
 * `o<digit>`, `chatgpt-`, `codex-` or `computer-use`.
 */
function isOpenAiTextModel(model: string): boolean {
    const name = lowerModelName(model);
    if (name.length === 0) return false;

    const nonTextFamily = /^(gpt-image|dall-e|tts-|whisper|text-embedding|text-moderation|omni-moderation)/;
    if (nonTextFamily.test(name) || name.includes("transcribe")) return false;

    const textFamily = /^(gpt-|o\d|chatgpt-|codex-|computer-use)/;
    return textFamily.test(name);
}
|
||||
|
||||
/**
 * Name-based check for OpenAI reasoning models: names starting with
 * `o<digit>` or with `gpt-5` (case-insensitive).
 */
function isOpenAiReasoningModel(model: string): boolean {
    const name = lowerModelName(model);
    if (name.startsWith("gpt-5")) return true;

    return /^o\d/.test(name);
}
|
||||
|
||||
/**
 * Name-based check for OpenAI models that accept images: any text model
 * except the `gpt-3.5` family and the `audio-preview` / `search-preview`
 * variants.
 */
function isOpenAiVisionModel(model: string): boolean {
    if (!isOpenAiTextModel(model)) return false;

    const name = lowerModelName(model);
    const textOnly = name.startsWith("gpt-3.5")
        || name.includes("audio-preview")
        || name.includes("search-preview");

    return !textOnly;
}
|
||||
|
||||
/**
 * Name-based check for Gemini-side models that are not chat models: names
 * containing `lyria`, `-tts` or `image-preview`, or ending in `-image`.
 */
function isGeminiNonChatModel(model: string): boolean {
    const name = lowerModelName(model);
    const markers = ["lyria", "-tts", "image-preview"];

    if (markers.some(marker => name.includes(marker))) return true;
    return name.endsWith("-image");
}
|
||||
|
||||
/**
 * Name-based check for Gemini models treated as accepting audio input:
 * the name starts with `gemini-` and it is not a non-chat model.
 */
function geminiSupportsAudioInput(model: string): boolean {
    const name = lowerModelName(model);
    if (!name.startsWith("gemini-")) return false;

    return !isGeminiNonChatModel(model);
}
|
||||
|
||||
/**
 * Builds the capability report for one model of one provider.
 *
 * - Ollama: asks the server (`show`) which capabilities the model reports.
 * - Mistral: asks the API (`models.retrieve`) for the model's capabilities.
 * - Gemini / OpenAI: derived from the model name; no request is made.
 *
 * Capabilities served by a separately configured model (documents, output
 * images, speech-to-text, text-to-speech) are reported against that model's
 * own resolved target rather than `model`.
 *
 * @param provider Provider to inspect.
 * @param model    Model name; an empty name yields `undefined`.
 * @param purpose  Purpose used to resolve the endpoint for `model` (default `"chat"`).
 * @returns The report, or `undefined` when the model name is empty, the
 *          provider is unrecognised, or anything throws (the error is logged).
 */
export async function getModelCapabilities(
    provider: AiProvider,
    model: string,
    purpose: AiCapabilityName | "chat" = "chat",
): Promise<AiModelCapabilities | undefined> {
    if (!model) return undefined;

    try {
        const runtimeTarget = resolveAiRuntimeTarget(provider, "chat", getRuntimeModel(provider));
        const target = resolveAiRuntimeTarget(provider, purpose, model);

        // Shorthand: report an entry relative to the main chat target;
        // `servedBy` defaults to the inspected model's own target.
        const entry = (supported: boolean, servedBy: AiRuntimeTarget = target): AiCapabilityInfo =>
            capability(supported, servedBy, runtimeTarget);

        if (provider === AiProvider.OLLAMA) {
            const client = createOllamaClient(target);
            const details = await client.show({model});
            const reported = Array.isArray(details.capabilities) ? details.capabilities : [];
            const has = (cap: string): boolean => reported.includes(cap);
            const audioSupported = isOllamaSpeechToTextModel(model);
            const documentsTarget = resolveAiRuntimeTarget(provider, "documents");

            return buildCapabilities({
                vision: entry(has("vision")),
                ocr: entry(has("ocr")),
                thinking: entry(has("thinking")),
                extendedThinking: entry(has("thinking") && model.includes("gpt-oss")),
                tools: entry(has("tools")),
                audio: entry(audioSupported),
                documents: entry(!!documentsTarget.model, documentsTarget),
                speechToText: entry(audioSupported),
            });
        }

        if (provider === AiProvider.GEMINI) {
            const name = lowerModelName(model);
            const chatLike = name.startsWith("gemini-") && !isGeminiNonChatModel(model);
            const reasoningModel = name.includes("2.5") || name.includes("thinking");
            const imageTarget = resolveAiRuntimeTarget(provider, "outputImages");
            const speechTarget = resolveAiRuntimeTarget(provider, "speechToText");
            const ttsTarget = resolveAiRuntimeTarget(provider, "textToSpeech");

            return buildCapabilities({
                vision: entry(chatLike),
                ocr: entry(chatLike),
                thinking: entry(reasoningModel),
                extendedThinking: entry(reasoningModel),
                tools: entry(chatLike),
                audio: entry(geminiSupportsAudioInput(model)),
                speechToText: entry(!!speechTarget.apiKey && geminiSupportsAudioInput(speechTarget.model), speechTarget),
                outputImages: entry(!!imageTarget.apiKey && !!imageTarget.model, imageTarget),
                textToSpeech: entry(!!ttsTarget.apiKey && !!ttsTarget.model, ttsTarget),
            });
        }

        if (provider === AiProvider.MISTRAL) {
            const client = createMistralClient(target);
            const details = await client.models.retrieve({modelId: model});
            const caps = details.type !== "UNKNOWN" ? details.capabilities : undefined;
            const speechTarget = resolveAiRuntimeTarget(provider, "speechToText");
            const ttsTarget = resolveAiRuntimeTarget(provider, "textToSpeech");

            return buildCapabilities({
                vision: entry(!!caps?.vision),
                ocr: entry(!!caps?.ocr),
                thinking: entry(!!caps?.reasoning),
                tools: entry(!!caps?.functionCalling),
                audio: entry(!!caps?.audio),
                documents: entry(true),
                speechToText: entry(!!speechTarget.model || !!caps?.audioTranscription, speechTarget),
                textToSpeech: entry(!!ttsTarget.apiKey && !!ttsTarget.model, ttsTarget),
            });
        }

        if (provider === AiProvider.OPENAI) {
            const textModel = isOpenAiTextModel(model);
            const reasoningModel = isOpenAiReasoningModel(model);
            const imageTarget = resolveAiRuntimeTarget(provider, "outputImages");
            const speechTarget = resolveAiRuntimeTarget(provider, "speechToText");
            const ttsTarget = resolveAiRuntimeTarget(provider, "textToSpeech");

            return buildCapabilities({
                vision: entry(isOpenAiVisionModel(model)),
                ocr: entry(isOpenAiVisionModel(model)),
                thinking: entry(reasoningModel),
                extendedThinking: entry(reasoningModel),
                tools: entry(textModel),
                outputImages: entry(!!imageTarget.model, imageTarget),
                speechToText: entry(!!speechTarget.model, speechTarget),
                textToSpeech: entry(!!ttsTarget.apiKey && !!ttsTarget.model, ttsTarget),
            });
        }

        return undefined;
    } catch (e) {
        logError(e);
        return undefined;
    }
}
|
||||
|
||||
|
||||
/**
 * Builds the effective capability report for the running configuration.
 *
 * Starts from the report of the main chat model (or an all-unsupported
 * report when that is unavailable). Then, for every capability whose
 * resolved target differs from the chat target (different model or different
 * endpoint), the entry is replaced by what that dedicated target reports.
 * Lookups run one after another.
 *
 * @param provider Provider to inspect; defaults to `Environment.DEFAULT_AI_PROVIDER`.
 * @param model    Chat model; defaults to the provider's current runtime model.
 */
export async function getRuntimeCapabilities(
    provider: AiProvider = Environment.DEFAULT_AI_PROVIDER,
    model: string | undefined = getRuntimeModel(provider)
): Promise<AiModelCapabilities> {
    const runtimeTarget = resolveAiRuntimeTarget(provider, "chat", model ?? getRuntimeModel(provider));
    const baseline = await getModelCapabilities(provider, runtimeTarget.model, "chat");
    const report = baseline ?? buildCapabilities({});

    for (const name of CAPABILITY_NAMES) {
        const dedicated = resolveAiRuntimeTarget(provider, name);

        // Served by the chat model itself: the baseline entry already covers it.
        const servedByRuntime = dedicated.model === runtimeTarget.model
            && sameRuntimeEndpoint(dedicated, runtimeTarget);
        if (servedByRuntime) continue;

        const dedicatedReport = await getModelCapabilities(provider, dedicated.model, name);
        const info = dedicatedReport?.[name];
        if (info) {
            report[name] = info;
        }
    }

    return report;
}
|
||||
|
||||
/**
 * Renders the capability report as display text using the `Environment`
 * text helpers.
 *
 * @param provider Provider to describe; defaults to `Environment.DEFAULT_AI_PROVIDER`.
 * @param model    Model to describe; defaults to the provider's runtime model.
 * @param caps     Pre-computed report; computed on demand when omitted.
 * @returns The text produced by `Environment.getRuntimeModelInfoText` from the
 *          lower-cased provider name, the model and one line per capability.
 */
export async function formatRuntimeModelInfo(
    provider: AiProvider = Environment.DEFAULT_AI_PROVIDER,
    model: string | undefined = getRuntimeModel(provider),
    caps?: AiModelCapabilities
): Promise<string> {
    const report = caps ? caps : await getRuntimeCapabilities(provider, model);

    // One rendered line per capability.
    function renderLine(title: string, info?: AiCapabilityInfo) {
        // Without an explicit flag, an entry is external when it names a
        // model other than the one being described.
        const namesOtherModel = !!info?.model && info.model !== model;

        return Environment.getRuntimeCapabilityLineText({
            state: info?.supported ? "✅" : "❌",
            title,
            model: info?.model,
            endpointBaseUrl: info?.endpoint?.baseUrl,
            external: info?.external ?? namesOtherModel,
        });
    }

    const providerLabel = provider.toString().toLowerCase();
    const lines = [
        renderLine(Environment.runtimeCapabilityVisionText, report.vision),
        renderLine(Environment.runtimeCapabilityOcrText, report.ocr),
        renderLine(Environment.runtimeCapabilityThinkingText, report.thinking),
        renderLine(Environment.runtimeCapabilityExtendedThinkingText, report.extendedThinking),
        renderLine(Environment.runtimeCapabilityToolsText, report.tools),
        renderLine(Environment.runtimeCapabilityAudioText, report.audio),
        renderLine(Environment.runtimeCapabilitySpeechToTextText, report.speechToText),
        renderLine(Environment.runtimeCapabilityTextToSpeechText, report.textToSpeech),
        renderLine(Environment.runtimeCapabilityDocumentsText, report.documents),
        renderLine(Environment.runtimeCapabilityOutputImagesText, report.outputImages),
    ];

    return Environment.getRuntimeModelInfoText(providerLabel, model, lines);
}
|
||||
|
||||
/**
 * Lists the model names a provider offers, using the provider's chat target
 * for endpoint and credentials.
 *
 * - Ollama: `model` (or `name`) of each listed entry.
 * - Gemini: uses the OpenAI-compatible client or the Google SDK depending on
 *   the API mode; in Google mode a leading `models/` is stripped.
 * - Mistral: `id` (or `name`) of each entry.
 * - OpenAI: `id` of each entry.
 *
 * Errors from the underlying clients are not caught here.
 */
export async function listProviderModels(provider: AiProvider): Promise<string[]> {
    const target = resolveAiRuntimeTarget(provider, "chat", getRuntimeModel(provider));

    switch (provider) {
        case AiProvider.OLLAMA: {
            const listing: any = await createOllamaClient(target).list();
            const entries: any[] = listing.models ?? [];
            return entries.map((m: any) => m.model || m.name).filter(Boolean);
        }

        case AiProvider.GEMINI: {
            const openAiMode = getGeminiApiMode(target) === "openai";
            const client: any = openAiMode
                ? createGeminiOpenAiClient(target)
                : createGoogleGenAiClient(target);
            const listing: any = await client.models.list();

            const names: string[] = [];
            for await (const entry of listing) {
                const raw = entry.name || entry.id || String(entry);
                // The Google SDK reports names as "models/<name>".
                names.push(openAiMode ? raw : String(raw).replace(/^models\//, ""));
            }
            return names;
        }

        case AiProvider.MISTRAL: {
            const listing: any = await createMistralClient(target).models.list();
            // The response shape is probed defensively: `.data`, `.models`, or the value itself.
            const entries: any[] = listing.data ?? listing.models ?? listing ?? [];
            return entries.map((m: any) => m.id || m.name || String(m)).filter(Boolean);
        }

        case AiProvider.OPENAI: {
            const listing: any = await createOpenAiClient(target).models.list();
            const entries: any[] = listing.data ?? [];
            return entries.map((m: any) => m.id).filter(Boolean);
        }
    }
}
|
||||
@@ -0,0 +1,184 @@
|
||||
import {Environment} from "../common/environment";
|
||||
import {AiProvider} from "../model/ai-provider";
|
||||
|
||||
/** Provider, model and optional base URL a queued request is destined for. */
export type AiRequestQueueTarget = {
    provider: AiProvider;
    model: string;
    baseUrl?: string;
};

/** Bookkeeping record for one enqueued request. */
type QueueEntry<T> = {
    target: AiRequestQueueTarget;
    /** Key of the queue this entry belongs to (see `queueKey`). */
    queueKey: string;
    run: () => Promise<T>;
    resolve: (value: T | PromiseLike<T>) => void;
    reject: (reason?: unknown) => void;
    /** Called with the number of requests ahead of this entry (active + waiting). */
    onPositionChange: (requestsBefore: number) => Promise<void> | void;
    signal?: AbortSignal;
    abortHandler?: () => void;
    /** Set once the entry has left the waiting list and `run` is about to be invoked. */
    started: boolean;
};

/** Options accepted by {@link AiProviderRequestQueue.enqueue}. */
type EnqueueOptions<T> = {
    signal?: AbortSignal;
    onPositionChange: (requestsBefore: number) => Promise<void> | void;
    run: () => Promise<T>;
};

/**
 * FIFO request queue with a per-target concurrency limit.
 *
 * Requests are grouped by provider, normalized base URL and trimmed model name.
 * Each group runs at most `Environment.getAiProviderMaxConcurrentRequests(provider)`
 * requests at once (never fewer than one); the rest wait in order.
 */
class AiProviderRequestQueue {
    private readonly waiting = new Map<string, Array<QueueEntry<any>>>();
    private readonly active = new Map<string, number>();

    /**
     * Queues `options.run` for the given target.
     *
     * @returns A promise settling with the outcome of `run`. It rejects with
     *          `Error("Aborted")` when the signal is already aborted, or is aborted
     *          while the request is still waiting. Once `run` has started the signal
     *          is no longer observed by the queue.
     */
    enqueue<T>(target: AiRequestQueueTarget, options: EnqueueOptions<T>): Promise<T> {
        if (options.signal?.aborted) {
            return Promise.reject(new Error("Aborted"));
        }

        return new Promise<T>((resolve, reject) => {
            const queueKey = this.queueKey(target);
            const entry: QueueEntry<T> = {
                target,
                queueKey,
                run: options.run,
                resolve,
                reject,
                onPositionChange: options.onPositionChange,
                signal: options.signal,
                started: false,
            };

            entry.abortHandler = () => {
                if (entry.started) return;

                const removed = this.removeWaitingEntry(entry);
                if (!removed) return;

                reject(new Error("Aborted"));
                // Positions of the remaining waiters changed; let them know.
                this.schedule(target);
            };

            options.signal?.addEventListener("abort", entry.abortHandler, {once: true});
            this.getOrCreateQueue(queueKey).push(entry);
            this.schedule(target);
        });
    }

    private getQueue(queueKey: string): Array<QueueEntry<any>> | undefined {
        return this.waiting.get(queueKey);
    }

    private getOrCreateQueue(queueKey: string): Array<QueueEntry<any>> {
        let queue = this.waiting.get(queueKey);
        if (!queue) {
            queue = [];
            this.waiting.set(queueKey, queue);
        }
        return queue;
    }

    private activeCount(queueKey: string): number {
        return this.active.get(queueKey) ?? 0;
    }

    /** Stores the running-request count; non-positive counts remove the map entry. */
    private setActiveCount(queueKey: string, count: number): void {
        if (count <= 0) {
            this.active.delete(queueKey);
            return;
        }
        this.active.set(queueKey, count);
    }

    /** Concurrency limit for the target's provider, never below one. */
    private maxActive(target: AiRequestQueueTarget): number {
        const limit = Math.max(1, Environment.getAiProviderMaxConcurrentRequests(target.provider));
        // Math.max yields NaN for a NaN input; `active < NaN` is always false and
        // would stall the queue forever, so fall back to a single slot.
        return Number.isNaN(limit) ? 1 : limit;
    }

    /** Trims the URL and drops trailing slashes so equivalent URLs share a queue. */
    private normalizeBaseUrl(baseUrl: string | undefined): string {
        return (baseUrl ?? "").trim().replace(/\/+$/, "");
    }

    private queueKey(target: AiRequestQueueTarget): string {
        return JSON.stringify([
            target.provider,
            this.normalizeBaseUrl(target.baseUrl),
            target.model.trim(),
        ]);
    }

    /** Removes a still-waiting entry and detaches its abort listener. */
    private removeWaitingEntry(entry: QueueEntry<any>): boolean {
        const queue = this.getQueue(entry.queueKey);
        if (!queue) return false;

        const index = queue.indexOf(entry);
        if (index < 0) return false;

        queue.splice(index, 1);
        if (entry.abortHandler) {
            entry.signal?.removeEventListener("abort", entry.abortHandler);
        }
        this.deleteQueueIfIdle(entry.queueKey, queue);
        return true;
    }

    /** Starts as many waiting entries as the limit allows, then notifies the rest. */
    private schedule(target: AiRequestQueueTarget): void {
        const queueKey = this.queueKey(target);
        const queue = this.getOrCreateQueue(queueKey);

        while (queue.length && this.activeCount(queueKey) < this.maxActive(target)) {
            const entry = queue.shift();
            if (!entry) continue;

            if (entry.abortHandler) {
                entry.signal?.removeEventListener("abort", entry.abortHandler);
            }

            if (entry.signal?.aborted) {
                entry.reject(new Error("Aborted"));
                continue;
            }

            entry.started = true;
            this.setActiveCount(queueKey, this.activeCount(queueKey) + 1);
            void this.runEntry(entry);
        }

        this.updateWaitingMessages(target);
        this.deleteQueueIfIdle(queueKey, queue);
    }

    /** Runs one entry, settles its promise and frees its slot afterwards. */
    private async runEntry(entry: QueueEntry<any>): Promise<void> {
        try {
            entry.resolve(await entry.run());
        } catch (e) {
            entry.reject(e);
        } finally {
            this.setActiveCount(entry.queueKey, this.activeCount(entry.queueKey) - 1);
            this.schedule(entry.target);
        }
    }

    /**
     * Tells every waiting entry how many requests are ahead of it.
     *
     * Callback failures are only logged. A synchronous throw is converted into a
     * rejection so it cannot escape into `schedule` and reject an unrelated caller
     * or surface as an unhandled rejection.
     */
    private updateWaitingMessages(target: AiRequestQueueTarget): void {
        const queueKey = this.queueKey(target);
        const active = this.activeCount(queueKey);
        const queue = [...(this.getQueue(queueKey) ?? [])];

        const notifications = queue.map((entry, index) => {
            try {
                return Promise.resolve(entry.onPositionChange(active + index));
            } catch (e) {
                return Promise.reject(e);
            }
        });

        Promise.allSettled(notifications).then(results => {
            for (const result of results) {
                if (result.status === "rejected") {
                    console.error(result.reason);
                }
            }
        }).catch(console.error);
    }

    /** Drops the waiting list from the map once nothing is waiting or running. */
    private deleteQueueIfIdle(queueKey: string, queue: Array<QueueEntry<any>>): void {
        if (!queue.length && this.activeCount(queueKey) <= 0) {
            this.waiting.delete(queueKey);
        }
    }
}
|
||||
|
||||
export const aiProviderRequestQueue = new AiProviderRequestQueue();
|
||||
@@ -0,0 +1,24 @@
|
||||
import {AiProvider} from "../model/ai-provider";
|
||||
|
||||
export const AI_REGENERATE_CALLBACK = "/regenerate_ai";
|
||||
|
||||
export type AiRegenerateCallbackData = {
|
||||
provider: AiProvider;
|
||||
think: boolean;
|
||||
};
|
||||
|
||||
/**
 * Serializes a regenerate request as "<command> <provider> <flag>".
 *
 * @param provider Provider to regenerate with.
 * @param think Encoded as "1" when true and "0" otherwise.
 */
export function buildAiRegenerateCallbackData(provider: AiProvider, think = false): string {
    const thinkFlag = think ? "1" : "0";
    return [AI_REGENERATE_CALLBACK, String(provider), thinkFlag].join(" ");
}
|
||||
|
||||
/**
 * Parses callback data of the form "<command> <provider> <flag>".
 *
 * @param data Raw callback data string.
 * @returns The decoded provider and think flag, or `null` when the command token
 *          is not exactly the regenerate command or the provider is not an
 *          `AiProvider` value. `think` is true for the flags "1" and "true".
 */
export function parseAiRegenerateCallbackData(data: string): AiRegenerateCallbackData | null {
    const [command, provider, think] = data.split(/\s+/);

    // Compare the whole first token: a prefix test would also accept unrelated
    // commands that merely begin with the same characters.
    if (command !== AI_REGENERATE_CALLBACK) return null;
    if (!Object.values(AiProvider).includes(provider as AiProvider)) return null;

    return {
        provider: provider as AiProvider,
        think: think === "1" || think === "true",
    };
}
|
||||
@@ -0,0 +1,227 @@
|
||||
import {Message} from "typescript-telegram-bot-api";
|
||||
import {bot} from "../index";
|
||||
import {downloadTelegramFile, logError} from "../util/utils";
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import {Environment} from "../common/environment";
|
||||
import {StoredAttachment, StoredAttachmentKind} from "../model/stored-attachment";
|
||||
import {performFFmpeg} from "../util/ffmpeg";
|
||||
import ffmpeg from "fluent-ffmpeg";
|
||||
import {AsyncSemaphore, KeyedAsyncLock} from "../util/async-lock";
|
||||
|
||||
export type AiDownloadedFile = {
|
||||
kind: StoredAttachmentKind;
|
||||
fileId: string;
|
||||
fileName: string;
|
||||
mimeType?: string;
|
||||
buffer: Buffer;
|
||||
path: string;
|
||||
};
|
||||
|
||||
const cachePathLocks = new KeyedAsyncLock();
|
||||
const ffmpegSemaphore = new AsyncSemaphore(2);
|
||||
|
||||
/**
 * Replaces path separators, reserved punctuation and control characters with
 * underscores and caps the result at 180 characters.
 */
function safeFileName(value: string): string {
    const forbiddenCharacters = /[\\/:*?"<>|\u0000-\u001F]/g;
    const sanitized = value.replace(forbiddenCharacters, "_");
    return sanitized.slice(0, 180);
}
|
||||
|
||||
/** Lower-case MIME type to file extension (with leading dot). */
const MIME_TYPE_EXTENSIONS: ReadonlyMap<string, string> = new Map([
    ["audio/ogg", ".ogg"],
    ["audio/opus", ".ogg"],
    ["audio/mpeg", ".mp3"],
    ["audio/mp3", ".mp3"],
    ["audio/mp4", ".m4a"],
    ["audio/x-m4a", ".m4a"],
    ["audio/wav", ".wav"],
    ["audio/wave", ".wav"],
    ["audio/x-wav", ".wav"],
    ["audio/webm", ".webm"],
    ["image/jpeg", ".jpg"],
    ["image/png", ".png"],
    ["image/webp", ".webp"],
    ["application/pdf", ".pdf"],
    ["text/plain", ".txt"],
    ["application/zip", ".zip"],
    ["application/x-zip", ".zip"],
    ["application/x-zip-compressed", ".zip"],
    ["application/x-tar", ".tar"],
    ["application/tar", ".tar"],
    ["application/gzip", ".gz"],
    ["application/x-gzip", ".gz"],
    ["application/gzip-compressed", ".gz"],
    ["video/mp4", ".mp4"],
]);

/**
 * Maps a MIME type to a file extension.
 *
 * @param mimeType MIME type, matched case-insensitively.
 * @returns The extension including the leading dot, or "" when the type is
 *          missing or not in the table.
 */
function extensionFromMimeType(mimeType?: string): string {
    const normalized = (mimeType || "").toLowerCase();
    return MIME_TYPE_EXTENSIONS.get(normalized) ?? "";
}
|
||||
|
||||
/**
 * Ensures a file name carries an extension.
 *
 * A name that already has one is returned unchanged. Otherwise the extension of
 * the Telegram file path is preferred, then the one derived from the MIME type;
 * when neither yields anything the name is returned as is.
 */
function fileNameWithExtension(fileName: string, mimeType?: string, telegramFilePath?: string): string {
    if (path.extname(fileName) !== "") {
        return fileName;
    }

    let suffix = "";
    if (telegramFilePath) {
        suffix = path.extname(telegramFilePath);
    }
    if (!suffix) {
        suffix = extensionFromMimeType(mimeType);
    }

    return suffix ? fileName + suffix : fileName;
}
|
||||
|
||||
/**
 * Returns the cache directory for an attachment kind, located under
 * `<DATA_PATH>/cache`. The "image" kind is stored in the "photo" directory;
 * every other kind uses its own name.
 */
function cacheDirFor(kind: StoredAttachmentKind): string {
    return path.join(Environment.DATA_PATH, "cache", kind === "image" ? "photo" : kind);
}
|
||||
|
||||
/**
 * Builds the cache location of an attachment.
 *
 * The file stem is the sanitized unique id (falling back to the file id) and
 * the extension is taken from `fileName`.
 */
function cachePathFor(kind: StoredAttachmentKind, fileUniqueId: string | undefined, fileId: string, fileName: string): string {
    const stem = safeFileName(fileUniqueId || fileId);
    const extension = path.extname(fileName);
    const directory = cacheDirFor(kind);
    return path.join(directory, stem + extension);
}
|
||||
|
||||
/**
 * Downloads a Telegram file into the attachment cache unless it is already there.
 *
 * The download runs under a per-path lock and is written to a temporary file
 * that is renamed into place, so a partially written file never appears at the
 * final location.
 *
 * @param kind Attachment kind; selects the cache directory.
 * @param fileId Telegram file id used to fetch the file.
 * @param fileName Preferred file name; an extension is added when missing.
 * @param mimeType Optional MIME type, used to derive a missing extension.
 * @param fileUniqueId Optional unique id, preferred over `fileId` for the cache name.
 * @returns Attachment metadata pointing at the cache location.
 *          NOTE(review): `null` is part of the declared type but is never returned,
 *          and metadata is returned even when the download produced no buffer —
 *          confirm whether callers rely on that.
 */
async function downloadToCache(kind: StoredAttachmentKind, fileId: string, fileName: string, mimeType?: string, fileUniqueId?: string): Promise<StoredAttachment | null> {
    const file = await bot.getFile({file_id: fileId});
    const finalFileName = fileNameWithExtension(fileName, mimeType, file.file_path);
    const location = cachePathFor(kind, fileUniqueId, fileId, finalFileName);

    await cachePathLocks.runExclusive(location, async () => {
        if (fs.existsSync(location)) return;

        const buffer = await downloadTelegramFile(file.file_path);
        if (!buffer) return;

        const tempLocation = `${location}.${process.pid}.${Date.now()}.tmp`;
        fs.mkdirSync(path.dirname(location), {recursive: true});
        try {
            fs.writeFileSync(tempLocation, buffer);
            fs.renameSync(tempLocation, location);
        } catch (e) {
            // Do not leave a stray temp file behind when the write or rename fails.
            fs.rmSync(tempLocation, {force: true});
            throw e;
        }
    });

    return {kind, fileId, fileUniqueId, fileName: finalFileName, mimeType, cachePath: location};
}
|
||||
|
||||
/**
 * Converts `input` to a WAV file at `output` unless that file already exists.
 *
 * The conversion runs under a lock keyed by `output` and inside the shared
 * ffmpeg semaphore. ffmpeg writes to a temporary file that is renamed to
 * `output` on success and removed on failure.
 *
 * @param input Path of the source media file.
 * @param output Path of the WAV file to produce.
 * @param noVideo When true, `noVideo()` is applied to the ffmpeg command.
 */
async function convertAudioToWav(input: string, output: string, noVideo = false): Promise<void> {
    const transcode = async (): Promise<void> => {
        // Re-check after acquiring the semaphore slot.
        if (fs.existsSync(output)) return;

        const tempOutput = `${output}.${process.pid}.${Date.now()}.tmp.wav`;
        try {
            await performFFmpeg(() => {
                const command = ffmpeg(input);
                if (noVideo) command.noVideo();

                return command
                    .toFormat("wav")
                    .save(tempOutput)
                    .on("progress", (progress) => {
                        console.log("progress", progress);
                    });
            });
            fs.renameSync(tempOutput, output);
        } catch (e) {
            if (fs.existsSync(tempOutput)) {
                fs.rmSync(tempOutput, {force: true});
            }
            throw e;
        }
    };

    await cachePathLocks.runExclusive(output, async () => {
        if (fs.existsSync(output)) return;

        await ffmpegSemaphore.runExclusive(async () => {
            await transcode();
        });
    });
}
|
||||
|
||||
/**
 * Downloads every supported attachment of a message into the cache.
 *
 * Handles, in this order: the last entry of `msg.photo`, a document, a voice
 * message, an audio file and a video note. Voice messages and video notes are
 * additionally converted to WAV; when the conversion fails the error is logged
 * and the attachment keeps pointing at the downloaded file.
 *
 * @returns The attachments cached so far. Any error outside the conversion
 *          step is logged and ends processing early.
 */
export async function cacheMessageAttachments(msg: Message): Promise<StoredAttachment[]> {
    const collected: StoredAttachment[] = [];

    // Converts a cached file to WAV in the audio cache and repoints the attachment at it.
    const switchToWav = async (
        attachment: StoredAttachment,
        uniqueId: string,
        id: string,
        sourceExtension: string,
        noVideo: boolean,
    ): Promise<void> => {
        const output = cachePathFor("audio", uniqueId, id, `${uniqueId || id}.wav`);
        try {
            await convertAudioToWav(attachment.cachePath, output, noVideo);
            attachment.cachePath = output;
            attachment.fileName = attachment?.fileName?.replace(sourceExtension, ".wav");
            attachment.mimeType = "audio/wav";
        } catch (e) {
            logError(e);
        }
    };

    try {
        if (msg.photo?.length) {
            const largest = msg.photo[msg.photo.length - 1]!;
            const cached = await downloadToCache(
                "image",
                largest.file_id,
                `${largest.file_unique_id || largest.file_id}.jpg`,
                "image/jpeg",
                largest.file_unique_id,
            );
            if (cached) collected.push(cached);
        }

        if (msg.document) {
            const doc = msg.document;
            let kind: StoredAttachmentKind = "document";
            if (doc.mime_type?.startsWith("image/")) {
                kind = "image";
            } else if (doc.mime_type?.startsWith("audio/")) {
                kind = "audio";
            }

            const cached = await downloadToCache(
                kind,
                doc.file_id,
                doc.file_name || `${doc.file_unique_id || doc.file_id}`,
                doc.mime_type,
                doc.file_unique_id,
            );
            if (cached) collected.push(cached);
        }

        if (msg.voice) {
            const voice = msg.voice;
            const cached = await downloadToCache(
                "audio",
                voice.file_id,
                `${voice.file_unique_id || voice.file_id}.ogg`,
                voice.mime_type || "audio/ogg",
                voice.file_unique_id,
            );
            if (cached) {
                await switchToWav(cached, voice.file_unique_id, voice.file_id, ".ogg", false);
                collected.push(cached);
            }
        }

        if (msg.audio) {
            const audio = msg.audio;
            const cached = await downloadToCache(
                "audio",
                audio.file_id,
                audio.file_name || `${audio.file_unique_id || audio.file_id}.mp3`,
                audio.mime_type,
                audio.file_unique_id,
            );
            if (cached) collected.push(cached);
        }

        if (msg.video_note) {
            const note = msg.video_note;
            const cached = await downloadToCache(
                "video-note",
                note.file_id,
                `${note.file_unique_id || note.file_id}.mp4`,
                "video/mp4",
                note.file_unique_id,
            );
            if (cached) {
                await switchToWav(cached, note.file_unique_id, note.file_id, ".mp4", true);
                collected.push(cached);
            }
        }
    } catch (e) {
        logError(e);
    }

    return collected;
}
|
||||
|
||||
/**
 * Loads cached attachments into memory.
 *
 * Attachments whose cache file does not exist are dropped; the remaining ones
 * are returned with the file contents read into `buffer`.
 */
export function attachmentsToDownloadedFiles(attachments: StoredAttachment[]): AiDownloadedFile[] {
    const present = attachments.filter(attachment => fs.existsSync(attachment.cachePath));

    const loaded: AiDownloadedFile[] = [];
    for (const attachment of present) {
        loaded.push({
            kind: attachment.kind,
            fileId: attachment.fileId,
            fileName: attachment.fileName,
            mimeType: attachment.mimeType,
            buffer: fs.readFileSync(attachment.cachePath),
            path: attachment.cachePath,
        });
    }
    return loaded;
}
|
||||
|
||||
/**
 * Releases the in-memory contents of downloaded files.
 *
 * The cached files stay on disk; each entry's buffer is replaced with an empty
 * one and the array itself is emptied in place.
 */
export function cleanupDownloads(files: AiDownloadedFile[]): void {
    files.forEach(file => {
        file.buffer = Buffer.alloc(0);
    });
    files.splice(0, files.length);
}
|
||||
@@ -0,0 +1,541 @@
|
||||
import {FileOptions, InlineKeyboardMarkup, Message} from "typescript-telegram-bot-api";
|
||||
import {bot} from "../index";
|
||||
import {buildCancelledGenerationText, logError, replyToMessage} from "../util/utils";
|
||||
import {Environment} from "../common/environment";
|
||||
import {MessageStore} from "../common/message-store";
|
||||
import {createQueuedFunction} from "../util/async-lock";
|
||||
import {enqueueTelegramApiCall} from "../util/telegram-api-queue";
|
||||
import fs from "node:fs";
|
||||
import {StoredAttachment, StoredAttachmentKind} from "../model/stored-attachment";
|
||||
import {StoredMessage} from "../model/stored-message";
|
||||
import {prepareTelegramMarkdownV2} from "../util/markdown-v2-renderer";
|
||||
import {AiProvider} from "../model/ai-provider";
|
||||
|
||||
const TELEGRAM_LIMIT = 4096;
|
||||
const TELEGRAM_CAPTION_LIMIT = 1024;
|
||||
const TELEGRAM_FILE_LIMIT_BYTES = 50 * 1024 * 1024;
|
||||
const TELEGRAM_PHOTO_LIMIT_BYTES = 10 * 1024 * 1024;
|
||||
const EDIT_INTERVAL_MS = 4500;
|
||||
|
||||
export type TelegramArtifactFile = {
|
||||
kind: "image" | "file";
|
||||
path: string;
|
||||
fileName: string;
|
||||
mimeType?: string;
|
||||
sizeBytes: number;
|
||||
};
|
||||
|
||||
export class TelegramStreamMessage {
|
||||
private waitMessage: Message | null = null;
|
||||
private timer: NodeJS.Timeout | null = null;
|
||||
private lastSent = "";
|
||||
private text = "";
|
||||
private status = "";
|
||||
private mediaMode = false;
|
||||
private cancelled = false;
|
||||
private cancelledProvider = "";
|
||||
private readonly startedAt = Date.now();
|
||||
private readonly enqueueEdit = createQueuedFunction();
|
||||
|
||||
constructor(
|
||||
private readonly sourceMessage: Message,
|
||||
private readonly cancelRequestId: string,
|
||||
private readonly stream: boolean,
|
||||
private readonly regenerateCallbackData?: string,
|
||||
private readonly targetMessage?: Message,
|
||||
private readonly cancelProvider?: AiProvider,
|
||||
private readonly isGuest?: boolean,
|
||||
) {
|
||||
}
|
||||
|
||||
keyboard(): InlineKeyboardMarkup {
|
||||
return {
|
||||
inline_keyboard: [[{
|
||||
text: Environment.cancelText,
|
||||
callback_data: this.cancelProvider
|
||||
? `/cancel_ai ${this.cancelRequestId} ${this.cancelProvider}`
|
||||
: `/cancel_ai ${this.cancelRequestId}`,
|
||||
}]],
|
||||
};
|
||||
}
|
||||
|
||||
emptyKeyboard(): InlineKeyboardMarkup {
|
||||
return {inline_keyboard: []};
|
||||
}
|
||||
|
||||
regenerateKeyboard(): InlineKeyboardMarkup | null {
|
||||
if (!this.regenerateCallbackData) return null;
|
||||
|
||||
return {
|
||||
inline_keyboard: [[{
|
||||
text: Environment.regenerateText,
|
||||
callback_data: this.regenerateCallbackData,
|
||||
}]],
|
||||
};
|
||||
}
|
||||
|
||||
private isMessageNotModified(error: unknown): boolean {
|
||||
const textToLookUp = "message is not modified";
|
||||
|
||||
if (error && error instanceof Error) {
|
||||
return String(error.message).includes(textToLookUp);
|
||||
}
|
||||
|
||||
if (error && error instanceof String) {
|
||||
return error.includes(textToLookUp);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private async updateKeyboard(replyMarkup: InlineKeyboardMarkup): Promise<void> {
|
||||
if (!this.waitMessage) return;
|
||||
|
||||
try {
|
||||
await enqueueTelegramApiCall(
|
||||
() => bot.editMessageReplyMarkup({
|
||||
chat_id: this.waitMessage!.chat.id,
|
||||
message_id: this.waitMessage!.message_id,
|
||||
reply_markup: replyMarkup,
|
||||
}),
|
||||
{
|
||||
method: "editMessageReplyMarkup",
|
||||
chatId: this.waitMessage.chat.id,
|
||||
chatType: this.waitMessage.chat.type,
|
||||
}
|
||||
);
|
||||
} catch (e) {
|
||||
if (!this.isMessageNotModified(e)) logError(e);
|
||||
}
|
||||
}
|
||||
|
||||
private async removeKeyboard(): Promise<void> {
|
||||
await this.updateKeyboard(this.emptyKeyboard());
|
||||
}
|
||||
|
||||
private startFlushTimer(): void {
|
||||
if (this.timer) clearInterval(this.timer);
|
||||
this.timer = setInterval(() => this.flush().catch(logError), EDIT_INTERVAL_MS);
|
||||
}
|
||||
|
||||
private visibleText(): string {
|
||||
const parts = [this.text, this.status].filter(v => v && v.trim().length);
|
||||
let value = parts.join("\n\n").trim() || Environment.waitThinkText;
|
||||
if (value.length > TELEGRAM_LIMIT) {
|
||||
value = value.substring(0, TELEGRAM_LIMIT - 1);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
private visibleCaption(): string {
|
||||
let value = this.visibleText();
|
||||
if (value.length > TELEGRAM_CAPTION_LIMIT) {
|
||||
value = value.substring(0, TELEGRAM_CAPTION_LIMIT - 1);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
async start(initialStatus: string): Promise<Message> {
|
||||
this.status = initialStatus;
|
||||
const rawText = this.visibleText();
|
||||
const formatted = prepareTelegramMarkdownV2(rawText, {mode: "draft"});
|
||||
|
||||
if (this.targetMessage) {
|
||||
this.waitMessage = this.targetMessage;
|
||||
|
||||
try {
|
||||
await MessageStore.put(this.targetMessage).catch(logError);
|
||||
const result = await enqueueTelegramApiCall(
|
||||
() => bot.editMessageText({
|
||||
chat_id: this.targetMessage!.chat.id,
|
||||
message_id: this.targetMessage!.message_id,
|
||||
text: formatted,
|
||||
parse_mode: "MarkdownV2",
|
||||
reply_markup: this.keyboard(),
|
||||
}),
|
||||
{
|
||||
method: "editMessageText",
|
||||
chatId: this.targetMessage.chat.id,
|
||||
chatType: this.targetMessage.chat.type,
|
||||
}
|
||||
);
|
||||
if (result && result !== true) this.waitMessage = result;
|
||||
this.mediaMode = false;
|
||||
this.lastSent = rawText;
|
||||
await this.store();
|
||||
this.startFlushTimer();
|
||||
return this.waitMessage;
|
||||
} catch (e) {
|
||||
if (this.isMessageNotModified(e)) {
|
||||
this.lastSent = rawText;
|
||||
await this.updateKeyboard(this.keyboard());
|
||||
await this.store();
|
||||
this.startFlushTimer();
|
||||
return this.waitMessage;
|
||||
}
|
||||
|
||||
logError(e);
|
||||
this.waitMessage = null;
|
||||
this.mediaMode = false;
|
||||
}
|
||||
}
|
||||
|
||||
this.waitMessage = await replyToMessage({
|
||||
message: this.sourceMessage,
|
||||
text: formatted,
|
||||
reply_markup: this.keyboard(),
|
||||
parse_mode: "MarkdownV2"
|
||||
});
|
||||
this.lastSent = rawText;
|
||||
this.startFlushTimer();
|
||||
return this.waitMessage;
|
||||
}
|
||||
|
||||
setStatus(status: string): void {
|
||||
if (this.cancelled) return;
|
||||
this.status = status;
|
||||
}
|
||||
|
||||
getStatus(): string {
|
||||
return this.status;
|
||||
}
|
||||
|
||||
clearStatus(): void {
|
||||
if (this.cancelled) return;
|
||||
this.status = "";
|
||||
}
|
||||
|
||||
append(delta: string): void {
|
||||
if (this.cancelled) return;
|
||||
if (!delta) return;
|
||||
this.text += delta;
|
||||
}
|
||||
|
||||
replaceText(text: string): void {
|
||||
if (this.cancelled) return;
|
||||
this.text = text;
|
||||
}
|
||||
|
||||
getText(): string {
|
||||
return this.text;
|
||||
}
|
||||
|
||||
async flush(replyMarkup: InlineKeyboardMarkup | null = this.keyboard(), end?: boolean): Promise<void> {
|
||||
return this.enqueueEdit(() => this.flushUnsafe(replyMarkup, end));
|
||||
}
|
||||
|
||||
private async flushUnsafe(replyMarkup: InlineKeyboardMarkup | null = this.keyboard(), end?: boolean): Promise<void> {
|
||||
if (!this.waitMessage && this.stream) return;
|
||||
|
||||
const next = this.mediaMode ? this.visibleCaption() : this.visibleText();
|
||||
const shouldRemoveKeyboard = replyMarkup === null;
|
||||
if (next === this.lastSent && shouldRemoveKeyboard) {
|
||||
await this.removeKeyboard();
|
||||
return;
|
||||
}
|
||||
|
||||
const formatted = prepareTelegramMarkdownV2(next, {mode: end ? "final" : "draft"});
|
||||
|
||||
if (next === this.lastSent && replyMarkup !== null) {
|
||||
if (end) await this.updateKeyboard(replyMarkup);
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
if (!this.stream && end && !this.waitMessage) {
|
||||
if (this.isGuest) {
|
||||
// await enqueueTelegramApiCall(() => bot.answerGuestQuery({
|
||||
// guest_query_id: this.sourceMessage.guest_query_id ?? "",
|
||||
// result: {}
|
||||
// }),
|
||||
// {});
|
||||
} else {
|
||||
await replyToMessage({
|
||||
message: this.sourceMessage,
|
||||
text: formatted,
|
||||
parse_mode: "MarkdownV2",
|
||||
});
|
||||
}
|
||||
} else {
|
||||
if (this.waitMessage) {
|
||||
const result = this.mediaMode
|
||||
? await enqueueTelegramApiCall(
|
||||
() => bot.editMessageCaption({
|
||||
chat_id: this.waitMessage!.chat.id,
|
||||
message_id: this.waitMessage!.message_id,
|
||||
caption: formatted,
|
||||
parse_mode: "MarkdownV2",
|
||||
reply_markup: replyMarkup ?? this.emptyKeyboard(),
|
||||
}),
|
||||
{
|
||||
method: "editMessageCaption",
|
||||
chatId: this.waitMessage.chat.id,
|
||||
chatType: this.waitMessage.chat.type,
|
||||
}
|
||||
)
|
||||
: await enqueueTelegramApiCall(
|
||||
() => bot.editMessageText({
|
||||
chat_id: this.waitMessage!.chat.id,
|
||||
message_id: this.waitMessage!.message_id,
|
||||
text: formatted,
|
||||
parse_mode: "MarkdownV2",
|
||||
reply_markup: replyMarkup ?? this.emptyKeyboard(),
|
||||
}),
|
||||
{
|
||||
method: "editMessageText",
|
||||
chatId: this.waitMessage.chat.id,
|
||||
chatType: this.waitMessage.chat.type,
|
||||
}
|
||||
);
|
||||
if (result && result !== true) this.waitMessage = result;
|
||||
}
|
||||
}
|
||||
if (shouldRemoveKeyboard) await this.removeKeyboard();
|
||||
this.lastSent = next;
|
||||
} catch (e: any) {
|
||||
if (shouldRemoveKeyboard && this.isMessageNotModified(e)) {
|
||||
await this.removeKeyboard();
|
||||
this.lastSent = next;
|
||||
return;
|
||||
}
|
||||
if (!this.isMessageNotModified(e)) logError(e);
|
||||
}
|
||||
}
|
||||
|
||||
async cancel(provider: string): Promise<void> {
|
||||
if (this.timer) clearInterval(this.timer);
|
||||
this.timer = null;
|
||||
this.cancelled = true;
|
||||
this.cancelledProvider = provider;
|
||||
this.status = "";
|
||||
this.text = buildCancelledGenerationText(this.text, this.cancelledProvider, this.mediaMode ? TELEGRAM_CAPTION_LIMIT : TELEGRAM_LIMIT);
|
||||
await this.flush(this.regenerateKeyboard(), true);
|
||||
await this.store();
|
||||
}
|
||||
|
||||
async showImage(image: Buffer): Promise<void> {
|
||||
return this.enqueueEdit(() => this.showImageUnsafe(image));
|
||||
}
|
||||
|
||||
async sendArtifact(file: TelegramArtifactFile): Promise<Message | null> {
|
||||
return this.enqueueEdit(() => this.sendArtifactUnsafe(file));
|
||||
}
|
||||
|
||||
private async showImageUnsafe(image: Buffer): Promise<void> {
|
||||
if (this.cancelled) return;
|
||||
const next = this.visibleCaption();
|
||||
|
||||
if (!this.waitMessage) {
|
||||
if (this.stream) return;
|
||||
|
||||
this.waitMessage = await enqueueTelegramApiCall(
|
||||
() => bot.sendPhoto({
|
||||
chat_id: this.sourceMessage.chat.id,
|
||||
photo: image,
|
||||
caption: prepareTelegramMarkdownV2(next, {mode: "final"}),
|
||||
parse_mode: "MarkdownV2",
|
||||
reply_parameters: {message_id: this.sourceMessage.message_id},
|
||||
}),
|
||||
{
|
||||
method: "sendPhoto",
|
||||
chatId: this.sourceMessage.chat.id,
|
||||
chatType: this.sourceMessage.chat.type,
|
||||
}
|
||||
);
|
||||
this.mediaMode = true;
|
||||
this.lastSent = next;
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await enqueueTelegramApiCall(
|
||||
() => bot.editMessageMedia({
|
||||
chat_id: this.waitMessage!.chat.id,
|
||||
message_id: this.waitMessage!.message_id,
|
||||
media: {
|
||||
type: "photo",
|
||||
media: image,
|
||||
caption: prepareTelegramMarkdownV2(next, {mode: "final"}),
|
||||
parse_mode: "MarkdownV2",
|
||||
},
|
||||
reply_markup: this.keyboard(),
|
||||
}),
|
||||
{
|
||||
method: "editMessageMedia",
|
||||
chatId: this.waitMessage.chat.id,
|
||||
chatType: this.waitMessage.chat.type,
|
||||
}
|
||||
);
|
||||
if (result && result !== true) this.waitMessage = result;
|
||||
this.mediaMode = true;
|
||||
this.lastSent = next;
|
||||
} catch (e: any) {
|
||||
if (!String(e?.message ?? e).includes("message is not modified")) logError(e);
|
||||
}
|
||||
}
|
||||
|
||||
private async sendArtifactUnsafe(file: TelegramArtifactFile): Promise<Message | null> {
|
||||
if (this.cancelled) return null;
|
||||
|
||||
if (file.sizeBytes > TELEGRAM_FILE_LIMIT_BYTES) {
|
||||
throw new Error(Environment.getTelegramFileTooLargeText(
|
||||
file.fileName,
|
||||
TELEGRAM_FILE_LIMIT_BYTES / 1024 / 1024,
|
||||
));
|
||||
}
|
||||
|
||||
const caption = file.fileName.slice(0, TELEGRAM_CAPTION_LIMIT);
|
||||
const isPhoto = this.isPhotoArtifact(file);
|
||||
|
||||
await enqueueTelegramApiCall(
|
||||
() => bot.sendChatAction({
|
||||
chat_id: this.sourceMessage.chat.id,
|
||||
action: isPhoto ? "upload_photo" : "upload_document",
|
||||
}),
|
||||
{
|
||||
method: "sendChatAction",
|
||||
chatId: this.sourceMessage.chat.id,
|
||||
chatType: this.sourceMessage.chat.type,
|
||||
}
|
||||
).catch(logError);
|
||||
|
||||
let sent: Message;
|
||||
if (isPhoto) {
|
||||
try {
|
||||
sent = await enqueueTelegramApiCall(
|
||||
async () => {
|
||||
const upload = this.createArtifactUpload(file);
|
||||
try {
|
||||
return await bot.sendPhoto({
|
||||
chat_id: this.sourceMessage.chat.id,
|
||||
photo: upload,
|
||||
caption,
|
||||
reply_parameters: {message_id: this.sourceMessage.message_id},
|
||||
});
|
||||
} finally {
|
||||
this.destroyUpload(upload);
|
||||
}
|
||||
},
|
||||
{
|
||||
method: "sendPhoto",
|
||||
chatId: this.sourceMessage.chat.id,
|
||||
chatType: this.sourceMessage.chat.type,
|
||||
}
|
||||
);
|
||||
} catch (e) {
|
||||
logError(e);
|
||||
sent = await this.sendArtifactAsDocument(file, caption);
|
||||
}
|
||||
} else {
|
||||
sent = await this.sendArtifactAsDocument(file, caption);
|
||||
}
|
||||
|
||||
await this.storeArtifactMessage(sent, file);
|
||||
return sent;
|
||||
}
|
||||
|
||||
private isPhotoArtifact(file: TelegramArtifactFile): boolean {
|
||||
return file.kind === "image"
|
||||
&& file.sizeBytes <= TELEGRAM_PHOTO_LIMIT_BYTES
|
||||
&& ["image/jpeg", "image/png", "image/webp"].includes((file.mimeType || "").toLowerCase());
|
||||
}
|
||||
|
||||
private createArtifactUpload(file: TelegramArtifactFile): FileOptions {
|
||||
return new FileOptions(fs.createReadStream(file.path), {
|
||||
filename: file.fileName,
|
||||
contentType: file.mimeType || "application/octet-stream",
|
||||
});
|
||||
}
|
||||
|
||||
private destroyUpload(upload: FileOptions): void {
|
||||
if ("destroy" in upload.file && typeof upload.file.destroy === "function") {
|
||||
upload.file.destroy();
|
||||
}
|
||||
}
|
||||
|
||||
private async sendArtifactAsDocument(file: TelegramArtifactFile, caption: string): Promise<Message> {
|
||||
return enqueueTelegramApiCall(
|
||||
async () => {
|
||||
const upload = this.createArtifactUpload(file);
|
||||
try {
|
||||
return await bot.sendDocument({
|
||||
chat_id: this.sourceMessage.chat.id,
|
||||
document: upload,
|
||||
caption,
|
||||
reply_parameters: {message_id: this.sourceMessage.message_id},
|
||||
});
|
||||
} finally {
|
||||
this.destroyUpload(upload);
|
||||
}
|
||||
},
|
||||
{
|
||||
method: "sendDocument",
|
||||
chatId: this.sourceMessage.chat.id,
|
||||
chatType: this.sourceMessage.chat.type,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Persists a sent artifact message, together with a single attachment
 * record, through `MessageStore.put`.
 *
 * @param sent Message returned for the sent artifact.
 * @param file Artifact that was sent; supplies file name, MIME type and path.
 */
private async storeArtifactMessage(sent: Message, file: TelegramArtifactFile): Promise<void> {
    const sizes = sent.photo;
    // Last entry of the photo array, if the message carries one.
    const lastPhoto = sizes == null ? undefined : sizes[sizes.length - 1];
    const doc = sent.document;

    const kind: StoredAttachmentKind = file.kind === "image" ? "image" : "document";

    const attachment: StoredAttachment = {
        kind,
        // Prefer the document id, then the photo id, then the local path.
        fileId: doc?.file_id ?? lastPhoto?.file_id ?? file.path,
        fileUniqueId: doc?.file_unique_id ?? lastPhoto?.file_unique_id,
        fileName: file.fileName,
        mimeType: file.mimeType,
        cachePath: file.path,
    };

    const record: StoredMessage = {
        chatId: sent.chat.id,
        id: sent.message_id,
        replyToMessageId: sent.reply_to_message?.message_id ?? this.sourceMessage.message_id,
        fromId: sent.from?.id ?? 0,
        text: sent.caption ?? file.fileName,
        // Fall back to the current time in whole seconds.
        date: sent.date ?? Math.floor(Date.now() / 1000),
        attachments: [attachment],
    };

    await MessageStore.put(record);
}
|
||||
|
||||
/**
 * Finalises the response: stops the timer, performs a final flush and stores
 * the message.
 *
 * When the response was not cancelled, the elapsed time is appended first
 * (only if `Environment.SEND_TIME_TOOK` is set and
 * `text.length + 32 < TELEGRAM_LIMIT`) and the status is cleared. A cancelled
 * response skips both steps and goes straight to flush and store.
 *
 * @param removeKeyboard When `true` (default) the final flush uses
 *        `regenerateKeyboard()`; otherwise it uses `keyboard()`.
 */
async finish(removeKeyboard = true): Promise<void> {
    if (this.timer) clearInterval(this.timer);
    this.timer = null;

    if (!this.cancelled) {
        if (Environment.SEND_TIME_TOOK) {
            const elapsedMs = Date.now() - this.startedAt;
            const hasRoom = this.text.length + 32 < TELEGRAM_LIMIT;
            if (hasRoom) this.text += `\n\n⏱️ ${elapsedMs}ms`;
        }

        this.clearStatus();
    }

    const markup = removeKeyboard ? this.regenerateKeyboard() : this.keyboard();
    await this.flush(markup, true);
    await this.store();
}
|
||||
|
||||
/**
 * Replaces the response with an error message and flushes it.
 *
 * Stops the timer, empties the status, sets the text to
 * `Environment.errorText` followed by the error's message (or its string
 * form when it is not an `Error`), then flushes with the regenerate
 * keyboard. This method does not call `store()`.
 *
 * @param error The failure to report.
 */
async fail(error: unknown): Promise<void> {
    if (this.timer) clearInterval(this.timer);
    this.timer = null;
    this.status = "";

    const heading = Environment.errorText;
    const detail = error instanceof Error ? error.message : String(error);
    this.text = `${heading}\n${detail}`;

    const markup = this.regenerateKeyboard();
    await this.flush(markup, true);
}
|
||||
|
||||
/**
 * Saves the wait message to `MessageStore`, with its text replaced by the
 * currently visible text. Does nothing when there is no wait message.
 * Failures are logged through `logError` and not rethrown.
 */
private async store(): Promise<void> {
    const base = this.waitMessage;
    if (!base) return;

    try {
        const snapshot = {...base, text: this.visibleText()} as Message;
        await MessageStore.put(snapshot);
    } catch (err) {
        logError(err);
    }
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,14 @@
|
||||
import {AiProvider} from "./ai-provider";
|
||||
|
||||
/**
 * Describes the endpoint associated with a capability. Every field is optional.
 */
export type AiEndpointInfo = {
    /** Provider associated with the endpoint. */
    provider?: AiProvider;

    /** Base URL of the endpoint. */
    baseUrl?: string;

    /** NOTE(review): presumably marks an endpoint outside the main provider; confirm against callers. */
    external?: boolean;
};
|
||||
|
||||
/**
 * Describes a single model capability. Every field is optional.
 *
 * `model` is declared exactly once: a type literal must not repeat a member
 * name, and consecutive members need a separator.
 */
export type AiCapabilityInfo = {
    /** Whether the capability is supported. */
    supported?: boolean,

    /** NOTE(review): presumably means the capability is served externally; confirm against callers. */
    external?: boolean,

    /** Name of the model associated with this capability. */
    model?: string,

    /** Endpoint details associated with this capability. */
    endpoint?: AiEndpointInfo,
};
|
||||
@@ -1,9 +1,14 @@
|
||||
import {AiCapabilityInfo} from "./ai-capability-info";
|
||||
|
||||
/**
 * Capability map of an AI model: one field per capability, each holding an
 * `AiCapabilityInfo` or `undefined`.
 *
 * Each capability is declared exactly once. Repeating a property name in a
 * class body (for example `vision?: …` followed by `vision: … | undefined`)
 * is a duplicate-identifier error, so only the explicit `| undefined`
 * declarations are kept.
 */
export class AiModelCapabilities {
    vision: AiCapabilityInfo | undefined;
    ocr: AiCapabilityInfo | undefined;
    thinking: AiCapabilityInfo | undefined;
    extendedThinking: AiCapabilityInfo | undefined;
    tools: AiCapabilityInfo | undefined;
    audio: AiCapabilityInfo | undefined;
    documents: AiCapabilityInfo | undefined;
    outputImages: AiCapabilityInfo | undefined;
    speechToText: AiCapabilityInfo | undefined;
    textToSpeech: AiCapabilityInfo | undefined;
}
|
||||
Reference in New Issue
Block a user