feat: add Ollama audio transcription and runtime config reload

- add audio capability reporting for Ollama models
- support Telegram voice messages via ffmpeg conversion and Ollama transcription
- add USE_SYSTEM_PROMPT toggle and runtime reloading of .env/system prompt settings
- support ollama_options.json for custom Ollama request options
- improve Telegram MarkdownV2 escaping and formatting preservation
- add environment setters for AI provider credentials and models
- show audio capability in info/model commands
This commit is contained in:
2026-05-01 05:09:10 +03:00
parent 382e00ce31
commit 0f91e43ea0
11 changed files with 937 additions and 217 deletions
+1 -1
View File
@@ -45,7 +45,7 @@ export class GeminiChat extends ChatCommand {
}); });
chatMessages.reverse(); chatMessages.reverse();
if (Environment.SYSTEM_PROMPT) { if (Environment.SYSTEM_PROMPT && Environment.USE_SYSTEM_PROMPT) {
chatMessages.unshift({role: "system", content: Environment.SYSTEM_PROMPT}); chatMessages.unshift({role: "system", content: Environment.SYSTEM_PROMPT});
} }
+2 -1
View File
@@ -36,7 +36,8 @@ export class Info extends Command {
`vision${aiModelCapabilities.vision?.external ? "(ext)" : ""}: ${boolToEmoji(aiModelCapabilities.vision?.supported)}\n` + `vision${aiModelCapabilities.vision?.external ? "(ext)" : ""}: ${boolToEmoji(aiModelCapabilities.vision?.supported)}\n` +
`ocr${aiModelCapabilities.ocr?.external ? "(ext)" : ""}: ${boolToEmoji(aiModelCapabilities.ocr?.supported)}\n` + `ocr${aiModelCapabilities.ocr?.external ? "(ext)" : ""}: ${boolToEmoji(aiModelCapabilities.ocr?.supported)}\n` +
`thinking${aiModelCapabilities.thinking?.external ? "(ext)" : ""}: ${boolToEmoji(aiModelCapabilities.thinking?.supported)}\n` + `thinking${aiModelCapabilities.thinking?.external ? "(ext)" : ""}: ${boolToEmoji(aiModelCapabilities.thinking?.supported)}\n` +
`tools${aiModelCapabilities.tools?.external ? "(ext)" : ""}: ${boolToEmoji(aiModelCapabilities.tools?.supported)}` + `tools${aiModelCapabilities.tools?.external ? "(ext)" : ""}: ${boolToEmoji(aiModelCapabilities.tools?.supported)}\n` +
`audio${aiModelCapabilities.audio?.external ? "(ext)": ""}: ${boolToEmoji(aiModelCapabilities.audio?.supported)}` +
"```"; "```";
const cmds = commands.filter(c => !(c instanceof ChatCommand)); const cmds = commands.filter(c => !(c instanceof ChatCommand));
+1 -1
View File
@@ -59,7 +59,7 @@ export class MistralChat extends ChatCommand {
}); });
chatMessages.reverse(); chatMessages.reverse();
if (Environment.SYSTEM_PROMPT) { if (Environment.SYSTEM_PROMPT && Environment.USE_SYSTEM_PROMPT) {
chatMessages.unshift({role: "system", content: [{type: "text", text: Environment.SYSTEM_PROMPT}]}); chatMessages.unshift({role: "system", content: [{type: "text", text: Environment.SYSTEM_PROMPT}]});
} }
+54 -22
View File
@@ -14,6 +14,10 @@ import {Cancel} from "../callback_commands/cancel";
import {OllamaCancel} from "../callback_commands/ollama-cancel"; import {OllamaCancel} from "../callback_commands/ollama-cancel";
import {OllamaGetModel} from "./ollama-get-model"; import {OllamaGetModel} from "./ollama-get-model";
import {ChatCommand} from "../base/chat-command"; import {ChatCommand} from "../base/chat-command";
import {MessagePart} from "../common/message-part";
import {Options} from "ollama";
import fs from "node:fs";
import path from "node:path";
export class OllamaChat extends ChatCommand { export class OllamaChat extends ChatCommand {
command = ["ollamaThink", "ollama"]; command = ["ollamaThink", "ollama"];
@@ -27,25 +31,38 @@ export class OllamaChat extends ChatCommand {
return this.executeOllama(msg, match?.[3], match?.[1]?.toLowerCase()?.startsWith("ollamathink")); return this.executeOllama(msg, match?.[3], match?.[1]?.toLowerCase()?.startsWith("ollamathink"));
} }
async executeOllama(msg: Message, text: string, think: boolean = false): Promise<void> { async executeOllama(msg: Message, text: string, think: boolean = false, voiceB64?: string): Promise<void> {
if (!text || text.trim().length === 0) return; if ((!text || text.trim().length === 0) && !voiceB64) return;
const chatId = msg.chat.id; const chatId = msg.chat.id;
const storedMsg = await MessageStore.get(chatId, msg.message_id); const storedMsg = await MessageStore.get(chatId, msg.message_id);
const messageParts = await collectReplyChainText(storedMsg);
console.log("MESSAGE PARTS", messageParts);
const chatMessages = messageParts.map(part => { let messageParts: MessagePart[] = [];
if (!voiceB64) {
messageParts = await collectReplyChainText(storedMsg);
console.log("MESSAGE PARTS", messageParts);
}
const chatMessages = !voiceB64 ? messageParts.map(part => {
return { return {
role: part.bot ? "assistant" : "user", role: part.bot ? "assistant" : "user",
content: (Environment.USE_NAMES_IN_PROMPT && !part.bot ? `MESSAGE FROM USER "${part.name}":\n` : "") + part.content, content: (Environment.USE_NAMES_IN_PROMPT && !part.bot ? `"${part.name}":\n` : "") + part.content,
images: part.images images: part.images
}; };
}); }) : [
{
role: "user",
content: "Transcribe this audio file",
images: [voiceB64]
}
];
chatMessages.reverse(); chatMessages.reverse();
if (Environment.SYSTEM_PROMPT) { console.log("PARTS", chatMessages);
if (Environment.SYSTEM_PROMPT && !voiceB64 && Environment.USE_SYSTEM_PROMPT) {
chatMessages.unshift({role: "system", content: Environment.SYSTEM_PROMPT, images: []}); chatMessages.unshift({role: "system", content: Environment.SYSTEM_PROMPT, images: []});
} }
@@ -97,16 +114,28 @@ export class OllamaChat extends ChatCommand {
waitMessage = await replyToMessage({ waitMessage = await replyToMessage({
message: msg, message: msg,
text: (!think && imagesCount) ? text: (!think && imagesCount && !voiceB64) ?
imagesCount > 1 ? Environment.analyzingPicturesText : Environment.analyzingPictureText imagesCount > 1 ? Environment.analyzingPicturesText : Environment.analyzingPictureText
: Environment.waitThinkText : voiceB64 ? Environment.transcribingAudioText : Environment.waitThinkText
}); });
let options: Partial<Options> | null = null
try {
const optionsPath = path.join(Environment.DATA_PATH, "ollama_options.json");
if (fs.existsSync(optionsPath)) {
options = JSON.parse(fs.readFileSync(optionsPath).toString());
}
} catch (e) {
logError(e);
}
const stream = await ollama.chat({ const stream = await ollama.chat({
model: think ? Environment.OLLAMA_THINK_MODEL : imagesCount ? Environment.OLLAMA_IMAGE_MODEL : Environment.OLLAMA_MODEL, model: think ? Environment.OLLAMA_THINK_MODEL : imagesCount ? Environment.OLLAMA_IMAGE_MODEL : Environment.OLLAMA_MODEL,
stream: true, stream: true,
think: think, think: think,
messages: chatMessages, messages: chatMessages,
options: options
}); });
const newRequest = { const newRequest = {
@@ -171,7 +200,7 @@ export class OllamaChat extends ChatCommand {
chat_id: chatId, chat_id: chatId,
message_id: waitMessage.message_id, message_id: waitMessage.message_id,
text: "🤔 Размышляю...", text: "🤔 Размышляю...",
parse_mode: "Markdown", parse_mode: "MarkdownV2",
reply_markup: cancelMarkup reply_markup: cancelMarkup
}).catch(logError); }).catch(logError);
} }
@@ -210,29 +239,32 @@ export class OllamaChat extends ChatCommand {
console.log("ended", true); console.log("ended", true);
} }
const diff = Math.abs(Date.now() - startTime) / 1000;
await editor.tick();
await editor.stop();
console.log(`aborted request ${uuid}:`, abortOllamaRequest(uuid));
waitMessage.reply_to_message = msg; waitMessage.reply_to_message = msg;
waitMessage.text = currentText; waitMessage.text = currentText;
await MessageStore.put(waitMessage); await MessageStore.put(waitMessage);
if (Environment.SEND_TIME_TOOK) {
await replyToMessage({message: waitMessage, text: `⏱️ ${diff}s`});
}
break; break;
} }
} }
} finally { } finally {
const diff = Math.abs(Date.now() - startTime) / 1000;
await editor.tick();
await editor.stop();
waitMessage.reply_to_message = msg;
waitMessage.text = currentText;
await MessageStore.put(waitMessage);
if (Environment.SEND_TIME_TOOK) {
await replyToMessage({message: waitMessage, text: `⏱️ ${diff}s`});
}
await bot.editMessageReplyMarkup({ await bot.editMessageReplyMarkup({
chat_id: chatId, chat_id: chatId,
message_id: waitMessage.message_id, message_id: waitMessage.message_id,
reply_markup: {inline_keyboard: []} reply_markup: {inline_keyboard: []}
}).catch(logError); }).catch(logError);
console.log(`aborted request ${uuid}:`, abortOllamaRequest(uuid));
} }
} catch (error) { } catch (error) {
if (error.message.toLowerCase().includes("aborted")) return; if (error.message.toLowerCase().includes("aborted")) return;
+8 -1
View File
@@ -65,8 +65,10 @@ export class OllamaGetModel extends Command {
private getModelText(model: string, info: AiModelCapabilities): string { private getModelText(model: string, info: AiModelCapabilities): string {
return `model: ${model}\n\n` + return `model: ${model}\n\n` +
`vision: ${boolToEmoji(info.vision?.supported)}\n` + `vision: ${boolToEmoji(info.vision?.supported)}\n` +
`ocr: ${boolToEmoji(info.ocr?.supported)}\n` +
`thinking: ${boolToEmoji(info.thinking?.supported)}\n` + `thinking: ${boolToEmoji(info.thinking?.supported)}\n` +
`tools: ${boolToEmoji(info.tools?.supported)}`; `tools: ${boolToEmoji(info.tools?.supported)}\n` +
`audio: ${boolToEmoji(info.audio?.supported)}`;
} }
async getModelCapabilities(model: string = Environment.OLLAMA_MODEL): Promise<AiModelCapabilities | null> { async getModelCapabilities(model: string = Environment.OLLAMA_MODEL): Promise<AiModelCapabilities | null> {
@@ -95,6 +97,11 @@ export class OllamaGetModel extends Command {
external: model !== Environment.OLLAMA_MODEL, external: model !== Environment.OLLAMA_MODEL,
model: model model: model
}, },
audio: {
supported: info.capabilities.includes("audio"),
external: model !== Environment.OLLAMA_MODEL,
model: model
}
}; };
} catch (e) { } catch (e) {
logError(e); logError(e);
+1 -1
View File
@@ -59,7 +59,7 @@ export class OpenAIChat extends ChatCommand {
}); });
chatMessages.reverse(); chatMessages.reverse();
if (Environment.SYSTEM_PROMPT) { if (Environment.SYSTEM_PROMPT && Environment.USE_SYSTEM_PROMPT) {
chatMessages.unshift({ chatMessages.unshift({
role: "system", role: "system",
content: [{type: "input_text", text: Environment.SYSTEM_PROMPT}], content: [{type: "input_text", text: Environment.SYSTEM_PROMPT}],
+70 -13
View File
@@ -24,8 +24,6 @@ export class Environment {
static ANSWERS: Answers; static ANSWERS: Answers;
static USE_NAMES_IN_PROMPT: boolean;
static MAX_PHOTO_SIZE: number; static MAX_PHOTO_SIZE: number;
static PROCESS_LINKS: boolean; static PROCESS_LINKS: boolean;
@@ -37,13 +35,15 @@ export class Environment {
static IMAGE_HANDLE_FALLBACK_POLICY: ImageHandleFallbackPolicy; static IMAGE_HANDLE_FALLBACK_POLICY: ImageHandleFallbackPolicy;
static SYSTEM_PROMPT?: string; static SYSTEM_PROMPT?: string;
static USE_NAMES_IN_PROMPT: boolean;
static USE_SYSTEM_PROMPT: boolean;
static SEND_TIME_TOOK: boolean; static SEND_TIME_TOOK: boolean;
static OLLAMA_API_KEY?: string;
static OLLAMA_ADDRESS?: string; static OLLAMA_ADDRESS?: string;
static OLLAMA_MODEL?: string; static OLLAMA_MODEL?: string;
static OLLAMA_IMAGE_MODEL?: string; static OLLAMA_IMAGE_MODEL?: string;
static OLLAMA_THINK_MODEL?: string; static OLLAMA_THINK_MODEL?: string;
static OLLAMA_API_KEY?: string;
static GEMINI_API_KEY?: string; static GEMINI_API_KEY?: string;
static GEMINI_MODEL: string; static GEMINI_MODEL: string;
@@ -62,6 +62,7 @@ export class Environment {
static waitThinkText = "⏳ Дайте-ка подумать..."; static waitThinkText = "⏳ Дайте-ка подумать...";
static analyzingPictureText = "🔍 Внимательно изучаю изображение..."; static analyzingPictureText = "🔍 Внимательно изучаю изображение...";
static analyzingPicturesText = "🔍 Внимательно изучаю изображения..."; static analyzingPicturesText = "🔍 Внимательно изучаю изображения...";
static transcribingAudioText = "🦻 Внимательно слушаю аудио...";
static genImageText = "👨‍🎨 Генерирую изображение..."; static genImageText = "👨‍🎨 Генерирую изображение...";
static ollamaCancelledText = "```Ollama\n❌ Отменено```"; static ollamaCancelledText = "```Ollama\n❌ Отменено```";
@@ -79,8 +80,6 @@ export class Environment {
Environment.ENABLE_UNSAFE_EVAL = ifTrue(process.env.ENABLE_UNSAFE_EVAL); Environment.ENABLE_UNSAFE_EVAL = ifTrue(process.env.ENABLE_UNSAFE_EVAL);
Environment.USE_NAMES_IN_PROMPT = ifTrue(process.env.USE_NAMES_IN_PROMPT);
Environment.MAX_PHOTO_SIZE = Number(process.env.MAX_PHOTO_SIZE || "1280"); Environment.MAX_PHOTO_SIZE = Number(process.env.MAX_PHOTO_SIZE || "1280");
Environment.PROCESS_LINKS = ifTrue(process.env.PROCESS_LINKS); Environment.PROCESS_LINKS = ifTrue(process.env.PROCESS_LINKS);
@@ -113,13 +112,15 @@ export class Environment {
Environment.IMAGE_HANDLE_FALLBACK_POLICY = ImageHandleFallbackPolicy.NOTIFY_USER; Environment.IMAGE_HANDLE_FALLBACK_POLICY = ImageHandleFallbackPolicy.NOTIFY_USER;
} }
Environment.SEND_TIME_TOOK = ifTrue(process.env.SEND_TOOK_TIME || false); Environment.USE_NAMES_IN_PROMPT = ifTrue(process.env.USE_NAMES_IN_PROMPT);
Environment.USE_SYSTEM_PROMPT = ifTrue(process.env.USE_SYSTEM_PROMPT || "true");
Environment.SEND_TIME_TOOK = ifTrue(process.env.SEND_TOOK_TIME || "false");
Environment.OLLAMA_API_KEY = process.env.OLLAMA_API_KEY;
Environment.OLLAMA_ADDRESS = process.env.OLLAMA_ADDRESS; Environment.OLLAMA_ADDRESS = process.env.OLLAMA_ADDRESS;
Environment.OLLAMA_MODEL = process.env.OLLAMA_MODEL || "gemma3:4b"; Environment.OLLAMA_MODEL = process.env.OLLAMA_MODEL || "gemma3:4b";
Environment.OLLAMA_IMAGE_MODEL = process.env.OLLAMA_IMAGE_MODEL || Environment.OLLAMA_MODEL; Environment.OLLAMA_IMAGE_MODEL = process.env.OLLAMA_IMAGE_MODEL || Environment.OLLAMA_MODEL;
Environment.OLLAMA_THINK_MODEL = process.env.OLLAMA_THINK_MODEL || Environment.OLLAMA_MODEL; Environment.OLLAMA_THINK_MODEL = process.env.OLLAMA_THINK_MODEL || Environment.OLLAMA_MODEL;
Environment.OLLAMA_API_KEY = process.env.OLLAMA_API_KEY;
Environment.GEMINI_API_KEY = process.env.GEMINI_API_KEY; Environment.GEMINI_API_KEY = process.env.GEMINI_API_KEY;
Environment.GEMINI_MODEL = process.env.GEMINI_MODEL || "gemini-2.5-flash-lite"; Environment.GEMINI_MODEL = process.env.GEMINI_MODEL || "gemini-2.5-flash-lite";
@@ -134,10 +135,26 @@ export class Environment {
Environment.OPENAI_IMAGE_MODEL = process.env.OPENAI_IMAGE_MODEL || "gpt-image-1-mini"; Environment.OPENAI_IMAGE_MODEL = process.env.OPENAI_IMAGE_MODEL || "gpt-image-1-mini";
} }
static setSystemPrompt(prompt: string) { static setOnlyForCreatorMode(enable: boolean) {
this.ONLY_FOR_CREATOR_MODE = enable;
}
static setSystemPrompt(prompt: string | undefined) {
this.SYSTEM_PROMPT = prompt; this.SYSTEM_PROMPT = prompt;
} }
static setUseNamesInPrompt(use: boolean) {
this.USE_NAMES_IN_PROMPT = use;
}
static setUseSystemPrompt(use: boolean) {
this.USE_SYSTEM_PROMPT = use;
}
static setSendTimeTook(send: boolean) {
this.SEND_TIME_TOOK = send;
}
static setAdmins(admins: Set<number>) { static setAdmins(admins: Set<number>) {
this.ADMIN_IDS = admins; this.ADMIN_IDS = admins;
} }
@@ -185,19 +202,59 @@ export class Environment {
this.ANSWERS = answers; this.ANSWERS = answers;
} }
static setOllamaModel(newModel: string) { static setOllamaApiKey(key: string) {
Environment.OLLAMA_MODEL = newModel; this.OLLAMA_API_KEY = key;
}
static setOllamaAddress(address: string) {
this.OLLAMA_ADDRESS = address;
}
static setOllamaModel(ollamaModel: string) {
this.OLLAMA_MODEL = ollamaModel;
}
static setOllamaThinkModel(ollamaThinkModel: string) {
this.OLLAMA_THINK_MODEL = ollamaThinkModel;
}
static setOllamaImageModel(ollamaImageModel: string) {
this.OLLAMA_IMAGE_MODEL = ollamaImageModel;
}
static setGeminiApiKey(geminiApiKey: string) {
this.GEMINI_API_KEY = geminiApiKey;
} }
static setGeminiModel(newModel: string) { static setGeminiModel(newModel: string) {
Environment.GEMINI_MODEL = newModel; this.GEMINI_MODEL = newModel;
}
static setGeminiImageModel(newImageModel: string) {
this.GEMINI_IMAGE_MODEL = newImageModel;
}
static setMistralApiKey(newMistralApiKey: string) {
this.MISTRAL_API_KEY = newMistralApiKey;
} }
static setMistralModel(newModel: string) { static setMistralModel(newModel: string) {
Environment.MISTRAL_MODEL = newModel; this.MISTRAL_MODEL = newModel;
}
static setOpenAIBaseUrl(newAIBaseUrl: string) {
this.OPENAI_BASE_URL = newAIBaseUrl;
}
static setOpenAIApiKey(newAIApiKey: string) {
this.OPENAI_API_KEY = newAIApiKey;
} }
static setOpenAIModel(newModel: string) { static setOpenAIModel(newModel: string) {
Environment.OPENAI_MODEL = newModel; this.OPENAI_MODEL = newModel;
}
static setOpenAIImageModel(newImageModel: string) {
this.OPENAI_IMAGE_MODEL = newImageModel;
} }
} }
-14
View File
@@ -2,7 +2,6 @@ import * as fs from "fs";
import {Environment} from "../common/environment"; import {Environment} from "../common/environment";
import {logError} from "../util/utils"; import {logError} from "../util/utils";
import {Answers} from "../model/answers"; import {Answers} from "../model/answers";
import path from "node:path";
type DataJsonFile = { type DataJsonFile = {
admins: number[] admins: number[]
@@ -28,19 +27,6 @@ export async function readData(): Promise<void> {
} }
} }
export async function readPrompts(): Promise<void> {
try {
const prompt = fs.readFileSync(path.join(Environment.DATA_PATH, "system_prompt.txt")).toString().trim();
if (prompt.length) {
Environment.setSystemPrompt(prompt);
}
} catch (e) {
logError(e);
}
return Promise.resolve();
}
export async function saveData(): Promise<void> { export async function saveData(): Promise<void> {
const adminIds: number[] = []; const adminIds: number[] = [];
Environment.ADMIN_IDS.forEach(id => adminIds.push(id)); Environment.ADMIN_IDS.forEach(id => adminIds.push(id));
+3 -5
View File
@@ -20,7 +20,7 @@ import {Ping} from "./commands/ping";
import {RandomString} from "./commands/random-string"; import {RandomString} from "./commands/random-string";
import {SystemInfo} from "./commands/system-info"; import {SystemInfo} from "./commands/system-info";
import {Test} from "./commands/test"; import {Test} from "./commands/test";
import {readData, readPrompts, retrieveAnswers} from "./db/database"; import {readData, retrieveAnswers} from "./db/database";
import {Uptime} from "./commands/uptime"; import {Uptime} from "./commands/uptime";
import {WhatBetter} from "./commands/what-better"; import {WhatBetter} from "./commands/what-better";
import {When} from "./commands/when"; import {When} from "./commands/when";
@@ -183,7 +183,7 @@ export const callbackCommands: CallbackCommand[] = [
new YtInfo() new YtInfo()
]; ];
if (Environment.OLLAMA_ADDRESS && Environment.OLLAMA_MODEL && Environment.SYSTEM_PROMPT) { if (Environment.OLLAMA_ADDRESS && Environment.OLLAMA_MODEL) {
commands.push( commands.push(
new OllamaChat(), new OllamaChat(),
new OllamaPrompt(), new OllamaPrompt(),
@@ -252,10 +252,8 @@ async function shutdown(signal: NodeJS.Signals) {
async function main() { async function main() {
const start = Date.now(); const start = Date.now();
await readPrompts();
console.log(Environment.SYSTEM_PROMPT); console.log(Environment.SYSTEM_PROMPT);
console.log( console.log(
`TEST_ENVIRONMENT: ${Environment.TEST_ENVIRONMENT}\n` + `TEST_ENVIRONMENT: ${Environment.TEST_ENVIRONMENT}\n` +
`DATA_PATH: ${Environment.DATA_PATH}\n` + `DATA_PATH: ${Environment.DATA_PATH}\n` +
+1
View File
@@ -5,4 +5,5 @@ export class AiModelCapabilities {
ocr?: AiCapabilityInfo; ocr?: AiCapabilityInfo;
thinking?: AiCapabilityInfo; thinking?: AiCapabilityInfo;
tools?: AiCapabilityInfo; tools?: AiCapabilityInfo;
audio?: AiCapabilityInfo;
} }
+796 -158
View File
File diff suppressed because it is too large Load Diff