feat: add Ollama audio transcription and runtime config reload
- add audio capability reporting for Ollama models
- support Telegram voice messages via ffmpeg conversion and Ollama transcription
- add USE_SYSTEM_PROMPT toggle and runtime reloading of .env/system prompt settings
- support ollama_options.json for custom Ollama request options
- improve Telegram MarkdownV2 escaping and formatting preservation
- add environment setters for AI provider credentials and models
- show audio capability in info/model commands
This commit is contained in:
+54
-22
@@ -14,6 +14,10 @@ import {Cancel} from "../callback_commands/cancel";
|
||||
import {OllamaCancel} from "../callback_commands/ollama-cancel";
|
||||
import {OllamaGetModel} from "./ollama-get-model";
|
||||
import {ChatCommand} from "../base/chat-command";
|
||||
import {MessagePart} from "../common/message-part";
|
||||
import {Options} from "ollama";
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
|
||||
export class OllamaChat extends ChatCommand {
|
||||
command = ["ollamaThink", "ollama"];
|
||||
@@ -27,25 +31,38 @@ export class OllamaChat extends ChatCommand {
|
||||
return this.executeOllama(msg, match?.[3], match?.[1]?.toLowerCase()?.startsWith("ollamathink"));
|
||||
}
|
||||
|
||||
async executeOllama(msg: Message, text: string, think: boolean = false): Promise<void> {
|
||||
if (!text || text.trim().length === 0) return;
|
||||
async executeOllama(msg: Message, text: string, think: boolean = false, voiceB64?: string): Promise<void> {
|
||||
if ((!text || text.trim().length === 0) && !voiceB64) return;
|
||||
|
||||
const chatId = msg.chat.id;
|
||||
|
||||
const storedMsg = await MessageStore.get(chatId, msg.message_id);
|
||||
const messageParts = await collectReplyChainText(storedMsg);
|
||||
console.log("MESSAGE PARTS", messageParts);
|
||||
|
||||
const chatMessages = messageParts.map(part => {
|
||||
let messageParts: MessagePart[] = [];
|
||||
|
||||
if (!voiceB64) {
|
||||
messageParts = await collectReplyChainText(storedMsg);
|
||||
console.log("MESSAGE PARTS", messageParts);
|
||||
}
|
||||
|
||||
const chatMessages = !voiceB64 ? messageParts.map(part => {
|
||||
return {
|
||||
role: part.bot ? "assistant" : "user",
|
||||
content: (Environment.USE_NAMES_IN_PROMPT && !part.bot ? `MESSAGE FROM USER "${part.name}":\n` : "") + part.content,
|
||||
content: (Environment.USE_NAMES_IN_PROMPT && !part.bot ? `"${part.name}":\n` : "") + part.content,
|
||||
images: part.images
|
||||
};
|
||||
});
|
||||
}) : [
|
||||
{
|
||||
role: "user",
|
||||
content: "Transcribe this audio file",
|
||||
images: [voiceB64]
|
||||
}
|
||||
];
|
||||
chatMessages.reverse();
|
||||
|
||||
if (Environment.SYSTEM_PROMPT) {
|
||||
console.log("PARTS", chatMessages);
|
||||
|
||||
if (Environment.SYSTEM_PROMPT && !voiceB64 && Environment.USE_SYSTEM_PROMPT) {
|
||||
chatMessages.unshift({role: "system", content: Environment.SYSTEM_PROMPT, images: []});
|
||||
}
|
||||
|
||||
@@ -97,16 +114,28 @@ export class OllamaChat extends ChatCommand {
|
||||
|
||||
waitMessage = await replyToMessage({
|
||||
message: msg,
|
||||
text: (!think && imagesCount) ?
|
||||
text: (!think && imagesCount && !voiceB64) ?
|
||||
imagesCount > 1 ? Environment.analyzingPicturesText : Environment.analyzingPictureText
|
||||
: Environment.waitThinkText
|
||||
: voiceB64 ? Environment.transcribingAudioText : Environment.waitThinkText
|
||||
});
|
||||
|
||||
let options: Partial<Options> | null = null
|
||||
try {
|
||||
const optionsPath = path.join(Environment.DATA_PATH, "ollama_options.json");
|
||||
|
||||
if (fs.existsSync(optionsPath)) {
|
||||
options = JSON.parse(fs.readFileSync(optionsPath).toString());
|
||||
}
|
||||
} catch (e) {
|
||||
logError(e);
|
||||
}
|
||||
|
||||
const stream = await ollama.chat({
|
||||
model: think ? Environment.OLLAMA_THINK_MODEL : imagesCount ? Environment.OLLAMA_IMAGE_MODEL : Environment.OLLAMA_MODEL,
|
||||
stream: true,
|
||||
think: think,
|
||||
messages: chatMessages,
|
||||
options: options
|
||||
});
|
||||
|
||||
const newRequest = {
|
||||
@@ -171,7 +200,7 @@ export class OllamaChat extends ChatCommand {
|
||||
chat_id: chatId,
|
||||
message_id: waitMessage.message_id,
|
||||
text: "🤔 Размышляю...",
|
||||
parse_mode: "Markdown",
|
||||
parse_mode: "MarkdownV2",
|
||||
reply_markup: cancelMarkup
|
||||
}).catch(logError);
|
||||
}
|
||||
@@ -210,29 +239,32 @@ export class OllamaChat extends ChatCommand {
|
||||
console.log("ended", true);
|
||||
}
|
||||
|
||||
const diff = Math.abs(Date.now() - startTime) / 1000;
|
||||
|
||||
await editor.tick();
|
||||
await editor.stop();
|
||||
|
||||
console.log(`aborted request ${uuid}:`, abortOllamaRequest(uuid));
|
||||
|
||||
waitMessage.reply_to_message = msg;
|
||||
waitMessage.text = currentText;
|
||||
await MessageStore.put(waitMessage);
|
||||
|
||||
if (Environment.SEND_TIME_TOOK) {
|
||||
await replyToMessage({message: waitMessage, text: `⏱️ ${diff}s`});
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
const diff = Math.abs(Date.now() - startTime) / 1000;
|
||||
|
||||
await editor.tick();
|
||||
await editor.stop();
|
||||
|
||||
waitMessage.reply_to_message = msg;
|
||||
waitMessage.text = currentText;
|
||||
await MessageStore.put(waitMessage);
|
||||
|
||||
if (Environment.SEND_TIME_TOOK) {
|
||||
await replyToMessage({message: waitMessage, text: `⏱️ ${diff}s`});
|
||||
}
|
||||
|
||||
await bot.editMessageReplyMarkup({
|
||||
chat_id: chatId,
|
||||
message_id: waitMessage.message_id,
|
||||
reply_markup: {inline_keyboard: []}
|
||||
}).catch(logError);
|
||||
console.log(`aborted request ${uuid}:`, abortOllamaRequest(uuid));
|
||||
}
|
||||
} catch (error) {
|
||||
if (error.message.toLowerCase().includes("aborted")) return;
|
||||
|
||||
Reference in New Issue
Block a user