Files
tg-chat-bot/src/ai/telegram-attachments.ts
T
2026-05-13 12:05:55 +03:00

254 lines
9.9 KiB
TypeScript

import {Message} from "typescript-telegram-bot-api";
import {bot} from "../index";
import {downloadTelegramFile, logError} from "../util/utils";
import fs from "node:fs";
import path from "node:path";
import {Environment} from "../common/environment";
import {StoredAttachment, StoredAttachmentKind} from "../model/stored-attachment";
import {performFFmpeg} from "../util/ffmpeg";
import ffmpeg from "fluent-ffmpeg";
import {AsyncSemaphore, KeyedAsyncLock} from "../util/async-lock";
import {appLogger} from "../logging/logger";
export type AiDownloadedFile = {
kind: StoredAttachmentKind;
fileId: string;
fileName: string;
mimeType?: string;
buffer: Buffer;
path: string;
};
const cachePathLocks = new KeyedAsyncLock();
const ffmpegSemaphore = new AsyncSemaphore(2);
const logger = appLogger.child("attachments");
function safeFileName(value: string): string {
return value.replace(/[\\/:*?"<>|\u0000-\u001F]/g, "_").slice(0, 180);
}
function extensionFromMimeType(mimeType?: string): string {
switch ((mimeType || "").toLowerCase()) {
case "audio/ogg":
case "audio/opus":
return ".ogg";
case "audio/mpeg":
case "audio/mp3":
return ".mp3";
case "audio/mp4":
case "audio/x-m4a":
return ".m4a";
case "audio/wav":
case "audio/wave":
case "audio/x-wav":
return ".wav";
case "audio/webm":
return ".webm";
case "image/jpeg":
return ".jpg";
case "image/png":
return ".png";
case "image/webp":
return ".webp";
case "application/pdf":
return ".pdf";
case "text/plain":
return ".txt";
case "application/zip":
case "application/x-zip":
case "application/x-zip-compressed":
return ".zip";
case "application/x-tar":
case "application/tar":
return ".tar";
case "application/gzip":
case "application/x-gzip":
case "application/gzip-compressed":
return ".gz";
case "video/mp4":
return ".mp4";
default:
return "";
}
}
function fileNameWithExtension(fileName: string, mimeType?: string, telegramFilePath?: string): string {
if (path.extname(fileName)) return fileName;
const telegramExt = telegramFilePath ? path.extname(telegramFilePath) : "";
const ext = telegramExt || extensionFromMimeType(mimeType);
return ext ? `${fileName}${ext}` : fileName;
}
function cacheDirFor(kind: StoredAttachmentKind): string {
const dirName = kind === "image" ? "photo" : kind;
return path.join(Environment.DATA_PATH, "cache", dirName);
}
function cachePathFor(kind: StoredAttachmentKind, fileUniqueId: string | undefined, fileId: string, fileName: string): string {
const base = safeFileName(fileUniqueId || fileId);
const ext = path.extname(fileName);
return path.join(cacheDirFor(kind), `${base}${ext || ""}`);
}
async function downloadToCache(kind: StoredAttachmentKind, fileId: string, fileName: string, mimeType?: string, fileUniqueId?: string): Promise<StoredAttachment | null> {
const startedAt = Date.now();
logger.debug("download.start", {kind, fileId, fileName, mimeType});
const file = await bot.getFile({file_id: fileId});
const finalFileName = fileNameWithExtension(fileName, mimeType, file.file_path);
const location = cachePathFor(kind, fileUniqueId, fileId, finalFileName);
await cachePathLocks.runExclusive(location, async () => {
if (fs.existsSync(location)) {
logger.trace("download.cache_hit", {kind, location});
return;
}
const buffer = await downloadTelegramFile(file.file_path);
if (!buffer) {
logger.warn("download.empty", {kind, fileId, telegramFilePath: file.file_path});
return;
}
const tempLocation = `${location}.${process.pid}.${Date.now()}.tmp`;
fs.mkdirSync(path.dirname(location), {recursive: true});
fs.writeFileSync(tempLocation, buffer);
fs.renameSync(tempLocation, location);
logger.debug("download.saved", {kind, location, bytes: buffer.length, duration: logger.duration(startedAt)});
});
return {kind, fileId, fileUniqueId, fileName: finalFileName, mimeType, cachePath: location};
}
async function convertAudioToWav(input: string, output: string, noVideo = false): Promise<void> {
const startedAt = Date.now();
logger.debug("audio.convert.start", {input, output, noVideo});
await cachePathLocks.runExclusive(output, async () => {
if (fs.existsSync(output)) {
logger.trace("audio.convert.cache_hit", {output});
return;
}
await ffmpegSemaphore.runExclusive(async () => {
if (fs.existsSync(output)) {
logger.trace("audio.convert.cache_hit", {output});
return;
}
const tempOutput = `${output}.${process.pid}.${Date.now()}.tmp.wav`;
try {
await performFFmpeg(() => {
const command = ffmpeg(input);
if (noVideo) command.noVideo();
return command
.toFormat("wav")
.save(tempOutput)
.on("progress", (progress) => {
logger.trace("audio.convert.progress", {input, output, progress});
});
});
fs.renameSync(tempOutput, output);
logger.debug("audio.convert.done", {input, output, duration: logger.duration(startedAt)});
} catch (e) {
if (fs.existsSync(tempOutput)) {
fs.rmSync(tempOutput, {force: true});
}
logger.error("audio.convert.failed", {input, output, error: e});
throw e;
}
});
});
}
export async function cacheMessageAttachments(msg: Message): Promise<StoredAttachment[]> {
const startedAt = Date.now();
const result: StoredAttachment[] = [];
logger.debug("message.cache.start", {chatId: msg.chat?.id, messageId: msg.message_id});
try {
if (msg.photo?.length) {
const size = msg.photo[msg.photo.length - 1]!;
const file = await downloadToCache("image", size.file_id, `${size.file_unique_id || size.file_id}.jpg`, "image/jpeg", size.file_unique_id);
if (file) result.push(file);
}
if (msg.document) {
const doc = msg.document;
const kind: StoredAttachmentKind = doc.mime_type?.startsWith("image/")
? "image"
: doc.mime_type?.startsWith("audio/")
? "audio"
: "document";
const file = await downloadToCache(kind, doc.file_id, doc.file_name || `${doc.file_unique_id || doc.file_id}`, doc.mime_type, doc.file_unique_id);
if (file) result.push(file);
}
if (msg.voice) {
const file = await downloadToCache("audio", msg.voice.file_id, `${msg.voice.file_unique_id || msg.voice.file_id}.ogg`, msg.voice.mime_type || "audio/ogg", msg.voice.file_unique_id);
if (file) {
const output = cachePathFor("audio", msg.voice.file_unique_id, msg.voice.file_id, `${msg.voice.file_unique_id || msg.voice.file_id}.wav`);
try {
await convertAudioToWav(file.cachePath, output);
file.cachePath = output;
file.fileName = file?.fileName?.replace(".ogg", ".wav");
file.mimeType = "audio/wav";
} catch (e) {
logError(e);
}
}
if (file) result.push(file);
}
if (msg.audio) {
const file = await downloadToCache("audio", msg.audio.file_id, msg.audio.file_name || `${msg.audio.file_unique_id || msg.audio.file_id}.mp3`, msg.audio.mime_type, msg.audio.file_unique_id);
if (file) result.push(file);
}
if (msg.video_note) {
const file = await downloadToCache("video-note", msg.video_note.file_id, `${msg.video_note.file_unique_id || msg.video_note.file_id}.mp4`, "video/mp4", msg.video_note.file_unique_id);
if (file) {
const output = cachePathFor("audio", msg.video_note.file_unique_id, msg.video_note.file_id, `${msg.video_note.file_unique_id || msg.video_note.file_id}.wav`);
try {
await convertAudioToWav(file.cachePath, output, true);
file.cachePath = output;
file.fileName = file?.fileName?.replace(".mp4", ".wav");
file.mimeType = "audio/wav";
} catch (e) {
logError(e);
}
}
if (file) result.push(file);
}
} catch (e) {
logError(e);
}
logger.debug("message.cache.done", {chatId: msg.chat?.id, messageId: msg.message_id, attachments: result.length, duration: logger.duration(startedAt)});
return result;
}
export function attachmentsToDownloadedFiles(attachments: StoredAttachment[]): AiDownloadedFile[] {
logger.trace("downloaded_files.build", {attachments: attachments.length});
return attachments
.filter(attachment => fs.existsSync(attachment.cachePath))
.map(attachment => ({
kind: attachment.kind,
fileId: attachment.fileId,
fileName: attachment.fileName,
mimeType: attachment.mimeType,
buffer: fs.readFileSync(attachment.cachePath),
path: attachment.cachePath,
}));
}
export function cleanupDownloads(files: AiDownloadedFile[]): void {
logger.trace("downloaded_files.cleanup", {files: files.length});
// Files stay on disk in the message cache; drop in-memory buffers eagerly.
for (const file of files) {
file.buffer = Buffer.alloc(0);
}
files.length = 0;
}