254 lines
9.9 KiB
TypeScript
254 lines
9.9 KiB
TypeScript
import {Message} from "typescript-telegram-bot-api";
|
|
import {bot} from "../index";
|
|
import {downloadTelegramFile, logError} from "../util/utils";
|
|
import fs from "node:fs";
|
|
import path from "node:path";
|
|
import {Environment} from "../common/environment";
|
|
import {StoredAttachment, StoredAttachmentKind} from "../model/stored-attachment";
|
|
import {performFFmpeg} from "../util/ffmpeg";
|
|
import ffmpeg from "fluent-ffmpeg";
|
|
import {AsyncSemaphore, KeyedAsyncLock} from "../util/async-lock";
|
|
import {appLogger} from "../logging/logger";
|
|
|
|
/**
 * An attachment whose bytes have been loaded into memory.
 */
export type AiDownloadedFile = {
    /** Attachment category. */
    kind: StoredAttachmentKind;
    /** File identifier of the attachment. */
    fileId: string;
    /** File name of the attachment. */
    fileName: string;
    /** MIME type, when one is known. */
    mimeType?: string;
    /** In-memory contents of the file. */
    buffer: Buffer;
    /** Location of the file on disk. */
    path: string;
};
|
|
|
|
/** Keyed lock used to run work exclusively per cache file path. */
const cachePathLocks = new KeyedAsyncLock();

/** Semaphore guarding ffmpeg runs; constructed with 2 (presumably the permit count — confirm in async-lock). */
const ffmpegSemaphore = new AsyncSemaphore(2);

/** Child logger created under the "attachments" name. */
const logger = appLogger.child("attachments");
|
|
|
|
/**
 * Produces a file-system friendly name from an arbitrary string.
 *
 * Path separators, the characters `: * ? " < > |` and ASCII control characters
 * are each replaced with `_`; the result is then cut to at most 180 characters.
 *
 * @param value Raw name to sanitize.
 * @returns The sanitized, length-limited name.
 */
function safeFileName(value: string): string {
    const sanitized = value.replace(/[\\/:*?"<>|\u0000-\u001F]/g, "_");
    const limited = sanitized.slice(0, 180);
    return limited;
}
|
|
|
|
/** Known media types (lower-case, without parameters) and the file extension used for each. */
const MIME_TYPE_EXTENSIONS: ReadonlyMap<string, string> = new Map([
    ["audio/ogg", ".ogg"],
    ["audio/opus", ".ogg"],
    ["audio/mpeg", ".mp3"],
    ["audio/mp3", ".mp3"],
    ["audio/mp4", ".m4a"],
    ["audio/x-m4a", ".m4a"],
    ["audio/wav", ".wav"],
    ["audio/wave", ".wav"],
    ["audio/x-wav", ".wav"],
    ["audio/webm", ".webm"],
    ["image/jpeg", ".jpg"],
    ["image/png", ".png"],
    ["image/webp", ".webp"],
    ["application/pdf", ".pdf"],
    ["text/plain", ".txt"],
    ["application/zip", ".zip"],
    ["application/x-zip", ".zip"],
    ["application/x-zip-compressed", ".zip"],
    ["application/x-tar", ".tar"],
    ["application/tar", ".tar"],
    ["application/gzip", ".gz"],
    ["application/x-gzip", ".gz"],
    ["application/gzip-compressed", ".gz"],
    ["video/mp4", ".mp4"],
]);

/**
 * Maps a MIME type to a file extension (including the leading dot).
 *
 * Matching is case-insensitive and ignores surrounding whitespace as well as any
 * media-type parameters, so `"audio/ogg; codecs=opus"` resolves like `"audio/ogg"`.
 *
 * @param mimeType MIME type to look up; may be omitted.
 * @returns The extension for a known type, or `""` when the type is missing or unknown.
 */
function extensionFromMimeType(mimeType?: string): string {
    // Keep only the "type/subtype" part: everything after the first ';' is parameters.
    const [bareType = ""] = (mimeType ?? "").split(";", 1);
    const normalized = bareType.trim().toLowerCase();
    return MIME_TYPE_EXTENSIONS.get(normalized) ?? "";
}
|
|
|
|
/**
 * Ensures a file name carries an extension.
 *
 * A name that already has an extension is returned untouched. Otherwise the
 * extension of `telegramFilePath` is preferred, and the one derived from
 * `mimeType` is the fallback. If neither yields an extension the name is
 * returned as-is.
 *
 * @param fileName Name to complete.
 * @param mimeType Optional MIME type used as a fallback source for the extension.
 * @param telegramFilePath Optional file path whose extension takes priority.
 * @returns The file name, with an extension appended when one could be determined.
 */
function fileNameWithExtension(fileName: string, mimeType?: string, telegramFilePath?: string): string {
    const alreadyHasExtension = path.extname(fileName) !== "";
    if (alreadyHasExtension) {
        return fileName;
    }

    let suffix = "";
    if (telegramFilePath) {
        suffix = path.extname(telegramFilePath);
    }
    if (!suffix) {
        suffix = extensionFromMimeType(mimeType);
    }

    // An empty suffix leaves the name unchanged.
    return fileName + suffix;
}
|
|
|
|
/**
 * Resolves the cache directory for an attachment kind.
 *
 * The directory is `<DATA_PATH>/cache/<name>`, where `<name>` is `"photo"` for
 * the `"image"` kind and the kind itself for everything else.
 *
 * @param kind Attachment kind.
 * @returns Path of the cache directory for that kind.
 */
function cacheDirFor(kind: StoredAttachmentKind): string {
    let folder: string = kind;
    if (kind === "image") {
        folder = "photo";
    }
    return path.join(Environment.DATA_PATH, "cache", folder);
}
|
|
|
|
/**
 * Builds the cache file path for an attachment.
 *
 * The base name is the sanitized `fileUniqueId` (or `fileId` when the unique id
 * is missing or empty); the extension is taken from `fileName`.
 *
 * @param kind Attachment kind, selects the cache directory.
 * @param fileUniqueId Preferred identifier for the cached file name.
 * @param fileId Fallback identifier.
 * @param fileName Name whose extension is reused for the cached file.
 * @returns Full path of the cache file.
 */
function cachePathFor(kind: StoredAttachmentKind, fileUniqueId: string | undefined, fileId: string, fileName: string): string {
    const stem = safeFileName(fileUniqueId || fileId);
    const suffix = path.extname(fileName);
    const directory = cacheDirFor(kind);
    return path.join(directory, stem + suffix);
}
|
|
|
|
/**
 * Downloads a Telegram file into the on-disk cache unless it is already there.
 *
 * The work for a given cache path runs under `cachePathLocks`, and the file is
 * written to a temporary name first and then renamed into place.
 *
 * @param kind Attachment kind, selects the cache directory.
 * @param fileId Telegram file id to download.
 * @param fileName Preferred file name; an extension is added when it has none.
 * @param mimeType Optional MIME type, used as a fallback source for the extension.
 * @param fileUniqueId Optional unique id, preferred for the cached file name.
 * @returns The stored attachment description, or `null` when the download
 *          produced no data and nothing is cached for this file.
 * @throws Whatever `bot.getFile`, the download helper or the file-system calls throw.
 */
async function downloadToCache(kind: StoredAttachmentKind, fileId: string, fileName: string, mimeType?: string, fileUniqueId?: string): Promise<StoredAttachment | null> {
    const startedAt = Date.now();
    logger.debug("download.start", {kind, fileId, fileName, mimeType});
    const file = await bot.getFile({file_id: fileId});
    const finalFileName = fileNameWithExtension(fileName, mimeType, file.file_path);
    const location = cachePathFor(kind, fileUniqueId, fileId, finalFileName);

    await cachePathLocks.runExclusive(location, async () => {
        if (fs.existsSync(location)) {
            logger.trace("download.cache_hit", {kind, location});
            return;
        }

        const buffer = await downloadTelegramFile(file.file_path);
        if (!buffer) {
            logger.warn("download.empty", {kind, fileId, telegramFilePath: file.file_path});
            return;
        }

        const tempLocation = `${location}.${process.pid}.${Date.now()}.tmp`;
        fs.mkdirSync(path.dirname(location), {recursive: true});
        try {
            // Write under a temporary name, then rename, so a partial file never sits at `location`.
            fs.writeFileSync(tempLocation, buffer);
            fs.renameSync(tempLocation, location);
        } catch (e) {
            // Do not leave a stray temporary file behind when the write or rename fails.
            fs.rmSync(tempLocation, {force: true});
            throw e;
        }
        logger.debug("download.saved", {kind, location, bytes: buffer.length, duration: logger.duration(startedAt)});
    });

    // Nothing on disk means the download came back empty: report that instead of
    // handing out an attachment that points at a missing file.
    if (!fs.existsSync(location)) {
        return null;
    }

    return {kind, fileId, fileUniqueId, fileName: finalFileName, mimeType, cachePath: location};
}
|
|
|
|
/**
 * Converts a media file to WAV with ffmpeg, unless the output already exists.
 *
 * The conversion runs under the per-path lock for `output` and inside
 * `ffmpegSemaphore`. ffmpeg writes to a temporary file that is renamed to
 * `output` on success and removed on failure.
 *
 * @param input Path of the source media file.
 * @param output Path of the WAV file to produce.
 * @param noVideo When true, `noVideo()` is applied to the ffmpeg command.
 * @throws Re-throws any conversion or file-system error after logging it.
 */
async function convertAudioToWav(input: string, output: string, noVideo = false): Promise<void> {
    const startedAt = Date.now();
    logger.debug("audio.convert.start", {input, output, noVideo});
    await cachePathLocks.runExclusive(output, async () => {
        if (fs.existsSync(output)) {
            logger.trace("audio.convert.cache_hit", {output});
            return;
        }

        await ffmpegSemaphore.runExclusive(async () => {
            // Re-check after waiting for the semaphore.
            if (fs.existsSync(output)) {
                logger.trace("audio.convert.cache_hit", {output});
                return;
            }

            const tempOutput = `${output}.${process.pid}.${Date.now()}.tmp.wav`;
            try {
                // The output may live in a different cache directory than the input
                // (video notes are transcoded into the audio cache), so make sure it exists.
                fs.mkdirSync(path.dirname(output), {recursive: true});
                await performFFmpeg(() => {
                    const command = ffmpeg(input);
                    if (noVideo) command.noVideo();
                    return command
                        .toFormat("wav")
                        .save(tempOutput)
                        .on("progress", (progress) => {
                            logger.trace("audio.convert.progress", {input, output, progress});
                        });
                });
                fs.renameSync(tempOutput, output);
                logger.debug("audio.convert.done", {input, output, duration: logger.duration(startedAt)});
            } catch (e) {
                // `force` makes this a no-op when the temporary file was never created.
                fs.rmSync(tempOutput, {force: true});
                logger.error("audio.convert.failed", {input, output, error: e});
                throw e;
            }
        });
    });
}
|
|
|
|
/**
 * Transcodes a cached attachment to WAV and, on success, repoints the
 * attachment at the WAV file (path, file name and MIME type).
 *
 * A failed conversion is logged and leaves the attachment unchanged.
 *
 * @param file Attachment to update in place.
 * @param fileUniqueId Unique id used for the WAV cache file name, if available.
 * @param fileId File id used when the unique id is missing.
 * @param noVideo Forwarded to `convertAudioToWav`.
 */
async function replaceWithWavTranscode(file: StoredAttachment, fileUniqueId: string | undefined, fileId: string, noVideo: boolean): Promise<void> {
    const output = cachePathFor("audio", fileUniqueId, fileId, `${fileUniqueId || fileId}.wav`);
    try {
        await convertAudioToWav(file.cachePath, output, noVideo);
        file.cachePath = output;
        // Swap only the trailing extension rather than the first matching substring.
        const currentExt = path.extname(file.fileName);
        file.fileName = `${file.fileName.slice(0, file.fileName.length - currentExt.length)}.wav`;
        file.mimeType = "audio/wav";
    } catch (e) {
        logError(e);
    }
}

/**
 * Downloads the attachments of a Telegram message into the local cache.
 *
 * Handles photos, documents, voice messages, audio files and video notes.
 * Voice messages and video notes are additionally transcoded to WAV; when that
 * succeeds the returned attachment points at the WAV file.
 *
 * Errors are logged, not thrown: a failing download stops processing of the
 * remaining attachment types and the attachments collected so far are returned.
 *
 * @param msg Telegram message to inspect.
 * @returns The attachments that were cached.
 */
export async function cacheMessageAttachments(msg: Message): Promise<StoredAttachment[]> {
    const startedAt = Date.now();
    const result: StoredAttachment[] = [];
    logger.debug("message.cache.start", {chatId: msg.chat?.id, messageId: msg.message_id});

    try {
        if (msg.photo?.length) {
            // Only the last entry of the photo array is cached (presumably the largest size — confirm against the Bot API).
            const size = msg.photo[msg.photo.length - 1]!;
            const file = await downloadToCache("image", size.file_id, `${size.file_unique_id || size.file_id}.jpg`, "image/jpeg", size.file_unique_id);
            if (file) result.push(file);
        }

        if (msg.document) {
            const doc = msg.document;
            // Documents are filed by their MIME type family.
            let kind: StoredAttachmentKind = "document";
            if (doc.mime_type?.startsWith("image/")) {
                kind = "image";
            } else if (doc.mime_type?.startsWith("audio/")) {
                kind = "audio";
            }
            const file = await downloadToCache(kind, doc.file_id, doc.file_name || `${doc.file_unique_id || doc.file_id}`, doc.mime_type, doc.file_unique_id);
            if (file) result.push(file);
        }

        if (msg.voice) {
            const voice = msg.voice;
            const file = await downloadToCache("audio", voice.file_id, `${voice.file_unique_id || voice.file_id}.ogg`, voice.mime_type || "audio/ogg", voice.file_unique_id);
            if (file) {
                await replaceWithWavTranscode(file, voice.file_unique_id, voice.file_id, false);
                result.push(file);
            }
        }

        if (msg.audio) {
            const audio = msg.audio;
            const file = await downloadToCache("audio", audio.file_id, audio.file_name || `${audio.file_unique_id || audio.file_id}.mp3`, audio.mime_type, audio.file_unique_id);
            if (file) result.push(file);
        }

        if (msg.video_note) {
            const note = msg.video_note;
            const file = await downloadToCache("video-note", note.file_id, `${note.file_unique_id || note.file_id}.mp4`, "video/mp4", note.file_unique_id);
            if (file) {
                // Only the audio track is kept for video notes.
                await replaceWithWavTranscode(file, note.file_unique_id, note.file_id, true);
                result.push(file);
            }
        }
    } catch (e) {
        logError(e);
    }

    logger.debug("message.cache.done", {chatId: msg.chat?.id, messageId: msg.message_id, attachments: result.length, duration: logger.duration(startedAt)});
    return result;
}
|
|
|
|
/**
 * Loads cached attachments from disk into memory.
 *
 * Attachments whose cache file does not exist are skipped, including the case
 * where the file disappears between the existence check and the read.
 *
 * @param attachments Stored attachments to load.
 * @returns One entry per attachment whose cache file could be read, in input order.
 * @throws Any read error other than the file being missing.
 */
export function attachmentsToDownloadedFiles(attachments: StoredAttachment[]): AiDownloadedFile[] {
    logger.trace("downloaded_files.build", {attachments: attachments.length});

    const files: AiDownloadedFile[] = [];
    for (const attachment of attachments) {
        if (!fs.existsSync(attachment.cachePath)) continue;

        let buffer: Buffer;
        try {
            buffer = fs.readFileSync(attachment.cachePath);
        } catch (e) {
            // The file can vanish after the check above; treat that like a missing file.
            const code = e instanceof Error ? (e as Error & {code?: unknown}).code : undefined;
            if (code === "ENOENT") continue;
            throw e;
        }

        files.push({
            kind: attachment.kind,
            fileId: attachment.fileId,
            fileName: attachment.fileName,
            mimeType: attachment.mimeType,
            buffer,
            path: attachment.cachePath,
        });
    }
    return files;
}
|
|
|
|
/**
 * Releases the in-memory data held by downloaded files.
 *
 * Each file's buffer is replaced with an empty one and the array itself is
 * emptied in place. Nothing is removed from disk.
 *
 * @param files Downloaded files to release; mutated by this call.
 */
export function cleanupDownloads(files: AiDownloadedFile[]): void {
    logger.trace("downloaded_files.cleanup", {files: files.length});

    // The cached files remain on disk; only the buffers are dropped right away.
    for (const entry of files) {
        entry.buffer = Buffer.alloc(0);
    }

    files.splice(0);
}
|