export type TelegramRenderMode = "draft" | "final"; export interface TelegramMarkdownV2RenderOptions { /** * draft: * - useful for streaming/editMessageText * - temporarily closes unfinished code blocks / inline code / bold * * final: * - use after LLM finished generation */ mode?: TelegramRenderMode; /** * Used when the rendered message is empty. */ fallbackText?: string; } /** * Main function. * * Flow: * LLM Markdown-lite * -> draft safety, if needed * -> normalize unsupported Markdown * -> parse Markdown-lite * -> render valid Telegram MarkdownV2 */ export function prepareTelegramMarkdownV2( input: string, options: TelegramMarkdownV2RenderOptions = {}, ): string { const mode = options.mode ?? "final"; const fallbackText = options.fallbackText ?? "…"; try { const safeInput = mode === "draft" ? makePartialMarkdownLiteSafe(input) : input; const normalized = normalizeUnsupportedMarkdown(safeInput); const ast = parseMarkdownLite(normalized); const rendered = renderMarkdownV2(ast).trim(); return rendered || escapeMarkdownV2Text(fallbackText); } catch { const fallback = escapeMarkdownV2Text(input).trim(); return fallback || escapeMarkdownV2Text(fallbackText); } } /** * Useful for editMessageText fallback. */ export function prepareTelegramPlainMarkdownV2(input: string, fallbackText = "…"): string { const escaped = escapeMarkdownV2Text(input).trim(); return escaped || escapeMarkdownV2Text(fallbackText); } /** * Draft-safe mode for streaming. * * Fixes cases like: * * ```ts * const x = * * or: * * *partial bold * * or: * * `partial code */ export function makePartialMarkdownLiteSafe(input: string): string { let text = input.replace(/\r\n?/g, "\n"); if (isInsideFencedCodeBlock(text)) { return closeUnclosedFencedCodeBlock(text); } return transformOutsideFencedCode(text, (outside) => { let result = outside; result = closeUnclosedInlineCode(result); result = closeUnclosedBold(result); return result; }); } /** * Converts unsupported / annoying Markdown into simpler Markdown-lite. * * Does not transform fenced code blocks. */ export function normalizeUnsupportedMarkdown(input: string): string { const text = input.replace(/\r\n?/g, "\n").trim(); return transformOutsideFencedCode(text, (raw) => { let result = raw; result = normalizeMarkdownTables(result); result = result // Images: ![alt](url) -> [alt](url) .replace(/!\[([^\]\n]*)]\(([^)\n]+)\)/g, "[$1]($2)") // Common Markdown bold -> Markdown-lite bold .replace(/\*\*([^*\n]+)\*\*/g, "*$1*") .replace(/__([^_\n]+)__/g, "*$1*") .replace(/^`([^`\n]+)$/gm, (_, title: string) => { const cleanTitle = title.trim(); return cleanTitle ? `*${cleanTitle}*` : ""; }) // Headings -> bold labels .replace(/^#{1,6}\s+(.+)$/gm, (_, title: string) => { const cleanTitle = title .replace(/[*_`[\]()~>#+\-=|{}.!]/g, "") .trim(); return cleanTitle ? `*${cleanTitle}*` : ""; }) // Horizontal rules .replace(/^\s*(-{3,}|\*{3,}|_{3,})\s*$/gm, "") // Task lists -> normal bullets .replace(/^(\s*)[-*]\s+\[[ xX]]\s+/gm, "$1- ") // HTML line breaks -> newline .replace(//gi, "\n") // Strip simple raw HTML tags, keep content .replace(/<\/?(?:p|div|span|strong|b|em|i|u|s|del|code|pre)[^>]*>/gi, "") // Too many blank lines .replace(/\n{3,}/g, "\n\n"); return result.trim(); }); } /** * AST */ type InlineNode = | { type: "text"; value: string } | { type: "bold"; children: InlineNode[] } | { type: "code"; value: string } | { type: "link"; text: string; url: string }; type BlockNode = | { type: "paragraph"; children: InlineNode[] } | { type: "pre"; lang?: string; value: string } | { type: "quote"; lines: InlineNode[][] }; /** * Block parser: * - fenced code blocks * - quotes * - paragraphs */ export function parseMarkdownLite(input: string): BlockNode[] { const lines = input.replace(/\r\n?/g, "\n").split("\n"); const blocks: BlockNode[] = []; let i = 0; while (i < lines.length) { const line = lines[i]; if (!line.trim()) { i++; continue; } const fenceStart = line.match(/^```\s*([^`]*)\s*$/); if (fenceStart) { const lang = sanitizeCodeLanguage(fenceStart[1]); const body: string[] = []; i++; while (i < lines.length && !/^```\s*$/.test(lines[i])) { body.push(lines[i]); i++; } if (i < lines.length) { i++; } blocks.push({ type: "pre", lang, value: body.join("\n"), }); continue; } if (/^\s*>\s?/.test(line)) { const quoteLines: InlineNode[][] = []; while (i < lines.length && /^\s*>\s?/.test(lines[i])) { const quoteLine = lines[i].replace(/^\s*>\s?/, ""); quoteLines.push(parseInlineMarkdownLite(quoteLine)); i++; } blocks.push({ type: "quote", lines: quoteLines, }); continue; } const paragraphLines: string[] = []; while ( i < lines.length && lines[i].trim() && !/^```\s*([^`]*)\s*$/.test(lines[i]) && !/^\s*>\s?/.test(lines[i]) ) { paragraphLines.push(lines[i]); i++; } if (paragraphLines.length === 0) { paragraphLines.push(lines[i]); i++; } blocks.push({ type: "paragraph", children: parseInlineMarkdownLite(paragraphLines.join("\n")), }); } return blocks; } /** * Inline parser: * - *bold* * - `code` * - [text](url) * * This is intentionally not a full Markdown parser. */ export function parseInlineMarkdownLite(source: string): InlineNode[] { const nodes: InlineNode[] = []; let buffer = ""; let i = 0; const flushText = () => { if (buffer) { nodes.push({ type: "text", value: buffer }); buffer = ""; } }; while (i < source.length) { const ch = source[i]; if (ch === "`") { const end = findNextUnescaped(source, "`", i + 1); if (end !== -1) { flushText(); nodes.push({ type: "code", value: source.slice(i + 1, end), }); i = end + 1; continue; } } if (ch === "[") { const labelEnd = findNextUnescaped(source, "]", i + 1); if (labelEnd !== -1 && source[labelEnd + 1] === "(") { const urlStart = labelEnd + 2; const urlEnd = findMarkdownLinkEnd(source, urlStart); if (urlEnd !== -1) { const text = source.slice(i + 1, labelEnd).trim(); const url = source.slice(urlStart, urlEnd).trim(); if (text && isSafeUrl(url)) { flushText(); nodes.push({ type: "link", text, url, }); i = urlEnd + 1; continue; } } } } if (ch === "*" && canStartBold(source, i)) { const end = findBoldEnd(source, i + 1); if (end !== -1 && canEndBold(source, end)) { const content = source.slice(i + 1, end); if (content.trim()) { flushText(); nodes.push({ type: "bold", children: parseInlineMarkdownLite(content), }); i = end + 1; continue; } } } buffer += ch; i++; } flushText(); return nodes; } /** * MarkdownV2 renderer */ export function renderMarkdownV2(blocks: BlockNode[]): string { return blocks .map(renderBlockMarkdownV2) .filter(Boolean) .join("\n\n") .trim(); } function renderBlockMarkdownV2(block: BlockNode): string { switch (block.type) { case "paragraph": return renderInlineMarkdownV2(block.children); case "pre": { const lang = block.lang ? block.lang : ""; const code = escapeMarkdownV2Code(block.value); if (lang) { return "```" + lang + "\n" + code + "\n```"; } return "```\n" + code + "\n```"; } case "quote": return block.lines .map((line) => ">" + renderInlineMarkdownV2(line)) .join("\n"); } } function renderInlineMarkdownV2(nodes: InlineNode[]): string { return nodes.map(renderInlineNodeMarkdownV2).join(""); } function renderInlineNodeMarkdownV2(node: InlineNode): string { switch (node.type) { case "text": return escapeMarkdownV2Text(node.value); case "bold": return "*" + renderInlineMarkdownV2(node.children) + "*"; case "code": return "`" + escapeMarkdownV2Code(node.value) + "`"; case "link": return `[${escapeMarkdownV2Text(node.text)}](${escapeMarkdownV2LinkUrl(node.url)})`; } } /** * Telegram MarkdownV2 escaping */ export function escapeMarkdownV2Text(value: string): string { return value .replace(/\\/g, "\\\\") .replace(/([_*\[\]()~`>#+\-=|{}.!])/g, "\\$1"); } export function escapeMarkdownV2Code(value: string): string { return value .replace(/\\/g, "\\\\") .replace(/`/g, "\\`"); } export function escapeMarkdownV2LinkUrl(value: string): string { return value .replace(/\\/g, "\\\\") .replace(/\)/g, "\\)"); } /** * Draft safety helpers */ function closeUnclosedFencedCodeBlock(input: string): string { if (!isInsideFencedCodeBlock(input)) { return input; } return input.endsWith("\n") ? input + "```" : input + "\n```"; } function isInsideFencedCodeBlock(input: string): boolean { const fenceMatches = [...input.matchAll(/^```/gm)]; return fenceMatches.length % 2 === 1; } function closeUnclosedInlineCode(input: string): string { let count = 0; let escaped = false; for (const ch of input) { if (escaped) { escaped = false; continue; } if (ch === "\\") { escaped = true; continue; } if (ch === "`") { count++; } } return count % 2 === 1 ? input + "`" : input; } function closeUnclosedBold(input: string): string { let count = 0; let escaped = false; for (let i = 0; i < input.length; i++) { const ch = input[i]; if (escaped) { escaped = false; continue; } if (ch === "\\") { escaped = true; continue; } if (ch !== "*") { continue; } if (isLikelyListMarker(input, i)) { continue; } count++; } return count % 2 === 1 ? input + "*" : input; } function isLikelyListMarker(input: string, index: number): boolean { const prev = input[index - 1]; const next = input[index + 1]; const isLineStart = index === 0 || prev === "\n"; return isLineStart && next === " "; } /** * Generic helpers */ function findNextUnescaped(source: string, target: string, from: number): number { for (let i = from; i < source.length; i++) { if (source[i] === "\\" && i + 1 < source.length) { i++; continue; } if (source[i] === target) { return i; } } return -1; } function findBoldEnd(source: string, from: number): number { for (let i = from; i < source.length; i++) { if (source[i] === "\\" && i + 1 < source.length) { i++; continue; } if (source[i] === "*") { return i; } } return -1; } function findMarkdownLinkEnd(source: string, from: number): number { let depth = 0; for (let i = from; i < source.length; i++) { const ch = source[i]; if (ch === "\\" && i + 1 < source.length) { i++; continue; } if (ch === "\n") { return -1; } if (ch === "(") { depth++; continue; } if (ch === ")") { if (depth === 0) { return i; } depth--; } } return -1; } function canStartBold(source: string, index: number): boolean { const prev = source[index - 1]; const next = source[index + 1]; if (!next || /\s/.test(next)) { return false; } if (prev && /\w/.test(prev) && /\w/.test(next)) { return false; } return true; } function canEndBold(source: string, index: number): boolean { const prev = source[index - 1]; const next = source[index + 1]; if (!prev || /\s/.test(prev)) { return false; } if (next && /\w/.test(prev) && /\w/.test(next)) { return false; } return true; } function sanitizeCodeLanguage(value: string | undefined): string | undefined { if (!value) return undefined; const lang = value.trim(); if (!lang) return undefined; // Telegram language hint after ``` can be used as a visual label too. // Keep it permissive, but reject dangerous/newline/weird marker chars. if (!/^[^\s`\\]{1,32}$/.test(lang)) { return undefined; } return lang; } function isSafeUrl(url: string): boolean { return /^(https?:\/\/|tg:\/\/|mailto:)/i.test(url); } /** * Applies transform only outside fenced code blocks. */ function transformOutsideFencedCode( input: string, transform: (text: string) => string, ): string { const fences: string[] = []; const fenceRegex = /```[^\n]*\n[\s\S]*?(?:\n```|$)/g; const protectedText = input.replace(fenceRegex, (match) => { const index = fences.push(match) - 1; return `\uE000FENCE_${index}\uE001`; }); const transformed = transform(protectedText); return transformed.replace(/\uE000FENCE_(\d+)\uE001/g, (_, index: string) => { return fences[Number(index)] ?? ""; }); } /** * Converts Markdown tables into simple list rows. * * Example: * | A | B | * |---|---| * | 1 | 2 | * * -> * - A: 1; B: 2 */ function normalizeMarkdownTables(input: string): string { const lines = input.split("\n"); const output: string[] = []; let i = 0; while (i < lines.length) { const current = lines[i]; const next = lines[i + 1]; if (next && isMarkdownTableSeparator(next) && current.includes("|")) { const headers = parseTableRow(current); const rows: string[][] = []; i += 2; while (i < lines.length && lines[i].includes("|") && lines[i].trim()) { rows.push(parseTableRow(lines[i])); i++; } if (rows.length === 0) { output.push(headers.join(" / ")); continue; } for (const row of rows) { const cells = row .map((cell, index) => { const header = headers[index]; if (!cell) return ""; if (!header) return cell; return `${header}: ${cell}`; }) .filter(Boolean); output.push(`- ${cells.join("; ")}`); } continue; } output.push(current); i++; } return output.join("\n"); } function isMarkdownTableSeparator(line: string): boolean { const cells = parseTableRow(line); return ( cells.length >= 2 && cells.every((cell) => /^:?-{3,}:?$/.test(cell.trim())) ); } function parseTableRow(line: string): string[] { return line .trim() .replace(/^\|/, "") .replace(/\|$/, "") .split("|") .map((cell) => cell.trim()); } /** * Optional helper for streaming/editing. * * You can adapt this to your own bot wrapper. */ export function shouldEditRenderedMessage(previous: string, next: string): boolean { return previous !== next && next.trim().length > 0; }