Merge reply-chain documents into AI requests

This commit is contained in:
2026-05-18 20:43:35 +03:00
parent df39d89ea8
commit 53e9798193
9 changed files with 269 additions and 28 deletions
+13
View File
@@ -32,6 +32,7 @@ export type ConversationTurn = {
content: string;
deletedByBotAt?: number | null;
attachments: ConversationAttachment[];
documentNames?: string[];
};
export type ConversationSnapshot = {
@@ -123,6 +124,13 @@ function attachmentSummary(attachments: ConversationAttachment[]): string {
return ["[attachments]:", ...lines].join("\n");
}
/**
 * Renders a labelled bullet list of names for inclusion in the turn text.
 *
 * Each name is trimmed; names that are empty after trimming are dropped.
 *
 * @param kind Label placed in the `[kind]:` header line.
 * @param names Raw names to list.
 * @returns The header followed by one `- name` line per kept name, or an
 * empty string when no name survives trimming.
 */
function namesSummary(kind: string, names: string[]): string {
    const bullets: string[] = [];
    for (const raw of names) {
        const name = raw.trim();
        if (name) bullets.push(`- ${name}`);
    }
    if (bullets.length === 0) return "";
    return `[${kind}]:\n${bullets.join("\n")}`;
}
function supportedAttachmentKinds(provider: AiProvider, bot: boolean): Set<AttachmentKind> {
if (bot) return new Set<AttachmentKind>();
@@ -160,6 +168,10 @@ function renderContentText(
parts.push("[message_state]: deleted_by_bot");
}
if (turn.documentNames?.length) {
parts.push(namesSummary("documents", turn.documentNames));
}
if (unsupported.length) {
parts.push(attachmentSummary(unsupported));
}
@@ -291,6 +303,7 @@ export async function buildConversationSnapshot(
content: part.content,
deletedByBotAt: part.deletedByBotAt,
attachments: buildConversationAttachments(part),
documentNames: part.documentNames,
}));
const imageCount = turns.reduce((sum, turn) => {
+39
View File
@@ -0,0 +1,39 @@
import type {AiDownloadedFile} from "./telegram-attachments.js";
/**
 * Builds the string identity of a downloaded file from its kind, file id,
 * content hash and file name, joined with `:`.
 *
 * A missing hash contributes an empty segment, so the key always has four
 * segments.
 *
 * @param download File to derive the key for.
 * @returns The composite key.
 */
function downloadKey(download: AiDownloadedFile): string {
    const {kind, fileId, sha256, fileName} = download;
    const segments = [kind, fileId, sha256 ?? "", fileName];
    return segments.join(":");
}
/**
 * Concatenates the current message's downloads with those gathered from the
 * reply chain, dropping duplicates.
 *
 * Two downloads are duplicates when `downloadKey` yields the same key for
 * both. The first occurrence wins, so current downloads take precedence over
 * reply-chain ones and the relative order of kept items is preserved.
 *
 * @param currentDownloads Downloads attached to the current message.
 * @param replyChainDownloads Downloads recovered from earlier messages.
 * @returns A new array; neither input is modified.
 */
export function mergeReplyChainDownloads(
    currentDownloads: readonly AiDownloadedFile[],
    replyChainDownloads: readonly AiDownloadedFile[],
): AiDownloadedFile[] {
    const seenKeys = new Set<string>();
    return [...currentDownloads, ...replyChainDownloads].filter(candidate => {
        const key = downloadKey(candidate);
        if (seenKeys.has(key)) return false;
        seenKeys.add(key);
        return true;
    });
}
/**
 * Decides whether the request text refers specifically to the files attached
 * to the current message.
 *
 * The check is a case-insensitive substring match against a fixed set of
 * English and Russian phrases ("this file", "this document" and their Russian
 * equivalents). Runs of whitespace (line breaks, tabs, non-breaking spaces,
 * repeated spaces) are collapsed to a single space before matching, so a
 * phrase split across lines or typed with extra spacing is still recognised.
 *
 * @param text User-supplied request text.
 * @param currentDownloads Downloads attached to the current message.
 * @returns `true` only when there is at least one current download and the
 * text contains one of the phrases; `false` otherwise.
 */
export function shouldPreferCurrentDownloads(text: string, currentDownloads: readonly AiDownloadedFile[]): boolean {
    if (!currentDownloads.length) return false;

    // Collapse every whitespace run so the two-word phrases below match
    // regardless of how the words were separated in the message.
    const normalized = text.trim().toLowerCase().replace(/\s+/g, " ");
    if (!normalized) return false;

    const phrases = ["this file", "this document", "этот файл", "этот документ"];
    return phrases.some(phrase => normalized.includes(phrase));
}
+18 -10
View File
@@ -4,7 +4,8 @@ import {Environment} from "../common/environment";
import {UserRequestPipeline, type UserRequestPipelineState, type UserRequestPipelineStage} from "./user-request-pipeline";
import {PipelineFallbackNotifier} from "./user-request-pipeline/fallback-notifier";
import {buildToolRankFallbackTargetDetails} from "./user-request-pipeline/fallback-target-details";
import type {AiDownloadedFile} from "./telegram-attachments";
import {mergeReplyChainDownloads, shouldPreferCurrentDownloads} from "./reply-chain-downloads";
import {attachmentsToDownloadedFiles, type AiDownloadedFile} from "./telegram-attachments";
import type {TelegramStreamMessage} from "./telegram-stream-message";
import type {ChatMessage} from "./chat-messages-types";
import type {OpenAIChatMessage} from "./openai-chat-message";
@@ -23,6 +24,7 @@ import {
stripAudioFromRunnerMessages,
toolRuntimeContextFromDownloads,
transcribeAudioIfNeeded,
collectStoredReplyChainAttachments,
UnifiedRunOptions,
} from "./unified-ai-runner.shared";
import {aiLog} from "../logging/ai-logger";
@@ -92,6 +94,12 @@ export async function prepareUnifiedAiRequestPipeline(params: {
controller: AbortController;
}): Promise<PreparedUnifiedAiRequest> {
const {options, config, downloads, streamMessage, controller} = params;
const replyChainDownloads = shouldPreferCurrentDownloads(options.text, downloads)
? downloads
: mergeReplyChainDownloads(
downloads,
attachmentsToDownloadedFiles(await collectStoredReplyChainAttachments(options.msg)),
);
const prepared: MutablePreparedContext = {
chatMessages: [],
imageCount: 0,
@@ -111,7 +119,7 @@ export async function prepareUnifiedAiRequestPipeline(params: {
details: {
phase: "ai_request_prepare",
provider: options.provider,
downloads: downloads.map(download => ({
downloads: replyChainDownloads.map(download => ({
kind: download.kind,
fileName: download.fileName,
mimeType: download.mimeType,
@@ -128,15 +136,15 @@ export async function prepareUnifiedAiRequestPipeline(params: {
options.msg,
options.text,
options.provider,
downloads,
replyChainDownloads,
config,
runtimeTargetFor(options, config),
options.responseLanguage ?? DEFAULT_AI_RESPONSE_LANGUAGE,
);
prepared.chatMessages = collected.chatMessages as typeof prepared.chatMessages;
prepared.imageCount = collected.imageCount;
prepared.firstRoundStatus = initialStatus(downloads, prepared.imageCount);
prepared.toolContext = toolRuntimeContextFromDownloads(downloads);
prepared.firstRoundStatus = initialStatus(replyChainDownloads, prepared.imageCount);
prepared.toolContext = toolRuntimeContextFromDownloads(replyChainDownloads);
return {
stage: "collect_conversation_context",
@@ -171,11 +179,11 @@ export async function prepareUnifiedAiRequestPipeline(params: {
prepared.transcript = await transcribeAudioIfNeeded(
options.provider,
options.msg.from?.id,
downloads,
replyChainDownloads,
streamMessage,
controller.signal,
).catch(error => {
if (downloads.some(isTranscribableAudioDownload)) throw error;
if (replyChainDownloads.some(isTranscribableAudioDownload)) throw error;
return "";
});
@@ -190,7 +198,7 @@ export async function prepareUnifiedAiRequestPipeline(params: {
const transcriptArtifact = await persistTranscriptArtifactAttachment({
provider: options.provider,
transcript,
downloads,
downloads: replyChainDownloads,
chatId: options.msg.chat.id,
messageId: options.msg.message_id,
});
@@ -235,7 +243,7 @@ export async function prepareUnifiedAiRequestPipeline(params: {
prepared.preparedDocumentRag = await prepareDocumentRag(
options.provider,
downloads,
replyChainDownloads,
prepared.chatMessages,
streamMessage,
config,
@@ -246,7 +254,7 @@ export async function prepareUnifiedAiRequestPipeline(params: {
const ragArtifact = await persistRagArtifactAttachment({
provider: options.provider,
prepared: prepared.preparedDocumentRag,
downloads,
downloads: replyChainDownloads,
chatId: options.msg.chat.id,
messageId: options.msg.message_id,
details: prepared.preparedDocumentRag?.provider === AiProvider.OPENAI
+3 -3
View File
@@ -34,7 +34,7 @@ import {aiLog, aiLogDuration, aiLogProviderTarget, aiLogToolCall} from "../loggi
import {buildConversationSnapshot, serializeConversationSnapshot} from "./conversation-pipeline.js";
import type {ResponseInputMessageContentList} from "openai/resources/responses/responses";
import {persistToolResultArtifactAttachment} from "./tool-result-artifact-store.js";
import {filterUserVisibleStoredAttachments} from "../common/attachment-visibility.js";
import {filterUserInputStoredAttachments} from "../common/attachment-visibility.js";
export type {Message} from "typescript-telegram-bot-api";
export type {AiRuntimeTarget} from "./ai-runtime-target";
@@ -515,13 +515,13 @@ export function addMessageAttachmentKinds(msg: Message | undefined, kinds: Set<A
if (msg.video) kinds.add("video");
}
export async function collectStoredReplyChainAttachments(msg: Message, limit: number = 1): Promise<StoredAttachment[]> {
export async function collectStoredReplyChainAttachments(msg: Message, limit: number = 40): Promise<StoredAttachment[]> {
const attachments: StoredAttachment[] = [];
const seen = new Set<string>();
let current = await MessageStore.get(msg.chat.id, msg.message_id);
for (let i = 0; current && i < limit; i++) {
for (const attachment of filterUserVisibleStoredAttachments(current?.attachments ?? [])) {
for (const attachment of filterUserInputStoredAttachments(current?.attachments ?? [])) {
const key = [
attachment.kind,
attachment.fileUniqueId || attachment.fileId,
+4
View File
@@ -3,3 +3,7 @@ import type {StoredAttachment} from "../model/stored-attachment";
/**
 * Returns the attachments whose scope is anything other than
 * `"internal_artifact"`, including those with no scope set.
 *
 * @param attachments Attachments to filter; the array is not modified.
 * @returns A new array with internal artifacts removed.
 */
export function filterUserVisibleStoredAttachments(attachments: StoredAttachment[]): StoredAttachment[] {
    const isInternalArtifact = ({scope}: StoredAttachment): boolean => scope === "internal_artifact";
    return attachments.filter(attachment => !isInternalArtifact(attachment));
}
/**
 * Returns only the attachments scoped as `"user_input"`, together with those
 * whose scope is `undefined`.
 *
 * NOTE(review): unscoped attachments are presumably records stored before the
 * scope field existed — confirm against the storage model.
 *
 * @param attachments Attachments to filter; the array is not modified.
 * @returns A new array containing the user-input and unscoped attachments.
 */
export function filterUserInputStoredAttachments(attachments: StoredAttachment[]): StoredAttachment[] {
    return attachments.filter(({scope}) => {
        if (scope === undefined) return true;
        return scope === "user_input";
    });
}
+3
View File
@@ -20,6 +20,9 @@ export type MessagePart = {
audios?: string[];
audioParts?: MessageAudioPart[];
documents?: string[];
documentNames?: string[];
videos?: string[];
videoNotes?: string[];
videoNames?: string[];
videoNoteNames?: string[];
}
+48 -5
View File
@@ -27,6 +27,7 @@ import {UserStore} from "../common/user-store.js";
import fs from "node:fs";
import path from "node:path";
import {MessageStore} from "../common/message-store.js";
import {filterUserInputStoredAttachments} from "../common/attachment-visibility.js";
import {SystemInfo} from "../commands/system-info.js";
import {PrefixResponse} from "../commands/prefix-response.js";
import {ChatCommand} from "../base/chat-command.js";
@@ -1487,12 +1488,13 @@ export async function collectReplyChainText(options: ReplyChainOptions): Promise
const cleanText = cutPrefix ? cutPrefixes(rawText) : rawText;
const imageNames = await loadImagesIfExists(msg);
const messageDownloads = includeDownloads ? downloads : [];
const storedImageAttachments = isStoredMessage(msg)
? (msg.attachments ?? []).filter(attachment => attachment.kind === "image" && fs.existsSync(attachment.cachePath))
const storedAttachments = isStoredMessage(msg)
? filterUserInputStoredAttachments(msg.attachments ?? []).filter(attachment => fs.existsSync(attachment.cachePath))
: [];
const storedImageAttachments = storedAttachments.filter(attachment => attachment.kind === "image");
if (!cleanText && !quoteText && textRequired) return;
if (!cleanText && !quoteText && !imageNames?.length && !storedImageAttachments.length && !messageDownloads.length) return;
if (!cleanText && !quoteText && !imageNames?.length && !storedAttachments.length && !messageDownloads.length) return;
const fromId = isStoredMessage(msg) ? msg.fromId : msg.from?.id;
const user = await UserStore.get(isStoredMessage(msg) ? msg.fromId : msg.from?.id ?? -1);
@@ -1527,11 +1529,19 @@ export async function collectReplyChainText(options: ReplyChainOptions): Promise
});
const imageParts = [...photoImageParts, ...cachedImageParts];
const storedDocumentAttachments = storedAttachments.filter(attachment => attachment.kind === "document");
const storedVideoAttachments = storedAttachments.filter(attachment => attachment.kind === "video");
const storedVideoNoteAttachments = storedAttachments.filter(attachment => attachment.kind === "video-note");
const storedAudioAttachments = storedAttachments.filter(attachment => attachment.kind === "audio");
const audios: string[] = [];
const audioParts: MessageAudioPart[] = [];
const documents: string[] = [];
const documentNames: string[] = [];
const videos: string[] = [];
const videoNames: string[] = [];
const videoNotes: string[] = [];
const videoNoteNames: string[] = [];
if (messageDownloads.length) {
messageDownloads
@@ -1544,21 +1554,51 @@ export async function collectReplyChainText(options: ReplyChainOptions): Promise
messageDownloads
.filter(d => d.kind === "document")
.forEach(d => documents.push(d.buffer.toString("base64")));
.forEach(d => {
documents.push(d.buffer.toString("base64"));
documentNames.push(d.fileName);
});
messageDownloads
.filter(d => d.kind === "video")
.forEach(v => videos.push(v.buffer.toString("base64")));
.forEach(v => {
videos.push(v.buffer.toString("base64"));
videoNames.push(v.fileName);
});
messageDownloads
.filter(d => d.kind === "video-note")
.forEach(v => {
const data = v.buffer.toString("base64");
videoNotes.push(data);
videoNoteNames.push(v.fileName);
audioParts.push({data, mimeType: mimeTypeFromAudioDownload(v)});
});
}
storedAudioAttachments.forEach(attachment => {
const data = Buffer.from(fs.readFileSync(attachment.cachePath)).toString("base64");
audios.push(data);
audioParts.push({data, mimeType: attachment.mimeType || "audio/ogg"});
});
storedDocumentAttachments.forEach(attachment => {
documents.push(Buffer.from(fs.readFileSync(attachment.cachePath)).toString("base64"));
documentNames.push(attachment.fileName);
});
storedVideoAttachments.forEach(attachment => {
videos.push(Buffer.from(fs.readFileSync(attachment.cachePath)).toString("base64"));
videoNames.push(attachment.fileName);
});
storedVideoNoteAttachments.forEach(attachment => {
const data = Buffer.from(fs.readFileSync(attachment.cachePath)).toString("base64");
videoNotes.push(data);
videoNoteNames.push(attachment.fileName);
audioParts.push({data, mimeType: attachment.mimeType || "video/mp4"});
});
const content = [
quoteText ? `[citation]:\n${quoteText}\n\n[message]:\n` : "",
cleanText ?? ""
@@ -1576,8 +1616,11 @@ export async function collectReplyChainText(options: ReplyChainOptions): Promise
audios: audios.length ? audios : undefined,
audioParts: audioParts.length ? audioParts : undefined,
documents: documents.length ? documents : undefined,
documentNames: documentNames.length ? documentNames : undefined,
videos: videos.length ? videos : undefined,
videoNames: videoNames.length ? videoNames : undefined,
videoNotes: videoNotes.length ? videoNotes : undefined,
videoNoteNames: videoNoteNames.length ? videoNoteNames : undefined,
});
}
};