Merge reply-chain documents into AI requests

This commit is contained in:
2026-05-18 20:43:35 +03:00
parent df39d89ea8
commit 53e9798193
9 changed files with 269 additions and 28 deletions
+10 -10
View File
@@ -106,16 +106,16 @@
## 8. Улучшить поведение reply-chain с документами ## 8. Улучшить поведение reply-chain с документами
- [ ] Явно описать стратегию merge: current user attachments + reply-chain user attachments. - [x] Явно описать стратегию merge: current user attachments + reply-chain user attachments.
- [ ] Исключать `scope: internal_artifact` всегда. - [x] Исключать `scope: internal_artifact` всегда.
- [ ] Исключать `scope: bot_output`, если это не user-provided file. - [x] Исключать `scope: bot_output`, если это не user-provided file.
- [ ] Если пользователь отвечает новым документом на ответ бота с предыдущим документом, использовать оба документа. - [x] Если пользователь отвечает новым документом на ответ бота с предыдущим документом, использовать оба документа.
- [ ] Если пользователь отвечает текстом на ответ бота, использовать документы из reply-chain. - [x] Если пользователь отвечает текстом на ответ бота, использовать документы из reply-chain.
- [ ] Если пользователь явно говорит "этот файл", приоритет отдавать новому вложению. - [x] Если пользователь явно говорит "этот файл", приоритет отдавать новому вложению.
- [ ] Если несколько документов, добавлять их имена в prompt/RAG context. - [x] Если несколько документов, добавлять их имена в prompt/RAG context.
- [ ] Добавить tests на follow-up с новым документом. - [x] Добавить tests на follow-up с новым документом.
- [ ] Добавить tests на follow-up без нового документа. - [x] Добавить tests на follow-up без нового документа.
- [ ] Добавить tests на то, что RAG internal JSON не становится пользовательским документом. - [x] Добавить tests на то, что RAG internal JSON не становится пользовательским документом.
## 9. Интеграционные tests без реальных Telegram/AI API ## 9. Интеграционные tests без реальных Telegram/AI API
+13
View File
@@ -32,6 +32,7 @@ export type ConversationTurn = {
content: string; content: string;
deletedByBotAt?: number | null; deletedByBotAt?: number | null;
attachments: ConversationAttachment[]; attachments: ConversationAttachment[];
documentNames?: string[];
}; };
export type ConversationSnapshot = { export type ConversationSnapshot = {
@@ -123,6 +124,13 @@ function attachmentSummary(attachments: ConversationAttachment[]): string {
return ["[attachments]:", ...lines].join("\n"); return ["[attachments]:", ...lines].join("\n");
} }
function namesSummary(kind: string, names: string[]): string {
const filtered = names.map(name => name.trim()).filter(Boolean);
if (!filtered.length) return "";
return [`[${kind}]:`, ...filtered.map(name => `- ${name}`)].join("\n");
}
function supportedAttachmentKinds(provider: AiProvider, bot: boolean): Set<AttachmentKind> { function supportedAttachmentKinds(provider: AiProvider, bot: boolean): Set<AttachmentKind> {
if (bot) return new Set<AttachmentKind>(); if (bot) return new Set<AttachmentKind>();
@@ -160,6 +168,10 @@ function renderContentText(
parts.push("[message_state]: deleted_by_bot"); parts.push("[message_state]: deleted_by_bot");
} }
if (turn.documentNames?.length) {
parts.push(namesSummary("documents", turn.documentNames));
}
if (unsupported.length) { if (unsupported.length) {
parts.push(attachmentSummary(unsupported)); parts.push(attachmentSummary(unsupported));
} }
@@ -291,6 +303,7 @@ export async function buildConversationSnapshot(
content: part.content, content: part.content,
deletedByBotAt: part.deletedByBotAt, deletedByBotAt: part.deletedByBotAt,
attachments: buildConversationAttachments(part), attachments: buildConversationAttachments(part),
documentNames: part.documentNames,
})); }));
const imageCount = turns.reduce((sum, turn) => { const imageCount = turns.reduce((sum, turn) => {
+39
View File
@@ -0,0 +1,39 @@
import type {AiDownloadedFile} from "./telegram-attachments.js";
function downloadKey(download: AiDownloadedFile): string {
return [
download.kind,
download.fileId,
download.sha256 ?? "",
download.fileName,
].join(":");
}
export function mergeReplyChainDownloads(
currentDownloads: readonly AiDownloadedFile[],
replyChainDownloads: readonly AiDownloadedFile[],
): AiDownloadedFile[] {
const result: AiDownloadedFile[] = [];
const seen = new Set<string>();
for (const download of [...currentDownloads, ...replyChainDownloads]) {
const key = downloadKey(download);
if (seen.has(key)) continue;
seen.add(key);
result.push(download);
}
return result;
}
export function shouldPreferCurrentDownloads(text: string, currentDownloads: readonly AiDownloadedFile[]): boolean {
if (!currentDownloads.length) return false;
const normalized = text.trim().toLowerCase();
if (!normalized) return false;
return normalized.includes("this file")
|| normalized.includes("this document")
|| normalized.includes("этот файл")
|| normalized.includes("этот документ");
}
+18 -10
View File
@@ -4,7 +4,8 @@ import {Environment} from "../common/environment";
import {UserRequestPipeline, type UserRequestPipelineState, type UserRequestPipelineStage} from "./user-request-pipeline"; import {UserRequestPipeline, type UserRequestPipelineState, type UserRequestPipelineStage} from "./user-request-pipeline";
import {PipelineFallbackNotifier} from "./user-request-pipeline/fallback-notifier"; import {PipelineFallbackNotifier} from "./user-request-pipeline/fallback-notifier";
import {buildToolRankFallbackTargetDetails} from "./user-request-pipeline/fallback-target-details"; import {buildToolRankFallbackTargetDetails} from "./user-request-pipeline/fallback-target-details";
import type {AiDownloadedFile} from "./telegram-attachments"; import {mergeReplyChainDownloads, shouldPreferCurrentDownloads} from "./reply-chain-downloads";
import {attachmentsToDownloadedFiles, type AiDownloadedFile} from "./telegram-attachments";
import type {TelegramStreamMessage} from "./telegram-stream-message"; import type {TelegramStreamMessage} from "./telegram-stream-message";
import type {ChatMessage} from "./chat-messages-types"; import type {ChatMessage} from "./chat-messages-types";
import type {OpenAIChatMessage} from "./openai-chat-message"; import type {OpenAIChatMessage} from "./openai-chat-message";
@@ -23,6 +24,7 @@ import {
stripAudioFromRunnerMessages, stripAudioFromRunnerMessages,
toolRuntimeContextFromDownloads, toolRuntimeContextFromDownloads,
transcribeAudioIfNeeded, transcribeAudioIfNeeded,
collectStoredReplyChainAttachments,
UnifiedRunOptions, UnifiedRunOptions,
} from "./unified-ai-runner.shared"; } from "./unified-ai-runner.shared";
import {aiLog} from "../logging/ai-logger"; import {aiLog} from "../logging/ai-logger";
@@ -92,6 +94,12 @@ export async function prepareUnifiedAiRequestPipeline(params: {
controller: AbortController; controller: AbortController;
}): Promise<PreparedUnifiedAiRequest> { }): Promise<PreparedUnifiedAiRequest> {
const {options, config, downloads, streamMessage, controller} = params; const {options, config, downloads, streamMessage, controller} = params;
const replyChainDownloads = shouldPreferCurrentDownloads(options.text, downloads)
? downloads
: mergeReplyChainDownloads(
downloads,
attachmentsToDownloadedFiles(await collectStoredReplyChainAttachments(options.msg)),
);
const prepared: MutablePreparedContext = { const prepared: MutablePreparedContext = {
chatMessages: [], chatMessages: [],
imageCount: 0, imageCount: 0,
@@ -111,7 +119,7 @@ export async function prepareUnifiedAiRequestPipeline(params: {
details: { details: {
phase: "ai_request_prepare", phase: "ai_request_prepare",
provider: options.provider, provider: options.provider,
downloads: downloads.map(download => ({ downloads: replyChainDownloads.map(download => ({
kind: download.kind, kind: download.kind,
fileName: download.fileName, fileName: download.fileName,
mimeType: download.mimeType, mimeType: download.mimeType,
@@ -128,15 +136,15 @@ export async function prepareUnifiedAiRequestPipeline(params: {
options.msg, options.msg,
options.text, options.text,
options.provider, options.provider,
downloads, replyChainDownloads,
config, config,
runtimeTargetFor(options, config), runtimeTargetFor(options, config),
options.responseLanguage ?? DEFAULT_AI_RESPONSE_LANGUAGE, options.responseLanguage ?? DEFAULT_AI_RESPONSE_LANGUAGE,
); );
prepared.chatMessages = collected.chatMessages as typeof prepared.chatMessages; prepared.chatMessages = collected.chatMessages as typeof prepared.chatMessages;
prepared.imageCount = collected.imageCount; prepared.imageCount = collected.imageCount;
prepared.firstRoundStatus = initialStatus(downloads, prepared.imageCount); prepared.firstRoundStatus = initialStatus(replyChainDownloads, prepared.imageCount);
prepared.toolContext = toolRuntimeContextFromDownloads(downloads); prepared.toolContext = toolRuntimeContextFromDownloads(replyChainDownloads);
return { return {
stage: "collect_conversation_context", stage: "collect_conversation_context",
@@ -171,11 +179,11 @@ export async function prepareUnifiedAiRequestPipeline(params: {
prepared.transcript = await transcribeAudioIfNeeded( prepared.transcript = await transcribeAudioIfNeeded(
options.provider, options.provider,
options.msg.from?.id, options.msg.from?.id,
downloads, replyChainDownloads,
streamMessage, streamMessage,
controller.signal, controller.signal,
).catch(error => { ).catch(error => {
if (downloads.some(isTranscribableAudioDownload)) throw error; if (replyChainDownloads.some(isTranscribableAudioDownload)) throw error;
return ""; return "";
}); });
@@ -190,7 +198,7 @@ export async function prepareUnifiedAiRequestPipeline(params: {
const transcriptArtifact = await persistTranscriptArtifactAttachment({ const transcriptArtifact = await persistTranscriptArtifactAttachment({
provider: options.provider, provider: options.provider,
transcript, transcript,
downloads, downloads: replyChainDownloads,
chatId: options.msg.chat.id, chatId: options.msg.chat.id,
messageId: options.msg.message_id, messageId: options.msg.message_id,
}); });
@@ -235,7 +243,7 @@ export async function prepareUnifiedAiRequestPipeline(params: {
prepared.preparedDocumentRag = await prepareDocumentRag( prepared.preparedDocumentRag = await prepareDocumentRag(
options.provider, options.provider,
downloads, replyChainDownloads,
prepared.chatMessages, prepared.chatMessages,
streamMessage, streamMessage,
config, config,
@@ -246,7 +254,7 @@ export async function prepareUnifiedAiRequestPipeline(params: {
const ragArtifact = await persistRagArtifactAttachment({ const ragArtifact = await persistRagArtifactAttachment({
provider: options.provider, provider: options.provider,
prepared: prepared.preparedDocumentRag, prepared: prepared.preparedDocumentRag,
downloads, downloads: replyChainDownloads,
chatId: options.msg.chat.id, chatId: options.msg.chat.id,
messageId: options.msg.message_id, messageId: options.msg.message_id,
details: prepared.preparedDocumentRag?.provider === AiProvider.OPENAI details: prepared.preparedDocumentRag?.provider === AiProvider.OPENAI
+3 -3
View File
@@ -34,7 +34,7 @@ import {aiLog, aiLogDuration, aiLogProviderTarget, aiLogToolCall} from "../loggi
import {buildConversationSnapshot, serializeConversationSnapshot} from "./conversation-pipeline.js"; import {buildConversationSnapshot, serializeConversationSnapshot} from "./conversation-pipeline.js";
import type {ResponseInputMessageContentList} from "openai/resources/responses/responses"; import type {ResponseInputMessageContentList} from "openai/resources/responses/responses";
import {persistToolResultArtifactAttachment} from "./tool-result-artifact-store.js"; import {persistToolResultArtifactAttachment} from "./tool-result-artifact-store.js";
import {filterUserVisibleStoredAttachments} from "../common/attachment-visibility.js"; import {filterUserInputStoredAttachments} from "../common/attachment-visibility.js";
export type {Message} from "typescript-telegram-bot-api"; export type {Message} from "typescript-telegram-bot-api";
export type {AiRuntimeTarget} from "./ai-runtime-target"; export type {AiRuntimeTarget} from "./ai-runtime-target";
@@ -515,13 +515,13 @@ export function addMessageAttachmentKinds(msg: Message | undefined, kinds: Set<A
if (msg.video) kinds.add("video"); if (msg.video) kinds.add("video");
} }
export async function collectStoredReplyChainAttachments(msg: Message, limit: number = 1): Promise<StoredAttachment[]> { export async function collectStoredReplyChainAttachments(msg: Message, limit: number = 40): Promise<StoredAttachment[]> {
const attachments: StoredAttachment[] = []; const attachments: StoredAttachment[] = [];
const seen = new Set<string>(); const seen = new Set<string>();
let current = await MessageStore.get(msg.chat.id, msg.message_id); let current = await MessageStore.get(msg.chat.id, msg.message_id);
for (let i = 0; current && i < limit; i++) { for (let i = 0; current && i < limit; i++) {
for (const attachment of filterUserVisibleStoredAttachments(current?.attachments ?? [])) { for (const attachment of filterUserInputStoredAttachments(current?.attachments ?? [])) {
const key = [ const key = [
attachment.kind, attachment.kind,
attachment.fileUniqueId || attachment.fileId, attachment.fileUniqueId || attachment.fileId,
+4
View File
@@ -3,3 +3,7 @@ import type {StoredAttachment} from "../model/stored-attachment";
export function filterUserVisibleStoredAttachments(attachments: StoredAttachment[]): StoredAttachment[] { export function filterUserVisibleStoredAttachments(attachments: StoredAttachment[]): StoredAttachment[] {
return attachments.filter(attachment => attachment.scope !== "internal_artifact"); return attachments.filter(attachment => attachment.scope !== "internal_artifact");
} }
export function filterUserInputStoredAttachments(attachments: StoredAttachment[]): StoredAttachment[] {
return attachments.filter(attachment => attachment.scope === "user_input" || attachment.scope === undefined);
}
+3
View File
@@ -20,6 +20,9 @@ export type MessagePart = {
audios?: string[]; audios?: string[];
audioParts?: MessageAudioPart[]; audioParts?: MessageAudioPart[];
documents?: string[]; documents?: string[];
documentNames?: string[];
videos?: string[]; videos?: string[];
videoNotes?: string[]; videoNotes?: string[];
videoNames?: string[];
videoNoteNames?: string[];
} }
+48 -5
View File
@@ -27,6 +27,7 @@ import {UserStore} from "../common/user-store.js";
import fs from "node:fs"; import fs from "node:fs";
import path from "node:path"; import path from "node:path";
import {MessageStore} from "../common/message-store.js"; import {MessageStore} from "../common/message-store.js";
import {filterUserInputStoredAttachments} from "../common/attachment-visibility.js";
import {SystemInfo} from "../commands/system-info.js"; import {SystemInfo} from "../commands/system-info.js";
import {PrefixResponse} from "../commands/prefix-response.js"; import {PrefixResponse} from "../commands/prefix-response.js";
import {ChatCommand} from "../base/chat-command.js"; import {ChatCommand} from "../base/chat-command.js";
@@ -1487,12 +1488,13 @@ export async function collectReplyChainText(options: ReplyChainOptions): Promise
const cleanText = cutPrefix ? cutPrefixes(rawText) : rawText; const cleanText = cutPrefix ? cutPrefixes(rawText) : rawText;
const imageNames = await loadImagesIfExists(msg); const imageNames = await loadImagesIfExists(msg);
const messageDownloads = includeDownloads ? downloads : []; const messageDownloads = includeDownloads ? downloads : [];
const storedImageAttachments = isStoredMessage(msg) const storedAttachments = isStoredMessage(msg)
? (msg.attachments ?? []).filter(attachment => attachment.kind === "image" && fs.existsSync(attachment.cachePath)) ? filterUserInputStoredAttachments(msg.attachments ?? []).filter(attachment => fs.existsSync(attachment.cachePath))
: []; : [];
const storedImageAttachments = storedAttachments.filter(attachment => attachment.kind === "image");
if (!cleanText && !quoteText && textRequired) return; if (!cleanText && !quoteText && textRequired) return;
if (!cleanText && !quoteText && !imageNames?.length && !storedImageAttachments.length && !messageDownloads.length) return; if (!cleanText && !quoteText && !imageNames?.length && !storedAttachments.length && !messageDownloads.length) return;
const fromId = isStoredMessage(msg) ? msg.fromId : msg.from?.id; const fromId = isStoredMessage(msg) ? msg.fromId : msg.from?.id;
const user = await UserStore.get(isStoredMessage(msg) ? msg.fromId : msg.from?.id ?? -1); const user = await UserStore.get(isStoredMessage(msg) ? msg.fromId : msg.from?.id ?? -1);
@@ -1527,11 +1529,19 @@ export async function collectReplyChainText(options: ReplyChainOptions): Promise
}); });
const imageParts = [...photoImageParts, ...cachedImageParts]; const imageParts = [...photoImageParts, ...cachedImageParts];
const storedDocumentAttachments = storedAttachments.filter(attachment => attachment.kind === "document");
const storedVideoAttachments = storedAttachments.filter(attachment => attachment.kind === "video");
const storedVideoNoteAttachments = storedAttachments.filter(attachment => attachment.kind === "video-note");
const storedAudioAttachments = storedAttachments.filter(attachment => attachment.kind === "audio");
const audios: string[] = []; const audios: string[] = [];
const audioParts: MessageAudioPart[] = []; const audioParts: MessageAudioPart[] = [];
const documents: string[] = []; const documents: string[] = [];
const documentNames: string[] = [];
const videos: string[] = []; const videos: string[] = [];
const videoNames: string[] = [];
const videoNotes: string[] = []; const videoNotes: string[] = [];
const videoNoteNames: string[] = [];
if (messageDownloads.length) { if (messageDownloads.length) {
messageDownloads messageDownloads
@@ -1544,21 +1554,51 @@ export async function collectReplyChainText(options: ReplyChainOptions): Promise
messageDownloads messageDownloads
.filter(d => d.kind === "document") .filter(d => d.kind === "document")
.forEach(d => documents.push(d.buffer.toString("base64"))); .forEach(d => {
documents.push(d.buffer.toString("base64"));
documentNames.push(d.fileName);
});
messageDownloads messageDownloads
.filter(d => d.kind === "video") .filter(d => d.kind === "video")
.forEach(v => videos.push(v.buffer.toString("base64"))); .forEach(v => {
videos.push(v.buffer.toString("base64"));
videoNames.push(v.fileName);
});
messageDownloads messageDownloads
.filter(d => d.kind === "video-note") .filter(d => d.kind === "video-note")
.forEach(v => { .forEach(v => {
const data = v.buffer.toString("base64"); const data = v.buffer.toString("base64");
videoNotes.push(data); videoNotes.push(data);
videoNoteNames.push(v.fileName);
audioParts.push({data, mimeType: mimeTypeFromAudioDownload(v)}); audioParts.push({data, mimeType: mimeTypeFromAudioDownload(v)});
}); });
} }
storedAudioAttachments.forEach(attachment => {
const data = Buffer.from(fs.readFileSync(attachment.cachePath)).toString("base64");
audios.push(data);
audioParts.push({data, mimeType: attachment.mimeType || "audio/ogg"});
});
storedDocumentAttachments.forEach(attachment => {
documents.push(Buffer.from(fs.readFileSync(attachment.cachePath)).toString("base64"));
documentNames.push(attachment.fileName);
});
storedVideoAttachments.forEach(attachment => {
videos.push(Buffer.from(fs.readFileSync(attachment.cachePath)).toString("base64"));
videoNames.push(attachment.fileName);
});
storedVideoNoteAttachments.forEach(attachment => {
const data = Buffer.from(fs.readFileSync(attachment.cachePath)).toString("base64");
videoNotes.push(data);
videoNoteNames.push(attachment.fileName);
audioParts.push({data, mimeType: attachment.mimeType || "video/mp4"});
});
const content = [ const content = [
quoteText ? `[citation]:\n${quoteText}\n\n[message]:\n` : "", quoteText ? `[citation]:\n${quoteText}\n\n[message]:\n` : "",
cleanText ?? "" cleanText ?? ""
@@ -1576,8 +1616,11 @@ export async function collectReplyChainText(options: ReplyChainOptions): Promise
audios: audios.length ? audios : undefined, audios: audios.length ? audios : undefined,
audioParts: audioParts.length ? audioParts : undefined, audioParts: audioParts.length ? audioParts : undefined,
documents: documents.length ? documents : undefined, documents: documents.length ? documents : undefined,
documentNames: documentNames.length ? documentNames : undefined,
videos: videos.length ? videos : undefined, videos: videos.length ? videos : undefined,
videoNames: videoNames.length ? videoNames : undefined,
videoNotes: videoNotes.length ? videoNotes : undefined, videoNotes: videoNotes.length ? videoNotes : undefined,
videoNoteNames: videoNoteNames.length ? videoNoteNames : undefined,
}); });
} }
}; };
+131
View File
@@ -0,0 +1,131 @@
import test from "node:test";
import assert from "node:assert/strict";
const {filterUserInputStoredAttachments} = await import("../dist/common/attachment-visibility.js");
const {mergeReplyChainDownloads, shouldPreferCurrentDownloads} = await import("../dist/ai/reply-chain-downloads.js");
test("reply chain attachment visibility keeps only user input attachments", () => {
const attachments = filterUserInputStoredAttachments([
{
kind: "document",
fileId: "user-doc",
fileName: "user.txt",
cachePath: "/tmp/user.txt",
scope: "user_input",
},
{
kind: "document",
fileId: "bot-doc",
fileName: "bot.txt",
cachePath: "/tmp/bot.txt",
scope: "bot_output",
},
{
kind: "document",
fileId: "internal-doc",
fileName: "internal.json",
cachePath: "/tmp/internal.json",
scope: "internal_artifact",
},
]);
assert.equal(attachments.length, 1);
assert.equal(attachments[0].fileId, "user-doc");
});
test("reply chain downloads keep current input first and deduplicate chain copies", () => {
const merged = mergeReplyChainDownloads(
[
{
kind: "document",
fileId: "new-doc",
fileName: "new.txt",
buffer: Buffer.from("new"),
path: "/tmp/new.txt",
},
{
kind: "document",
fileId: "shared-doc",
fileName: "shared.txt",
buffer: Buffer.from("current"),
path: "/tmp/current-shared.txt",
},
],
[
{
kind: "document",
fileId: "shared-doc",
fileName: "shared.txt",
buffer: Buffer.from("reply-chain"),
path: "/tmp/reply-shared.txt",
},
{
kind: "document",
fileId: "old-doc",
fileName: "old.txt",
buffer: Buffer.from("old"),
path: "/tmp/old.txt",
},
],
);
assert.equal(merged.length, 3);
assert.equal(merged[0].fileId, "new-doc");
assert.equal(merged[1].fileId, "shared-doc");
assert.equal(merged[2].fileId, "old-doc");
});
test("reply chain downloads are used when there is no new document", () => {
const merged = mergeReplyChainDownloads([], [
{
kind: "document",
fileId: "reply-doc",
fileName: "reply.txt",
buffer: Buffer.from("reply"),
path: "/tmp/reply.txt",
},
]);
assert.equal(merged.length, 1);
assert.equal(merged[0].fileId, "reply-doc");
});
test("reply chain prefers current downloads when user points to this file", () => {
assert.equal(
shouldPreferCurrentDownloads("Please answer about this file", [{
kind: "document",
fileId: "new-doc",
fileName: "new.txt",
buffer: Buffer.from("new"),
path: "/tmp/new.txt",
}]),
true,
);
assert.equal(
shouldPreferCurrentDownloads("ответь по этому файлу", [{
kind: "document",
fileId: "new-doc",
fileName: "new.txt",
buffer: Buffer.from("new"),
path: "/tmp/new.txt",
}]),
false,
);
assert.equal(
shouldPreferCurrentDownloads("ответь на этот файл", [{
kind: "document",
fileId: "new-doc",
fileName: "new.txt",
buffer: Buffer.from("new"),
path: "/tmp/new.txt",
}]),
true,
);
assert.equal(
shouldPreferCurrentDownloads("this file", []),
false,
);
});