Add stale RAG provider cleanup

This commit is contained in:
2026-05-18 21:27:41 +03:00
parent 75253534d8
commit 7b2bc93bc1
5 changed files with 248 additions and 1 deletions
+75
View File
@@ -0,0 +1,75 @@
import type {RagArtifactPayload} from "./rag-artifact-payload";
export type ArtifactLike = {
id: string;
createdAt: string;
payload: string;
};
export type RagCleanupTarget = {
artifactId: string;
createdAt: string;
provider: RagArtifactPayload["providerState"]["provider"];
vectorStoreIds?: string[];
uploadedFileIds?: string[];
libraryId?: string;
};
export type RagCleanupPlan = {
cutoffAt: string;
targets: RagCleanupTarget[];
};
function parseRagArtifactPayload(payload: string): RagArtifactPayload | null {
try {
const parsed = JSON.parse(payload) as Partial<RagArtifactPayload>;
if (!parsed || parsed.artifactKind !== "rag" || !parsed.providerState) return null;
return parsed as RagArtifactPayload;
} catch {
return null;
}
}
export function buildStaleRagCleanupPlan(
artifacts: ArtifactLike[],
retentionDays = 14,
now = new Date(),
): RagCleanupPlan {
const cutoffAt = new Date(now.getTime() - retentionDays * 24 * 60 * 60 * 1000).toISOString();
const targets: RagCleanupTarget[] = [];
for (const artifact of artifacts) {
if (artifact.createdAt > cutoffAt) continue;
const payload = parseRagArtifactPayload(artifact.payload);
if (!payload || payload.artifactKind !== "rag") continue;
switch (payload.providerState.provider) {
case "OPENAI":
if (payload.providerState.vectorStoreIds.length || payload.providerState.uploadedFileIds.length) {
targets.push({
artifactId: artifact.id,
createdAt: artifact.createdAt,
provider: payload.providerState.provider,
vectorStoreIds: [...payload.providerState.vectorStoreIds],
uploadedFileIds: [...payload.providerState.uploadedFileIds],
});
}
break;
case "MISTRAL":
if (payload.providerState.libraryId) {
targets.push({
artifactId: artifact.id,
createdAt: artifact.createdAt,
provider: payload.providerState.provider,
libraryId: payload.providerState.libraryId,
});
}
break;
case "OLLAMA":
break;
}
}
return {cutoffAt, targets};
}
+117
View File
@@ -0,0 +1,117 @@
import {appLogger} from "../logging/logger.js";
import {DatabaseManager} from "../db/database-manager.js";
import {AiProvider} from "../model/ai-provider.js";
import {createOpenAiClient, resolveAiRuntimeTarget} from "./ai-runtime-target.js";
import {deleteMistralLibrary} from "./unified-ai-runner.shared.js";
import {buildStaleRagCleanupPlan} from "./rag-retention-planner.js";
const logger = appLogger.child("rag-retention");
function unique(values: string[]): string[] {
return [...new Set(values.filter(Boolean))];
}
async function cleanupOpenAiRag(vectorStoreIds: string[], uploadedFileIds: string[]): Promise<void> {
const target = resolveAiRuntimeTarget(AiProvider.OPENAI, "documents");
const client = createOpenAiClient(target);
for (const vectorStoreId of unique(vectorStoreIds)) {
const startedAt = Date.now();
logger.info("openai.vector_store.cleanup.start", {vectorStoreId});
try {
await client.vectorStores.delete(vectorStoreId);
logger.success("openai.vector_store.cleanup.done", {vectorStoreId, duration: `${Date.now() - startedAt}ms`});
} catch (error) {
logger.warn("openai.vector_store.cleanup.failed", {
vectorStoreId,
duration: `${Date.now() - startedAt}ms`,
error: error instanceof Error ? error : String(error),
});
}
}
for (const fileId of unique(uploadedFileIds)) {
const startedAt = Date.now();
logger.info("openai.file.cleanup.start", {fileId});
try {
await client.files.delete(fileId);
logger.success("openai.file.cleanup.done", {fileId, duration: `${Date.now() - startedAt}ms`});
} catch (error) {
logger.warn("openai.file.cleanup.failed", {
fileId,
duration: `${Date.now() - startedAt}ms`,
error: error instanceof Error ? error : String(error),
});
}
}
}
async function cleanupMistralRag(libraryId: string): Promise<void> {
const target = resolveAiRuntimeTarget(AiProvider.MISTRAL, "documents");
const startedAt = Date.now();
logger.info("mistral.library.cleanup.start", {libraryId});
try {
await deleteMistralLibrary(libraryId, target);
logger.success("mistral.library.cleanup.done", {libraryId, duration: `${Date.now() - startedAt}ms`});
} catch (error) {
logger.warn("mistral.library.cleanup.failed", {
libraryId,
duration: `${Date.now() - startedAt}ms`,
error: error instanceof Error ? error : String(error),
});
}
}
export async function cleanupStaleRagProviderState(retentionDays = 14): Promise<{
scannedArtifacts: number;
cleanupTargets: number;
openaiTargets: number;
mistralTargets: number;
}> {
const startedAt = Date.now();
const artifacts = await DatabaseManager.getAllArtifacts().catch(() => []);
const plan = buildStaleRagCleanupPlan(artifacts, retentionDays);
logger.info("cleanup.start", {
retentionDays,
scannedArtifacts: artifacts.length,
cleanupTargets: plan.targets.length,
cutoffAt: plan.cutoffAt,
});
let openaiTargets = 0;
let mistralTargets = 0;
for (const target of plan.targets) {
switch (target.provider) {
case "OPENAI":
openaiTargets += 1;
await cleanupOpenAiRag(target.vectorStoreIds ?? [], target.uploadedFileIds ?? []);
break;
case "MISTRAL":
mistralTargets += 1;
if (target.libraryId) {
await cleanupMistralRag(target.libraryId);
}
break;
case "OLLAMA":
break;
}
}
logger.success("cleanup.done", {
retentionDays,
scannedArtifacts: artifacts.length,
cleanupTargets: plan.targets.length,
openaiTargets,
mistralTargets,
duration: `${Date.now() - startedAt}ms`,
});
return {
scannedArtifacts: artifacts.length,
cleanupTargets: plan.targets.length,
openaiTargets,
mistralTargets,
};
}