Files
tg-chat-bot/src/ai/tools/search-notes.ts
T
2026-05-13 12:05:55 +03:00

394 lines
12 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import {AiTool} from "../tool-types";
import path from "node:path";
import {readdir, readFile} from "node:fs/promises";
import {notesDir, notesRootFile} from "../../index";
import {asNonEmptyString} from "./utils";
import {toolsLogger} from "./tool-logger";
// Child logger created from the shared tools logger under the name "search-notes".
const logger = toolsLogger.child("search-notes");
/** Part of a note in which the query scored above zero. */
export type SearchNoteMatchedField = "file_name" | "title" | "content";
/** A single note returned by `searchNotes`. */
export type SearchNoteItem = {
// Directory entry name of the note, including the `.md` extension.
fileName: string;
// `notesDir` joined with `fileName`.
filePath: string;
// `filePath` relative to the directory that contains `notesRootFile`.
relativePath: string;
// Title produced by `extractNoteTitle` (heading text, or the file name without `.md`).
title: string;
// Highest of the file-name, title, and content scores for this note.
score: number;
// Every field whose individual score was greater than zero.
matchedFields: SearchNoteMatchedField[];
// Excerpt of the content around the match; only attempted when the content score is positive.
snippet?: string;
};
/**
 * Outcome of `searchNotes`, discriminated by `success`:
 * either the ranked results or an error message.
 */
export type SearchNotesResult =
| { success: true; results: SearchNoteItem[] }
| { success: false; error: string };
/**
 * Declaration of the `search_notes` function tool.
 *
 * Parameters: a required string `query` and an optional integer `limit`
 * (declared range 1-10, default 3). The object is checked against `AiTool`
 * with `satisfies`, so its literal types are kept.
 */
export const searchNotesTool = {
type: "function",
function: {
name: "search_notes",
description:
"Search Markdown notes by file name, note title, and full note content. Supports fuzzy matching. Use this when the user refers to a note by title, topic, partial title, approximate name, keyword, or something written inside the note. Returns success=true and results[], where each result contains fileName, title, score, matchedFields, relativePath, and optional snippet. Later note tools should use results[0].fileName unless multiple results are ambiguous.",
parameters: {
type: "object",
properties: {
query: {
type: "string",
description:
"Search query for finding notes by file name, title, topic, keywords, or content. Can be partial, approximate, or contain typos. Use a short clean phrase, not the full user sentence.",
},
limit: {
type: "integer",
description:
"Maximum number of search results to return. Defaults to 3. Maximum is 10.",
minimum: 1,
maximum: 10,
default: 3,
},
},
required: ["query"],
},
},
} satisfies AiTool;
/**
 * Searches the Markdown files directly inside `notesDir` and ranks them
 * against `args.query`.
 *
 * Each `.md` file is scored three ways (file name, title, content); the note's
 * overall score is the highest of the three. Notes with a score of zero are
 * dropped, the rest are sorted by descending score and cut to `args.limit`
 * (parsed by `parseSearchLimit`, so 1-10 with a default of 3).
 *
 * @param args Tool arguments; `query` is required, `limit` is optional.
 * @returns `success: true` with the ranked results, or `success: false` with
 *          an error message when the query is missing or the directory
 *          listing fails. This function does not throw.
 */
export async function searchNotes(
    args?: Record<string, unknown>,
): Promise<SearchNotesResult> {
    const startedAt = Date.now();
    logger.debug("start", {args});

    const query = asNonEmptyString(args?.query) ?? "";
    if (!query.trim().length) {
        return {success: false, error: "No query provided"};
    }
    const limit = parseSearchLimit(args?.limit);

    try {
        // Only regular files directly inside notesDir are considered (no recursion).
        const entries = await readdir(notesDir, {withFileTypes: true});
        const markdownFiles = entries
            .filter((entry) => entry.isFile())
            .map((entry) => entry.name)
            .filter((fileName) => fileName.endsWith(".md"));

        const notes = await Promise.all(
            markdownFiles.map(async (fileName) => {
                const filePath = path.join(notesDir, fileName);
                const relativePath = path.relative(path.dirname(notesRootFile), filePath);

                let content = "";
                try {
                    content = await readFile(filePath, "utf-8");
                } catch (readError) {
                    // Best effort: an unreadable note is still searchable by file name,
                    // but leave a trace so the failure is not completely invisible.
                    logger.debug("read-failed", {
                        fileName,
                        error: readError instanceof Error ? readError.message : String(readError),
                    });
                }

                const title = extractNoteTitle(fileName, content);
                const fileNameWithoutExtension = path.basename(fileName, ".md");

                const fileNameScore = calculateFuzzyScore(query, fileNameWithoutExtension);
                const titleScore = calculateFuzzyScore(query, title);
                const contentScore = calculateContentScore(query, content);

                const matchedFields: SearchNoteMatchedField[] = [];
                if (fileNameScore > 0) {
                    matchedFields.push("file_name");
                }
                if (titleScore > 0) {
                    matchedFields.push("title");
                }
                if (contentScore > 0) {
                    matchedFields.push("content");
                }

                return {
                    fileName,
                    filePath,
                    relativePath,
                    title,
                    score: Math.max(fileNameScore, titleScore, contentScore),
                    matchedFields,
                    // A snippet only makes sense when the content itself matched.
                    snippet:
                        contentScore > 0
                            ? buildContentSnippet(query, content)
                            : undefined,
                };
            }),
        );

        const results = notes
            .filter((note) => note.score > 0)
            .sort((a, b) => b.score - a.score)
            .slice(0, limit);

        logger.debug("done", {query, limit, results: results.length, duration: logger.duration(startedAt)});
        return {success: true, results};
    } catch (error) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        logger.debug("failed", {query, error: errorMessage, duration: logger.duration(startedAt)});
        return {success: false, error: `Failed to search notes: ${errorMessage}`};
    }
}
/**
 * Converts the raw `limit` argument into an integer between 1 and 10.
 *
 * Numbers are used as-is, strings are parsed as base-10 integers, and any
 * other type (or a non-finite number) yields the default of 3. Fractions are
 * rounded down before clamping.
 */
function parseSearchLimit(value: unknown): number {
    const fallbackLimit = 3;

    let candidate = fallbackLimit;
    if (typeof value === "number") {
        candidate = value;
    } else if (typeof value === "string") {
        candidate = Number.parseInt(value, 10);
    }

    // NaN and +/-Infinity both fall back to the default rather than being clamped.
    if (!Number.isFinite(candidate)) {
        return fallbackLimit;
    }

    const whole = Math.floor(candidate);
    return Math.min(Math.max(whole, 1), 10);
}
/**
 * Derives a display title for a note.
 *
 * Uses the text of the first non-empty level-1 Markdown heading (`# Title`)
 * found in `content`; when there is none, falls back to the file name without
 * its `.md` extension.
 *
 * @param fileName Note file name, used for the fallback title.
 * @param content  Raw note content (may be empty).
 * @returns The trimmed heading text or the extension-less file name.
 */
function extractNoteTitle(fileName: string, content: string): string {
    // The separator is limited to spaces/tabs: `\s+` would also consume line
    // breaks, so a bare "#" line would capture the *following* line as the title.
    // `\S` skips headings that contain only whitespace so a later real heading wins.
    const headingMatch = content.match(/^#[ \t]+(\S.*)$/m);
    const heading = headingMatch?.[1]?.trim();
    if (heading) {
        return heading;
    }
    return path.basename(fileName, ".md");
}
/**
 * Scores how well `query` matches a short text such as a file name or title.
 *
 * Both inputs are normalized first. Direct hits are ranked highest
 * (identical 100, prefix 90, substring 85); otherwise the best of three fuzzy
 * signals is returned: per-word similarity, a fixed 55 when the query is a
 * character subsequence of the value, and whole-string edit-distance similarity.
 * Returns 0 when either side is empty after normalization.
 */
function calculateFuzzyScore(query: string, value: string): number {
    const needle = normalizeSearchText(query);
    const haystack = normalizeSearchText(value);
    if (needle.length === 0 || haystack.length === 0) {
        return 0;
    }

    // Direct hits, strongest first.
    if (haystack === needle) {
        return 100;
    }
    if (haystack.startsWith(needle)) {
        return 90;
    }
    if (haystack.includes(needle)) {
        return 85;
    }

    const toWords = (text: string): string[] =>
        text.split(" ").filter((word) => word.length > 0);

    // No direct hit: take the most favourable fuzzy signal.
    const fuzzyCandidates = [
        calculateWordMatchScore(toWords(needle), toWords(haystack)),
        isSubsequence(needle, haystack) ? 55 : 0,
        calculateLevenshteinScore(needle, haystack),
    ];
    return Math.max(...fuzzyCandidates);
}
/**
 * Scores how well `query` matches the full text of a note.
 *
 * Returns 70 when the normalized query appears verbatim in the normalized
 * content. Otherwise each query word contributes 1 for an exact word hit or
 * 0.75 for a partial hit (containment or an edit-distance similarity of at
 * least 0.75), and the result is that ratio scaled to 0-60.
 *
 * @param query   Raw search query.
 * @param content Raw note content.
 * @returns An integer score; 0 means no match.
 */
function calculateContentScore(query: string, content: string): number {
    const normalizedQuery = normalizeSearchText(query);
    const normalizedContent = normalizeSearchText(content);
    if (!normalizedQuery.length || !normalizedContent.length) {
        return 0;
    }

    // Checked before the substring test so a query that normalizes to nothing
    // but whitespace cannot match every note.
    const queryWords = normalizedQuery.split(" ").filter(Boolean);
    if (!queryWords.length) {
        return 0;
    }

    if (normalizedContent.includes(normalizedQuery)) {
        return 70;
    }

    const contentWordSet = new Set(normalizedContent.split(" ").filter(Boolean));
    // Materialized once instead of re-spreading the Set for every query word.
    const contentWords = [...contentWordSet];

    // A content word must be at least this long to count as "contained in" a
    // query word. Without the guard, one-letter words such as "a" or "и" make
    // nearly every note a partial match for nearly every query.
    const minContainedWordLength = 3;

    let matchedWords = 0;
    for (const queryWord of queryWords) {
        if (contentWordSet.has(queryWord)) {
            matchedWords += 1;
            continue;
        }

        const hasPartialMatch = contentWords.some((contentWord) => {
            if (contentWord.includes(queryWord)) {
                return true;
            }
            if (contentWord.length >= minContainedWordLength && queryWord.includes(contentWord)) {
                return true;
            }
            // Edit distance is too noisy on very short words.
            if (queryWord.length < 4 || contentWord.length < 4) {
                return false;
            }
            const distance = levenshteinDistance(queryWord, contentWord);
            const maxLength = Math.max(queryWord.length, contentWord.length);
            return 1 - distance / maxLength >= 0.75;
        });
        if (hasPartialMatch) {
            matchedWords += 0.75;
        }
    }

    const matchRatio = matchedWords / queryWords.length;
    if (matchRatio <= 0) {
        return 0;
    }
    return Math.round(matchRatio * 60);
}
/**
 * Canonicalizes text for comparison: lower-cases it, strips combining
 * diacritics, keeps only Latin/Cyrillic letters and digits, and collapses
 * every run of separators into a single space.
 *
 * The result never has leading or trailing spaces, so inputs that differ only
 * in surrounding punctuation ("Hello!" vs "hello") normalize to the same string
 * and punctuation-only input normalizes to "".
 *
 * Note: NFKD decomposition followed by mark stripping also folds Cyrillic
 * "ё" into "е" and "й" into "и".
 */
function normalizeSearchText(value: string): string {
    return value
        .toLowerCase()
        .normalize("NFKD")
        // Drop combining marks left behind by the decomposition.
        .replace(/[\u0300-\u036f]/g, "")
        // Cyrillic yo -> ye; normally already folded above, kept as an explicit safety net.
        .replace(/\u0451/g, "\u0435")
        // Anything that is not a-z, Cyrillic а-я (U+0430-U+044F), a digit, whitespace or "-" becomes a space.
        .replace(/[^a-z\u0430-\u044f0-9\s-]/gi, " ")
        .replace(/[-_]+/g, " ")
        .replace(/\s+/g, " ")
        // Trim last: the replacements above can introduce spaces at either end.
        .trim();
}
/**
 * Scores word-level overlap between a query and a value on a 0-75 scale.
 *
 * Every query word is paired with its best-matching value word: 1 for an
 * identical word, 0.85 when the value word contains the query word, otherwise
 * the edit-distance similarity if it is at least 0.7 (else 0). The average of
 * those best scores is multiplied by 75 and rounded.
 */
function calculateWordMatchScore(queryWords: string[], valueWords: string[]): number {
    if (queryWords.length === 0 || valueWords.length === 0) {
        return 0;
    }

    const scorePair = (queryWord: string, valueWord: string): number => {
        if (valueWord === queryWord) {
            return 1;
        }
        // A prefix hit is also a containment hit, so one check covers both.
        if (valueWord.includes(queryWord)) {
            return 0.85;
        }
        const longest = Math.max(queryWord.length, valueWord.length);
        const similarity = 1 - levenshteinDistance(queryWord, valueWord) / longest;
        return similarity >= 0.7 ? similarity : 0;
    };

    let total = 0;
    for (const queryWord of queryWords) {
        let best = 0;
        for (const valueWord of valueWords) {
            best = Math.max(best, scorePair(queryWord, valueWord));
        }
        total += best;
    }

    return Math.round((total / queryWords.length) * 75);
}
/**
 * Converts whole-string edit distance into a 0-65 score.
 *
 * Similarity is `1 - distance / longerLength`; anything below 0.45 counts as
 * no match. Two empty strings score 0.
 */
function calculateLevenshteinScore(query: string, value: string): number {
    const longest = Math.max(query.length, value.length);
    if (longest === 0) {
        return 0;
    }

    const similarity = 1 - levenshteinDistance(query, value) / longest;
    return similarity < 0.45 ? 0 : Math.round(similarity * 65);
}
/**
 * Reports whether the characters of `query` occur in `value` in the same
 * order, not necessarily adjacent.
 *
 * The check only runs while consuming characters of `value`, so an empty
 * `value` always yields false, even for an empty `query`.
 */
function isSubsequence(query: string, value: string): boolean {
    let matched = 0;
    // `some` stops at the first character that completes the query.
    return Array.from(value).some((valueChar) => {
        if (valueChar === query[matched]) {
            matched += 1;
        }
        return matched === query.length;
    });
}
/**
 * Computes the Levenshtein edit distance between `a` and `b`: the minimum
 * number of single-character insertions, deletions and substitutions needed to
 * turn one into the other. Characters are compared by string index.
 */
function levenshteinDistance(a: string, b: string): number {
    // previousRow[j] is the distance between the processed prefix of `a`
    // and the first j characters of `b`; it starts as the distance from "".
    let previousRow = Array.from({length: b.length + 1}, (_, column) => column);

    for (let row = 1; row <= a.length; row++) {
        const currentRow = [row];
        for (let column = 1; column <= b.length; column++) {
            const substitution =
                previousRow[column - 1] + (a[row - 1] === b[column - 1] ? 0 : 1);
            const deletion = previousRow[column] + 1;
            const insertion = currentRow[column - 1] + 1;
            currentRow.push(Math.min(deletion, insertion, substitution));
        }
        previousRow = currentRow;
    }

    return previousRow[b.length];
}
/**
 * Builds a short excerpt of `content` around the first place the query occurs.
 *
 * The whole lower-cased query is searched first; if it is not found, the first
 * normalized query word of at least three characters that occurs in the
 * content is used instead. The excerpt spans up to 120 characters on each side
 * of the match, has its whitespace collapsed, and is wrapped in "..." wherever
 * it was cut.
 *
 * @param query   Raw search query.
 * @param content Raw note content.
 * @returns The excerpt, or `undefined` when nothing from the query can be
 *          located literally in the content (e.g. the match was fuzzy only).
 */
function buildContentSnippet(query: string, content: string): string | undefined {
    const loweredContent = content.toLowerCase();
    const loweredQuery = query.trim().toLowerCase();

    let matchIndex = loweredContent.indexOf(loweredQuery);
    // Length of whatever actually matched; used to size the excerpt window.
    let matchLength = loweredQuery.length;

    if (matchIndex < 0) {
        const queryWords = normalizeSearchText(query)
            .split(" ")
            .filter((word) => word.length >= 3);
        for (const word of queryWords) {
            const wordIndex = loweredContent.indexOf(word);
            if (wordIndex >= 0) {
                matchIndex = wordIndex;
                // Only this word matched, so the window is sized by the word,
                // not by the full query.
                matchLength = word.length;
                break;
            }
        }
    }

    if (matchIndex < 0) {
        return undefined;
    }

    const snippetRadius = 120;
    const start = Math.max(0, matchIndex - snippetRadius);
    const end = Math.min(content.length, matchIndex + matchLength + snippetRadius);
    const prefix = start > 0 ? "..." : "";
    const suffix = end < content.length ? "..." : "";
    return `${prefix}${content.slice(start, end).replace(/\s+/g, " ").trim()}${suffix}`;
}