add transliteration command (en <-> ru)
This commit is contained in:
@@ -0,0 +1,113 @@
|
|||||||
|
import {ChatCommand} from "../base/chat-command";
|
||||||
|
import {Message} from "typescript-telegram-bot-api";
|
||||||
|
import {logError, replyToMessage} from "../util/utils";
|
||||||
|
|
||||||
|
/**
 * Characters of the Latin (QWERTY-ordered) keyboard layout, listed row by row:
 * unshifted letter keys, shifted letter keys, digit row, shifted digit row.
 * The character at each index is paired with the character at the same index in `RU`.
 */
const EN = [
    "`qwertyuiop[]asdfghjkl;'zxcvbnm,./",
    "~QWERTYUIOP{}ASDFGHJKL:\"ZXCVBNM<>?",
    "1234567890-=",
    "!@#$%^&*()_+",
].join("");

/**
 * Characters of the Cyrillic (ЙЦУКЕН-ordered) keyboard layout, in the same
 * row order and with the same row lengths as `EN`.
 */
const RU = [
    "ёйцукенгшщзхъфывапролджэячсмитьбю.",
    "ЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ,",
    "1234567890-=",
    "!\"№;%:?*()_+",
].join("");
|
||||||
|
|
||||||
|
/**
 * Builds a lookup table that maps the i-th character of `from` to the i-th
 * character of `to`.
 *
 * Both strings are split by Unicode code point rather than by UTF-16 code
 * unit, so a character outside the Basic Multilingual Plane is stored as one
 * key/value instead of two lone surrogates. This matches how `swapLayout`
 * iterates text (`for...of` yields code points).
 *
 * If a character occurs more than once in `from`, the last occurrence wins.
 *
 * @param from - Source characters; each one becomes a key.
 * @param to - Target characters; must contain as many characters as `from`.
 * @returns Map from each source character to its counterpart.
 * @throws Error when the two strings contain a different number of characters.
 */
function makeMap(from: string, to: string): Map<string, string> {
    const sources = Array.from(from);
    const targets = Array.from(to);

    if (sources.length !== targets.length) {
        throw new Error(`Layout maps must be same length: ${sources.length} vs ${targets.length}`);
    }

    const m = new Map<string, string>();
    for (const [i, source] of sources.entries()) {
        const target = targets[i];
        // Lengths are equal, so this guard only satisfies strict index checking.
        if (target !== undefined) m.set(source, target);
    }
    return m;
}
|
||||||
|
|
||||||
|
/** Lookup table from each `EN` layout character to the `RU` character at the same position. */
const enToRu: Map<string, string> = makeMap(EN, RU);

/** Reverse lookup table: each `RU` layout character to the `EN` character at the same position. */
const ruToEn: Map<string, string> = makeMap(RU, EN);
|
||||||
|
|
||||||
|
/**
 * Replaces every character of `text` that has an entry in `map` with its
 * mapped value; characters without an entry are copied through unchanged.
 *
 * @param text - Input text, processed one Unicode code point at a time.
 * @param map - Character substitution table.
 * @returns The text with all mapped characters substituted.
 */
function swapLayout(text: string, map: Map<string, string>): string {
    return Array.from(text, (symbol) => map.get(symbol) ?? symbol).join("");
}
|
||||||
|
|
||||||
|
/** Converts text by substituting characters through the `enToRu` table. */
export const toRuLayout = (text: string): string => {
    return swapLayout(text, enToRu);
};

/** Converts text by substituting characters through the `ruToEn` table. */
export const toEnLayout = (text: string): string => {
    return swapLayout(text, ruToEn);
};
|
||||||
|
|
||||||
|
/** Matches a single character belonging to the Cyrillic script. */
const reCyr = /\p{Script=Cyrillic}/u;

/** Matches a single character belonging to the Latin script. */
const reLat = /\p{Script=Latin}/u;

/** Result of {@link detectScript}: which alphabet(s) the text contains. */
export type ScriptGuess = "ru" | "en" | "mixed" | "unknown";

/**
 * Classifies text by the scripts of the characters it contains.
 *
 * @param text - Text to inspect; characters that are neither Cyrillic nor Latin are ignored.
 * @returns `"ru"` when only Cyrillic characters are present, `"en"` when only
 * Latin characters are present, `"mixed"` when both occur, and `"unknown"`
 * when neither occurs (including the empty string).
 */
export function detectScript(text: string): ScriptGuess {
    const symbols = Array.from(text);
    const hasCyrillic = symbols.some((symbol) => reCyr.test(symbol));
    const hasLatin = symbols.some((symbol) => reLat.test(symbol));

    if (hasCyrillic && hasLatin) return "mixed";
    if (hasCyrillic) return "ru";
    return hasLatin ? "en" : "unknown";
}
|
||||||
|
|
||||||
|
/** Matches a Latin vowel letter (including `y`), case-insensitively. */
const EN_VOWELS = /[aeiouy]/i;

/** Matches a Russian vowel letter, case-insensitively. */
const RU_VOWELS = /[аеёиоуыэюя]/i;

/**
 * Computes the share of vowels among the letters of `text`.
 *
 * @param text - Text to inspect, one Unicode code point at a time.
 * @param reLetter - Pattern deciding whether a character counts as a letter.
 * @param reVowel - Pattern deciding whether a letter counts as a vowel.
 * @returns Vowels divided by letters, or `0` when the text has no letters.
 */
function vowelRatio(text: string, reLetter: RegExp, reVowel: RegExp): number {
    const letters = Array.from(text).filter((symbol) => reLetter.test(symbol));
    if (letters.length === 0) return 0;

    const vowelCount = letters.filter((letter) => reVowel.test(letter)).length;
    return vowelCount / letters.length;
}
|
||||||
|
|
||||||
|
/**
 * Heuristic check that Latin-script text is real English rather than text
 * typed in the wrong layout: at least 20% of its Latin letters must be vowels.
 *
 * @param text - Text to evaluate.
 * @returns `true` when the vowel share among Latin letters is at least 0.20.
 */
function looksLikeEnglish(text: string): boolean {
    const minVowelShare = 0.20;
    return vowelRatio(text, /\p{Script=Latin}/u, EN_VOWELS) >= minVowelShare;
}
|
||||||
|
|
||||||
|
/**
 * Heuristic check that Cyrillic-script text is real Russian rather than text
 * typed in the wrong layout: at least 18% of its Cyrillic letters must be vowels.
 *
 * @param text - Text to evaluate.
 * @returns `true` when the vowel share among Cyrillic letters is at least 0.18.
 */
function looksLikeRussian(text: string): boolean {
    const minVowelShare = 0.18;
    return vowelRatio(text, /\p{Script=Cyrillic}/u, RU_VOWELS) >= minVowelShare;
}
|
||||||
|
|
||||||
|
/**
 * Converts text that appears to have been typed in the wrong keyboard layout.
 *
 * The script of the text is detected first. Latin-only text that does not
 * pass the English vowel heuristic is passed through `toRuLayout`;
 * Cyrillic-only text that does not pass the Russian vowel heuristic is passed
 * through `toEnLayout`. Text that passes its heuristic, mixes both scripts,
 * or contains neither script is returned unchanged.
 *
 * @param text - Text to examine.
 * @param toRuLayout - Converter applied to Latin-only text that does not look like English.
 * @param toEnLayout - Converter applied to Cyrillic-only text that does not look like Russian.
 * @returns The converted text, or the original text when no conversion applies.
 */
export function fixLayoutAuto(
    text: string,
    toRuLayout: (s: string) => string,
    toEnLayout: (s: string) => string,
): string {
    switch (detectScript(text)) {
        case "en":
            return looksLikeEnglish(text) ? text : toRuLayout(text);
        case "ru":
            return looksLikeRussian(text) ? text : toEnLayout(text);
        default:
            // "mixed" and "unknown": no confident guess, leave the text alone.
            return text;
    }
}
|
||||||
|
|
||||||
|
/**
 * Chat command triggered by `/tr <text>`: runs the captured text through
 * `fixLayoutAuto` and hands the result to `replyToMessage`.
 */
export class Transliteration extends ChatCommand {
    // `/tr`, one whitespace character, then everything that follows (newlines included).
    regexp = /^\/tr\s([^]+)/i;

    /**
     * Handles one matched message.
     *
     * @param msg - Message that triggered the command.
     * @param match - Result of matching `regexp`; group 1 holds the text to convert.
     * @returns Resolves once the reply attempt finishes; nothing is sent when the trimmed text is empty.
     */
    async execute(msg: Message, match?: RegExpExecArray): Promise<void> {
        const captured = match ? match[1] : "";
        const text = captured.trim();

        if (text.length > 0) {
            const newText = fixLayoutAuto(text, toRuLayout, toEnLayout);

            // A rejected reply is passed to logError instead of propagating.
            await replyToMessage(msg, newText).catch(logError);
        }
    }
}
|
||||||
@@ -56,6 +56,7 @@ import {OllamaRequest} from "./model/ollama-request";
|
|||||||
import {CallbackCommand} from "./base/callback-command";
|
import {CallbackCommand} from "./base/callback-command";
|
||||||
import {OllamaCancel} from "./callback_commands/ollama-cancel";
|
import {OllamaCancel} from "./callback_commands/ollama-cancel";
|
||||||
import {MistralChat} from "./commands/mistral-chat";
|
import {MistralChat} from "./commands/mistral-chat";
|
||||||
|
import {Transliteration} from "./commands/transliteration";
|
||||||
|
|
||||||
process.setUncaughtExceptionCaptureCallback(console.error);
|
process.setUncaughtExceptionCaptureCallback(console.error);
|
||||||
|
|
||||||
@@ -133,6 +134,7 @@ export const chatCommands: ChatCommand[] = [
|
|||||||
new Distort(),
|
new Distort(),
|
||||||
new Dice(),
|
new Dice(),
|
||||||
new Title(),
|
new Title(),
|
||||||
|
new Transliteration(),
|
||||||
|
|
||||||
new AdminsAdd(),
|
new AdminsAdd(),
|
||||||
new AdminsRemove(),
|
new AdminsRemove(),
|
||||||
|
|||||||
@@ -799,3 +799,9 @@ export async function imageToBase64(filePath: string): Promise<string> {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Interprets a loosely typed flag value as a boolean.
 *
 * The parameter is typed `unknown` (previously `never`, which no typed caller
 * could satisfy) and is narrowed at runtime before comparison.
 *
 * @param exp - Value to interpret; may be omitted.
 * @returns `true` for boolean `true`, the number `1`, or one of the strings
 * `"true"`, `"t"`, `"y"`, `"1"`; `false` for everything else, including
 * `undefined`, `null`, `0`, and the empty string.
 */
export function ifTrue(exp?: unknown): boolean {
    if (exp === true) return true;
    if (typeof exp !== "string" && typeof exp !== "number") return false;

    const truthyValues: ReadonlyArray<string | number> = ["true", "t", "y", 1, "1"];
    return truthyValues.includes(exp);
}
|
||||||
Reference in New Issue
Block a user