add google lens ocr

This commit is contained in:
artie 2025-02-11 20:49:53 +01:00
parent c194de0c9b
commit 79a0a61a15
14 changed files with 121 additions and 48 deletions

View File

@ -7,6 +7,7 @@
"@discordjs/core": "^2.0.1",
"@sapphire/discord.js-utilities": "^7.3.2",
"cheerio": "^1.0.0",
"chrome-lens-ocr": "^4.0.4",
"deepl-node": "^1.16.0",
"discord.js": "^14.17.3",
"execa": "^9.5.2",
@ -64,6 +65,8 @@
"@eslint/plugin-kit": ["@eslint/plugin-kit@0.2.5", "", { "dependencies": { "@eslint/core": "^0.10.0", "levn": "^0.4.1" } }, "sha512-lB05FkqEdUg2AA0xEbUz0SnkXT1LcCTa438W4IWTUh4hdOnVbQyOJ81OrDXsJk/LSiJHubgGEFoR5EHq1NsH1A=="],
"@fastify/busboy": ["@fastify/busboy@2.1.1", "", {}, "sha512-vBZP4NlzfOlerQTnba4aqZoMhE/a9HY7HRqoOPaETQcSQuWEIyZMHGfVu6w9wGtGK5fED5qRs2DteVCjOH60sA=="],
"@humanfs/core": ["@humanfs/core@0.19.1", "", {}, "sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA=="],
"@humanfs/node": ["@humanfs/node@0.16.6", "", { "dependencies": { "@humanfs/core": "^0.19.1", "@humanwhocodes/retry": "^0.3.0" } }, "sha512-YuI2ZHQL78Q5HbhDiBA1X4LmYdXCKCMQIfw0pw7piHJwyREFebJUvrQN4cMssyES6x+vfUbx1CIpaQUKYdQZOw=="],
@ -202,6 +205,8 @@
"cheerio-select": ["cheerio-select@2.1.0", "", { "dependencies": { "boolbase": "^1.0.0", "css-select": "^5.1.0", "css-what": "^6.1.0", "domelementtype": "^2.3.0", "domhandler": "^5.0.3", "domutils": "^3.0.1" } }, "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g=="],
"chrome-lens-ocr": ["chrome-lens-ocr@4.0.4", "", { "dependencies": { "file-type": "^19.0.0", "image-dimensions": "^2.3.0", "set-cookie-parser": "^2.6.0", "sharp": "^0.33.2", "undici": "^5.28.3" }, "bin": { "chrome-lens-ocr": "cli.js" } }, "sha512-oI2sB3/tOTP7tAtYlo12YHejMTTZiQbX6Z/W4qgtXIv5gff2odsxVHjm8r+gEKxrYH3ZdI39G9O/q/2xrLtP/Q=="],
"clipboardy": ["clipboardy@4.0.0", "", { "dependencies": { "execa": "^8.0.1", "is-wsl": "^3.1.0", "is64bit": "^2.0.0" } }, "sha512-5mOlNS0mhX0707P2I0aZ2V/cmHUEO/fL7VFLqszkhUsxt7RwnmrInf/eEQKlf5GzvYeHIjT+Ov1HRfNmymlG0w=="],
"color": ["color@4.2.3", "", { "dependencies": { "color-convert": "^2.0.1", "color-string": "^1.9.0" } }, "sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A=="],
@ -332,6 +337,8 @@
"ignore": ["ignore@5.3.2", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="],
"image-dimensions": ["image-dimensions@2.3.0", "", { "bin": { "image-dimensions": "cli.js" } }, "sha512-8Ar3lsO6+/JLfnUeHnR8Jp/IyQR85Jut5t4Swy1yiXNwj/xM9h5V53v5KE/m/ZSMG4qGRopnSy37uPzKyQCv0A=="],
"import-fresh": ["import-fresh@3.3.1", "", { "dependencies": { "parent-module": "^1.0.0", "resolve-from": "^4.0.0" } }, "sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ=="],
"imurmurhash": ["imurmurhash@0.1.4", "", {}, "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA=="],
@ -486,6 +493,8 @@
"semver": ["semver@7.7.1", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-hlq8tAfn0m/61p4BVRcPzIGr6LKiMwo4VM6dGi6pt4qcRkmNzTcWq6eCEjEh+qXjkMDvPlOFFSGwQjoEa6gyMA=="],
"set-cookie-parser": ["set-cookie-parser@2.7.1", "", {}, "sha512-IOc8uWeOZgnb3ptbCURJWNjWUPcO3ZnTTdzsurqERrP6nPyv+paC55vJM0LpOlT2ne+Ix+9+CRG1MNLlyZ4GjQ=="],
"sharp": ["sharp@0.33.5", "", { "dependencies": { "color": "^4.2.3", "detect-libc": "^2.0.3", "semver": "^7.6.3" }, "optionalDependencies": { "@img/sharp-darwin-arm64": "0.33.5", "@img/sharp-darwin-x64": "0.33.5", "@img/sharp-libvips-darwin-arm64": "1.0.4", "@img/sharp-libvips-darwin-x64": "1.0.4", "@img/sharp-libvips-linux-arm": "1.0.5", "@img/sharp-libvips-linux-arm64": "1.0.4", "@img/sharp-libvips-linux-s390x": "1.0.4", "@img/sharp-libvips-linux-x64": "1.0.4", "@img/sharp-libvips-linuxmusl-arm64": "1.0.4", "@img/sharp-libvips-linuxmusl-x64": "1.0.4", "@img/sharp-linux-arm": "0.33.5", "@img/sharp-linux-arm64": "0.33.5", "@img/sharp-linux-s390x": "0.33.5", "@img/sharp-linux-x64": "0.33.5", "@img/sharp-linuxmusl-arm64": "0.33.5", "@img/sharp-linuxmusl-x64": "0.33.5", "@img/sharp-wasm32": "0.33.5", "@img/sharp-win32-ia32": "0.33.5", "@img/sharp-win32-x64": "0.33.5" } }, "sha512-haPVm1EkS9pgvHrQ/F3Xy+hgcuMV0Wm9vfIBSiwZ05k+xgb0PkBQpGsAA/oWdDobNaZTH5ppvHtzCFbnSEwHVw=="],
"shebang-command": ["shebang-command@2.0.0", "", { "dependencies": { "shebang-regex": "^3.0.0" } }, "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA=="],
@ -572,6 +581,10 @@
"axios/form-data": ["form-data@4.0.1", "", { "dependencies": { "asynckit": "^0.4.0", "combined-stream": "^1.0.8", "mime-types": "^2.1.12" } }, "sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw=="],
"chrome-lens-ocr/file-type": ["file-type@19.6.0", "", { "dependencies": { "get-stream": "^9.0.1", "strtok3": "^9.0.1", "token-types": "^6.0.0", "uint8array-extras": "^1.3.0" } }, "sha512-VZR5I7k5wkD0HgFnMsq5hOsSc710MJMu5Nc5QYsbe38NN5iPV/XTObYLc/cpttRTf6lX538+5uO1ZQRhYibiZQ=="],
"chrome-lens-ocr/undici": ["undici@5.28.5", "", { "dependencies": { "@fastify/busboy": "^2.0.0" } }, "sha512-zICwjrDrcrUE0pyyJc1I2QzBkLM8FINsgOrt6WjA+BgajVq9Nxu2PbFFXUrAggLfDXlZGZBVZYw7WNV5KiBiBA=="],
"clipboardy/execa": ["execa@8.0.1", "", { "dependencies": { "cross-spawn": "^7.0.3", "get-stream": "^8.0.1", "human-signals": "^5.0.0", "is-stream": "^3.0.0", "merge-stream": "^2.0.0", "npm-run-path": "^5.1.0", "onetime": "^6.0.0", "signal-exit": "^4.1.0", "strip-final-newline": "^3.0.0" } }, "sha512-VyhnebXciFV2DESc+p6B+y0LjSm0krU4OgJN44qFAhBY0TJ+1V61tYD2+wHusZ6F9n5K+vl8k0sTy7PEfV4qpg=="],
"color-string/color-name": ["color-name@1.1.3", "", {}, "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw=="],
@ -586,6 +599,8 @@
"@typescript-eslint/typescript-estree/minimatch/brace-expansion": ["brace-expansion@2.0.1", "", { "dependencies": { "balanced-match": "^1.0.0" } }, "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA=="],
"chrome-lens-ocr/file-type/strtok3": ["strtok3@9.1.1", "", { "dependencies": { "@tokenizer/token": "^0.3.0", "peek-readable": "^5.3.1" } }, "sha512-FhwotcEqjr241ZbjFzjlIYg6c5/L/s4yBGWSMvJ9UoExiSqL+FnFA/CaeZx17WGaZMS/4SOZp8wH18jSS4R4lw=="],
"clipboardy/execa/get-stream": ["get-stream@8.0.1", "", {}, "sha512-VaUJspBffn/LMCJVoMvSAdmscJyS1auj5Zulnn5UoYcY531UWmdwhRWkcGKnGU93m5HSXP9LP2usOryrBtQowA=="],
"clipboardy/execa/human-signals": ["human-signals@5.0.0", "", {}, "sha512-AXcZb6vzzrFAUE61HnN4mpLqd/cSIwNQjtNWR0euPm6y0iqx3G4gOXaIDdtdDwZmhwe82LA6+zinmW4UBWVePQ=="],
@ -600,6 +615,8 @@
"ya-ocr/file-type/strtok3": ["strtok3@9.1.1", "", { "dependencies": { "@tokenizer/token": "^0.3.0", "peek-readable": "^5.3.1" } }, "sha512-FhwotcEqjr241ZbjFzjlIYg6c5/L/s4yBGWSMvJ9UoExiSqL+FnFA/CaeZx17WGaZMS/4SOZp8wH18jSS4R4lw=="],
"chrome-lens-ocr/file-type/strtok3/peek-readable": ["peek-readable@5.4.2", "", {}, "sha512-peBp3qZyuS6cNIJ2akRNG1uo1WJ1d0wTxg/fxMdZ0BqCVhx242bSFHM9eNqflfJVS9SsgkzgT/1UgnsurBOTMg=="],
"clipboardy/execa/npm-run-path/path-key": ["path-key@4.0.0", "", {}, "sha512-haREypq7xkM7ErfgIyA0z+Bj4AGKlMSdlQE2jvJo6huWD1EdkKYV+G/T4nq0YEF2vgTT8kqMFKo1uHn950r4SQ=="],
"ya-ocr/file-type/strtok3/peek-readable": ["peek-readable@5.4.2", "", {}, "sha512-peBp3qZyuS6cNIJ2akRNG1uo1WJ1d0wTxg/fxMdZ0BqCVhx242bSFHM9eNqflfJVS9SsgkzgT/1UgnsurBOTMg=="],

View File

@ -14,6 +14,7 @@
"@discordjs/core": "^2.0.1",
"@sapphire/discord.js-utilities": "^7.3.2",
"cheerio": "^1.0.0",
"chrome-lens-ocr": "^4.0.4",
"deepl-node": "^1.16.0",
"discord.js": "^14.17.3",
"execa": "^9.5.2",

View File

@ -1,6 +1,7 @@
import {
AutocompleteInteraction,
hyperlink,
inlineCode,
SlashCommandBuilder,
type InteractionEditReplyOptions,
} from "discord.js";
@ -14,6 +15,8 @@ import {
translate,
} from "../../utils/deepl";
import { abort } from "../../utils/error";
import type { OCRResult } from "../../types/ocr";
import { capitalize } from "../../utils/functions";
export async function translateAutocompleteImpl(
interaction: AutocompleteInteraction
@ -39,6 +42,7 @@ export async function translateImpl(
text: string,
source: string | null,
target: string,
ocrModel?: OCRResult["model"],
imageUrl?: string
): Promise<InteractionEditReplyOptions> {
const {
@ -56,7 +60,9 @@ export async function translateImpl(
if (translatedText.length > 4096) {
return {
content: imageUrl ? `${hyperlink("Image", imageUrl)}\n\n` : undefined,
// Parenthesise the OCR label: `+` binds tighter than `?:`, so without the
// parentheses the image link was only appended when no OCR model was given.
content:
  (ocrModel ? `OCR: ${inlineCode(capitalize(ocrModel))}` : "") +
  (imageUrl ? `\n${hyperlink("Image", imageUrl)}` : ""),
files: [
{
name: `${displaySource}-${displayTarget}.txt`,
@ -80,7 +86,12 @@ export async function translateImpl(
icon_url: "https://www.google.com/s2/favicons?domain=deepl.com&sz=64",
},
footer: {
text: `Billed characters: ${billedCharacters}`,
text: ocrModel
? `OCR: ${capitalize(ocrModel)}`
: `Billed characters: ${billedCharacters}`,
icon_url: ocrModel
? `https://www.google.com/s2/favicons?domain=${ocrModel}.com&sz=64`
: undefined,
},
},
],

View File

@ -1,5 +1,4 @@
import {
codeBlock,
hyperlink,
inlineCode,
SlashCommandBuilder,
@ -10,32 +9,36 @@ import { downloadFile } from "../../utils/http";
import { abort } from "../../utils/error";
import { yandexOcr } from "../../utils/yandex";
import sharp from "sharp";
import { getImageFromAttachmentOrString, run } from "../../utils/functions";
import {
capitalize,
getImageFromAttachmentOrString,
run,
} from "../../utils/functions";
import { lensOcr } from "../../utils/lens";
import type { OCRResult } from "../../types/ocr";
export function buildOcrPayload(
text: string,
detected_lang: string,
language: string,
model: OCRResult["model"],
imageUrl?: string
): InteractionEditReplyOptions {
const languageName = run(() => {
try {
return (
new Intl.DisplayNames(["en"], { type: "language" }).of(detected_lang) ??
new Intl.DisplayNames(["en"], { type: "language" }).of(language) ??
"unknown"
);
} catch {
return "unknown";
return "Unknown";
}
});
const content =
`Detected language: ${inlineCode(languageName)}\n${codeBlock(text)}` +
(imageUrl ? `\n${hyperlink("Image", imageUrl)}` : "");
if (content.length > 4096) {
if (text.length > 4096) {
return {
content:
`Detected language: ${inlineCode(languageName)}` +
`\nOCR: ${inlineCode(capitalize(model))}` +
(imageUrl ? `\n${hyperlink("Image", imageUrl)}` : ""),
files: [
{
@ -49,19 +52,18 @@ export function buildOcrPayload(
return {
embeds: [
{
description: codeBlock(text),
color: 0xffdb4d,
description: text,
color: model === "yandex" ? 0xffdb4d : 0x4285f4,
fields: [
{
name: "Detected language",
value: inlineCode(languageName),
value: languageName,
},
],
...(imageUrl ? { image: { url: imageUrl } } : {}),
author: {
name: "Yandex",
icon_url:
"https://www.google.com/s2/favicons?domain=yandex.com&sz=64",
name: capitalize(model),
icon_url: `https://www.google.com/s2/favicons?domain=${model}.com&sz=64`,
},
},
],
@ -86,7 +88,10 @@ export async function ocrImpl(url: string) {
.jpeg({ quality: 90 })
.toBuffer();
const result = await yandexOcr(compressed, type.mime);
const result = await lensOcr(compressed)
.catch(() => yandexOcr(compressed, type.mime))
.catch(() => abort("Failed to OCR the image"));
if (!result.text) {
result.text = "No text detected";
}
@ -112,12 +117,8 @@ export default defineCommand({
await interaction.deferReply();
const result = await ocrImpl(imageUrl);
const payload = buildOcrPayload(
result.text,
result.detected_lang,
imageUrl
);
const { text, language, model } = await ocrImpl(imageUrl);
const payload = buildOcrPayload(text, language, model, imageUrl);
await interaction.editReply(payload);
},
});

View File

@ -19,8 +19,8 @@ export default defineCommand({
await interaction.deferReply();
const result = await ocrImpl(imageUrl);
const payload = buildOcrPayload(result.text, result.detected_lang);
const { text, language, model } = await ocrImpl(imageUrl);
const payload = buildOcrPayload(text, language, model);
await interaction.editReply(payload);
},
});

View File

@ -57,8 +57,8 @@ export default defineCommand({
abort("Target language not supported");
}
const { text } = await ocrImpl(imageUrl);
const payload = await translateImpl(text, source, target, imageUrl);
const { text, model } = await ocrImpl(imageUrl);
const payload = await translateImpl(text, source, target, model, imageUrl);
await interaction.editReply(payload);
},
});

View File

@ -20,8 +20,8 @@ export default defineCommand({
await interaction.deferReply();
const { text } = await ocrImpl(imageUrl);
const payload = await translateImpl(text, null, "en-US");
const { text, model } = await ocrImpl(imageUrl);
const payload = await translateImpl(text, null, "en-US", model);
await interaction.editReply(payload);
},
});

View File

@ -11,6 +11,7 @@ const envSchema = z.object({
DEV_GUILD_ID: z.string(),
DEV_CHANNEL_ID: z.string(),
DEEPL_API_KEY: z.string(),
GOOGLE_COOKIE: z.string(),
});
export const env = envSchema.parse(process.env);

View File

@ -9,11 +9,14 @@ export default defineEvent({
once: true,
async execute(client) {
logger.info("Logged in", {
tag: client.user.tag,
id: client.user.id,
env: env.NODE_ENV,
});
logger.info(
{
tag: client.user.tag,
id: client.user.id,
env: env.NODE_ENV,
},
"Logged in"
);
await maybeSendRestarted();
},

View File

@ -1,6 +0,0 @@
import { findFirstUrl } from "../utils/functions";
const text =
"I ended up doing https://cdn.discordapp.com/attachments/338689901111541760/1338884762785288253/image.png?ex=67acb51a&is=67ab639a&hm=92ed060dabffccf9544157da2922bce79386eca444a182db92f19d833d66fba6&b because yah";
console.log(findFirstUrl(text));

5
src/types/ocr.ts Normal file
View File

@ -0,0 +1,5 @@
/**
 * Common result shape returned by the OCR helpers (`lensOcr`, `yandexOcr`),
 * so callers can treat either backend the same way.
 */
export type OCRResult = {
/** Text recognised in the image. */
text: string;
/** Language reported by the backend; the helpers fall back to "n/a" when none is reported. */
language: string;
/** Which OCR backend produced this result. */
model: "yandex" | "google";
};

View File

@ -75,6 +75,10 @@ export function findFirstUrl(text: string) {
return findUrls(text)[0];
}
/**
 * Returns `str` with its first UTF-16 code unit upper-cased and the
 * remainder left untouched. An empty string is returned unchanged.
 *
 * @param str - Text to capitalise.
 * @returns The capitalised text.
 */
export function capitalize(str: string) {
  const head = str.charAt(0);
  const tail = str.slice(1);
  return `${head.toUpperCase()}${tail}`;
}
export function getImageFromAttachmentOrString(
attachment?: Attachment | null,
str?: string | null

25
src/utils/lens.ts Normal file
View File

@ -0,0 +1,25 @@
import Lens from "chrome-lens-ocr";
import { env } from "../env";
import type { OCRResult } from "../types/ocr";
// Module-level Lens client, constructed once with a `cookie` header taken
// from the GOOGLE_COOKIE environment variable.
const lens = new Lens({
headers: {
cookie: env.GOOGLE_COOKIE,
},
});
/**
 * Runs a Lens scan on an image supplied either as a URL or as raw bytes and
 * converts the outcome into the shared `OCRResult` shape.
 *
 * @param resource - Image URL (string) or image contents (Buffer).
 * @returns The segment texts joined with newlines, the language reported by
 *   the scan (or "n/a" when it reports none), and the model tag "google".
 */
export async function lensOcr(resource: string | Buffer): Promise<OCRResult> {
  const scan =
    typeof resource === "string"
      ? await lens.scanByURL(resource)
      : await lens.scanByBuffer(resource);

  const lines = scan.segments.map((segment) => segment.text);

  return {
    text: lines.join("\n"),
    language: scan.language ?? "n/a",
    model: "google",
  };
}

View File

@ -1,19 +1,30 @@
import yandexClient from "ya-ocr";
import type { ClientType } from "ya-ocr/types";
import type { OCRResult } from "../types/ocr";
const yandex = new yandexClient();
export async function yandexOcr(
image: Buffer,
mime: string
): Promise<ClientType.OCRFullData>;
export async function yandexOcr(url: string): Promise<ClientType.OCRFullData>;
): Promise<OCRResult>;
export async function yandexOcr(url: string): Promise<OCRResult>;
export async function yandexOcr(
resource: string | Buffer,
mime?: string
): Promise<ClientType.OCRFullData> {
): Promise<OCRResult> {
let result;
if (typeof resource === "string") {
return yandex.scanByUrl(resource);
result = await yandex.scanByUrl(resource);
} else {
result = await yandex.scanByBlob(
new Blob([resource], { type: mime }) as Blob
);
}
return yandex.scanByBlob(new Blob([resource], { type: mime }) as Blob);
return {
text: result.text,
language: result.detected_lang ?? "n/a",
model: "yandex",
};
}