itame/index.ts
2025-09-18 15:41:36 +02:00

153 lines
3.7 KiB
TypeScript
Executable File

import { InvalidArgumentError, Option, program } from "commander";
import packageJson from "./package.json";
import path from "path";
import fs from "fs";
import Lens from "chrome-lens-ocr";
import yandexClient from "ya-ocr";
import sharp from "sharp";
/** Identifier of a supported OCR backend. */
type Engine = "google" | "yandex";
/** Parsed command-line options, in the shape returned by `parseArgs`. */
interface Options {
// Engine choice as given on the command line; "auto" means no single engine was pinned.
engine: "auto" | Engine;
// Engines to try, in order: both engines for "auto", otherwise just the chosen one.
engines: Engine[];
// Optional google.com cookie header value, forwarded to the Google Lens client.
cookie?: string;
// Path of the input image (the validated positional argument).
image: string;
}
/** Outcome of a successful OCR run. */
type OCRResult = {
// Recognized text.
text: string;
// Language reported by the engine, or "N/A" when it reports none.
language: string;
// Engine that produced this result.
engine: Engine;
};
// Diagnostics go to stderr (console.error); stdout is used for the recognized text.
const log = console.error;
/**
 * Reports a fatal error and terminates the process with exit code 1.
 *
 * Logs `err.stack` when it is present and truthy, otherwise logs `err` itself.
 */
function bail(err: any) {
  const report = err?.stack || err;
  log(report);
  process.exit(1);
}
/**
 * Validates a user-supplied path and returns it in absolute form.
 *
 * @param val - raw path; surrounding whitespace is ignored
 * @param allowedTypes - optional list of accepted lowercase extensions, without
 *   the leading dot; the file's extension is compared case-insensitively
 * @returns the absolute path of the existing file
 * @throws InvalidArgumentError when the path does not exist, is not a regular
 *   file, or has an extension outside `allowedTypes`
 */
function resolvePath(val: string, allowedTypes?: string[]) {
  const absolutePath = path.resolve(val.trim());

  if (!fs.existsSync(absolutePath))
    throw new InvalidArgumentError("input file does not exist");

  // existsSync is also true for directories; reject them here instead of
  // letting the OCR engines fail later with a less helpful error.
  if (!fs.statSync(absolutePath).isFile())
    throw new InvalidArgumentError("input path is not a file");

  const extension = path.extname(absolutePath).slice(1).toLowerCase();
  if (allowedTypes && !allowedTypes.includes(extension))
    throw new InvalidArgumentError(
      "input file must be one of " + allowedTypes.join(", ")
    );

  return absolutePath;
}
/**
 * Declares the command-line interface on the shared commander `program`,
 * parses the process arguments and returns them as `Options`.
 *
 * The CLI has an `-e, --engine` option (auto | google | yandex, default
 * "auto"), a `-c, --cookie` option and one positional image path that is
 * validated through `resolvePath` (jpg, jpeg or png).
 *
 * @returns the parsed options plus `engines` (both engines for "auto",
 *   otherwise only the selected one) and `image` (the processed positional
 *   argument)
 */
function parseArgs() {
  const engineOption = new Option("-e, --engine [engine]", "ocr engine to use")
    .choices(["auto", "google", "yandex"])
    .default("auto");
  const parseImagePath = (val: string) =>
    resolvePath(val, ["jpg", "jpeg", "png"]);

  program.name(packageJson.name).version(packageJson.version);
  program.addOption(engineOption);
  program.option("-c, --cookie [value]", "google.com cookie header value");
  program.argument("image <path>", "input image file path", parseImagePath);
  program.parse();

  const opts = program.opts();
  const engines =
    opts.engine === "auto" ? ["google", "yandex"] : [opts.engine];

  return { ...opts, engines, image: program.processedArgs[0] } as Options;
}
/**
 * Runs OCR on an image through the Yandex client.
 *
 * The image is first re-encoded with sharp: resized to a width of 1000 without
 * enlargement and written as a quality-95 JPEG. Because the uploaded bytes are
 * therefore always JPEG, the blob is labelled `image/jpeg` regardless of the
 * source file's extension.
 *
 * @param yandex - client used to submit the image blob
 * @param image - path of the image file to scan
 * @returns the recognized text and detected language ("N/A" when none is
 *   reported), tagged with the "yandex" engine
 */
async function processYandex(
  yandex: yandexClient,
  image: string
): Promise<OCRResult> {
  const compressed = await sharp(image)
    .resize({ width: 1000, withoutEnlargement: true })
    .jpeg({ quality: 95 })
    .toBuffer();

  // The MIME type must describe the re-encoded payload, not the original file.
  const result = await yandex.scanByBlob(
    new Blob([compressed], { type: "image/jpeg" })
  );

  return {
    text: result.text,
    language: result.detected_lang ?? "N/A",
    engine: "yandex",
  };
}
/**
 * Runs OCR on an image file through the Google Lens client.
 *
 * @param lens - client used to scan the file
 * @param image - path of the image file to scan
 * @returns the text of all returned segments joined with newlines and the
 *   reported language ("N/A" when none is reported), tagged with the "google"
 *   engine
 */
async function processGoogle(lens: Lens, image: string): Promise<OCRResult> {
  const { segments, language } = await lens.scanByFile(image);

  const lines: string[] = [];
  for (const segment of segments) {
    lines.push(segment.text);
  }

  return {
    text: lines.join("\n"),
    language: language ?? "N/A",
    engine: "google",
  };
}
/**
 * Runs OCR on `opts.image`, trying each engine in `opts.engines` in order and
 * returning the first successful result.
 *
 * When `opts.engine` is "auto", a failing engine is logged and the next one is
 * tried; when a specific engine was requested, its error is rethrown as-is.
 * Each engine's client is created only when that engine is actually tried.
 *
 * @param opts - parsed command-line options
 * @returns the result of the first engine that succeeds
 * @throws the engine's own error when a specific engine was requested, or a
 *   generic "failed to process" error once every engine has failed
 */
async function processImage(opts: Options): Promise<OCRResult> {
  const filename = path.basename(opts.image);

  for (const engine of opts.engines) {
    try {
      log(`processing '${filename}' with ${engine}`);
      // `return await` is essential: without the await a rejected promise
      // leaves this try block before it settles, the catch never runs, and
      // the fallback to the next engine is skipped.
      switch (engine) {
        case "google": {
          const lens = new Lens({
            headers: opts.cookie ? { cookie: opts.cookie } : undefined,
          });
          return await processGoogle(lens, opts.image);
        }
        case "yandex":
          return await processYandex(new yandexClient(), opts.image);
      }
    } catch (err) {
      // An explicitly requested engine has no fallback; surface its error.
      if (opts.engine !== "auto") throw err;
      log(`failed to process '${filename}' with ${engine}`);
      log(err);
    }
  }

  throw new Error(`failed to process '${filename}'`);
}
/**
 * CLI flow: parse the arguments, run OCR on the requested image, report which
 * engine produced the result via `log`, then print the recognized text with
 * `console.log`.
 */
async function main() {
  const opts = parseArgs();
  const { engine, text } = await processImage(opts);
  const filename = path.basename(opts.image);

  log(`${filename} processed with ${engine}, outputting to stdout`);
  console.log(text);
}
// Entry point: run the CLI; any rejection is passed to `bail`.
main().catch(bail);