157 lines
3.8 KiB
TypeScript
Executable File
157 lines
3.8 KiB
TypeScript
Executable File
import { InvalidArgumentError, Option, program } from "commander";
|
|
import packageJson from "./package.json";
|
|
import path from "path";
|
|
import fs from "fs";
|
|
import Lens from "chrome-lens-ocr";
|
|
import yandexClient from "ya-ocr";
|
|
import sharp from "sharp";
|
|
|
|
/** OCR backends this tool knows how to call. */
type Engine =
  | "google"
  | "yandex";

/** Normalized command-line options used by the rest of the program. */
interface Options {
  /** Engine selection as requested on the command line. */
  engine: Engine | "auto";
  /** Concrete engines to try, in order. */
  engines: Array<Engine>;
  /** Cookie header value, present only when a cookie file was supplied. */
  cookie?: string;
  /** Path of the input image. */
  image: string;
}

/** Outcome of a single OCR run. */
type OCRResult = {
  /** Recognized text. */
  text: string;
  /** Language reported by the engine, or a placeholder when none was reported. */
  language: string;
  /** Engine that produced this result. */
  engine: Engine;
};

// Diagnostics go through console.error (stderr); console.log is used
// separately for the recognized text.
const log = console.error;
|
|
|
|
/**
 * Reports a fatal error through `log` and terminates the process with exit
 * code 1.
 *
 * @param err - Whatever was thrown or rejected. When it is an object carrying
 *   a truthy `stack`, the stack is printed; otherwise the value itself is.
 */
function bail(err: unknown): never {
  // Thrown values are not guaranteed to be Error instances, so probe for a
  // stack instead of assuming one exists.
  const stack =
    typeof err === "object" && err !== null && "stack" in err
      ? (err as { stack?: unknown }).stack
      : undefined;

  log(stack ? stack : err);
  process.exit(1);
}
|
|
|
|
/**
 * Resolves a user-supplied path to an absolute path and validates it.
 *
 * @param val - Path as given on the command line; surrounding whitespace is
 *   ignored.
 * @param allowedTypes - Optional list of accepted file extensions, lowercase
 *   and without the leading dot. The file's extension is lowercased before
 *   comparison.
 * @returns The absolute path of the validated file.
 * @throws InvalidArgumentError when the path does not exist, is not a regular
 *   file, or has an extension outside `allowedTypes`.
 */
function resolvePath(val: string, allowedTypes?: string[]) {
  const absolutePath = path.resolve(val.trim());

  if (!fs.existsSync(absolutePath))
    throw new InvalidArgumentError("input file does not exist");

  // existsSync() is also true for directories; reject those here so callers
  // get an argument error instead of a later read failure.
  if (!fs.statSync(absolutePath).isFile())
    throw new InvalidArgumentError("input path is not a file");

  if (allowedTypes) {
    const extension = path.extname(absolutePath).slice(1).toLowerCase();
    if (!allowedTypes.includes(extension))
      throw new InvalidArgumentError(
        "input file must be one of " + allowedTypes.join(", ")
      );
  }

  return absolutePath;
}
|
|
|
|
/**
 * Declares the command-line interface with commander, parses the command
 * line, and returns the normalized options.
 *
 * - `-e, --engine <engine>`: one of `auto`, `google`, `yandex` (default `auto`).
 * - `-c, --cookie <path>`: a `.txt` file whose contents become the cookie value.
 * - `<image>`: a `.jpg`, `.jpeg` or `.png` input file.
 *
 * @returns The parsed options, with `engines` expanded to both engines
 *   (google first) when `engine` is `auto`, and `image` set to the validated
 *   absolute path of the positional argument.
 */
function parseArgs() {
  program
    .name(packageJson.name)
    .version(packageJson.version)
    .addOption(
      // `<engine>` makes the value mandatory; with `[engine]` a bare `-e`
      // would store `true` instead of an engine name.
      new Option("-e, --engine <engine>", "ocr engine to use")
        .choices(["auto", "google", "yandex"])
        .default("auto")
    )
    .option(
      // Same reasoning as above: a bare `-c` must not yield `true`.
      "-c, --cookie <path>",
      "google.com cookie header value file path",
      // Trim so a trailing newline in the file does not end up inside the
      // header value.
      (val) => fs.readFileSync(resolvePath(val, ["txt"]), "utf-8").trim()
    )
    .argument("<image>", "input image file path", (val) =>
      resolvePath(val, ["jpg", "jpeg", "png"])
    )
    .parse();

  const opts = program.opts();

  return {
    ...opts,
    engines: opts.engine === "auto" ? ["google", "yandex"] : [opts.engine],
    image: program.processedArgs[0],
  } as Options;
}
|
|
|
|
/**
 * Runs OCR on an image through the Yandex client.
 *
 * The image is first re-encoded with sharp: resized to a width of 1000 pixels
 * unless that would enlarge it, and written out as JPEG at quality 95.
 *
 * @param yandex - Initialized Yandex OCR client.
 * @param image - Path of the image file to scan.
 * @returns The recognized text, the detected language (or `"N/A"` when the
 *   client reports none), tagged with the `yandex` engine.
 */
async function processYandex(
  yandex: yandexClient,
  image: string
): Promise<OCRResult> {
  const compressed = await sharp(image)
    .resize({ width: 1000, withoutEnlargement: true })
    .jpeg({ quality: 95 })
    .toBuffer();

  // The buffer is always JPEG at this point, whatever the input file's
  // extension was, so the declared MIME type must be JPEG as well.
  const result = await yandex.scanByBlob(
    new Blob([compressed], { type: "image/jpeg" })
  );

  return {
    text: result.text,
    language: result.detected_lang ?? "N/A",
    engine: "yandex",
  };
}
|
|
|
|
/**
 * Runs OCR on an image through the Google Lens client.
 *
 * @param lens - Initialized Lens client.
 * @param image - Path of the image file to scan.
 * @returns The text of all returned segments joined with newlines, the
 *   reported language (or `"N/A"` when none is reported), tagged with the
 *   `google` engine.
 */
async function processGoogle(lens: Lens, image: string): Promise<OCRResult> {
  const scan = await lens.scanByFile(image);
  const lines = scan.segments.map(({ text }) => text);
  const language = scan.language ?? "N/A";

  return { text: lines.join("\n"), language, engine: "google" };
}
|
|
|
|
/**
 * Runs OCR on `opts.image`, trying each engine in `opts.engines` in order and
 * returning the first successful result.
 *
 * When `opts.engine` is `"auto"`, a failing engine is logged and the next one
 * is tried. When a specific engine was requested, its error is rethrown.
 *
 * @param opts - Parsed command-line options.
 * @returns The result of the first engine that succeeds.
 * @throws The engine's own error when a specific engine was requested, or a
 *   generic "failed to process" error when no engine produced a result.
 */
async function processImage(opts: Options): Promise<OCRResult> {
  let lens: Lens | null = null;
  let yandex: yandexClient | null = null;

  const filename = path.basename(opts.image);

  // Only construct the clients for engines that were actually requested.
  if (opts.engines.includes("google"))
    lens = new Lens({
      headers: opts.cookie ? { cookie: opts.cookie } : undefined,
    });
  if (opts.engines.includes("yandex")) yandex = new yandexClient();

  for (const engine of opts.engines) {
    try {
      log(`processing '${filename}' with ${engine}`);

      // `return await` is required here: returning the bare promise would
      // leave the try block before it settles, so a rejection would skip the
      // catch below and the fallback to the next engine would never run.
      switch (engine) {
        case "google":
          if (!lens) throw new Error("google engine not initialized");
          return await processGoogle(lens, opts.image);
        case "yandex":
          if (!yandex) throw new Error("yandex engine not initialized");
          return await processYandex(yandex, opts.image);
      }
    } catch (err) {
      // An explicitly chosen engine has no fallback; surface its error.
      if (opts.engine !== "auto") throw err;

      log(`failed to process '${filename}' with ${engine}`);
      log(err);
    }
  }

  throw new Error(`failed to process '${filename}'`);
}
|
|
|
|
/**
 * Program entry point: parses the command line, runs OCR on the input image,
 * reports which engine was used through `log`, and prints the recognized
 * text with `console.log`.
 */
async function main() {
  const opts = parseArgs();
  const { engine, text } = await processImage(opts);
  const filename = path.basename(opts.image);

  log(`${filename} processed with ${engine}, outputting to stdout`);
  console.log(text);
}

// Any rejection from main is handed to bail.
main().catch(bail);
|