logic
This commit is contained in:
parent
0cd70df807
commit
fb6187f502
3
bun.lock
3
bun.lock
@ -5,6 +5,7 @@
|
||||
"name": "itame",
|
||||
"dependencies": {
|
||||
"chrome-lens-ocr": "^4.1.1",
|
||||
"commander": "^14.0.1",
|
||||
"sharp": "^0.34.4",
|
||||
"ya-ocr": "^1.1.1",
|
||||
},
|
||||
@ -97,6 +98,8 @@
|
||||
|
||||
"color-string": ["color-string@1.9.1", "", { "dependencies": { "color-name": "^1.0.0", "simple-swizzle": "^0.2.2" } }, "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg=="],
|
||||
|
||||
"commander": ["commander@14.0.1", "", {}, "sha512-2JkV3gUZUVrbNA+1sjBOYLsMZ5cEEl8GTFP2a4AVz5hvasAMCQ1D2l2le/cX+pV4N6ZU17zjUahLpIXRrnWL8A=="],
|
||||
|
||||
"cross-spawn": ["cross-spawn@7.0.6", "", { "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", "which": "^2.0.1" } }, "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA=="],
|
||||
|
||||
"csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="],
|
||||
|
||||
157
index.ts
Normal file → Executable file
157
index.ts
Normal file → Executable file
@ -1 +1,156 @@
|
||||
console.log("Hello via Bun!");
|
||||
import { InvalidArgumentError, Option, program } from "commander";
|
||||
import packageJson from "./package.json";
|
||||
import path from "path";
|
||||
import fs from "fs";
|
||||
import Lens from "chrome-lens-ocr";
|
||||
import yandexClient from "ya-ocr";
|
||||
import sharp from "sharp";
|
||||
|
||||
/** OCR backends this tool can dispatch to. */
type Engine = "yandex" | "google";

/** Normalised command-line options, in the shape produced by `parseArgs`. */
interface Options {
  /** Path of the input image to run OCR on. */
  image: string;
  /** Engine selection as given on the command line; "auto" means try several. */
  engine: Engine | "auto";
  /** Concrete engines to attempt, in order. */
  engines: Engine[];
  /** Cookie header value passed to the google engine, when supplied. */
  cookie?: string;
}
|
||||
|
||||
/** Outcome of a single successful OCR run. */
type OCRResult = {
  /** Engine that produced this result. */
  engine: Engine;
  /** Language reported by the engine, or "N/A" when it reports none. */
  language: string;
  /** Recognised text. */
  text: string;
};
|
||||
|
||||
/** Diagnostic logger: writes to stderr, leaving stdout for the OCR text. */
const log = console.error;

/**
 * Last-resort failure handler: logs the error (its stack trace when it has
 * one, otherwise the value itself) and terminates the process with status 1.
 */
function bail(err: any) {
  const report = err?.stack || err;
  log(report);
  process.exit(1);
}
|
||||
|
||||
/**
 * Validates a user-supplied file path and returns it as an absolute path.
 *
 * @param val - Path as typed on the command line; surrounding whitespace is ignored.
 * @param allowedTypes - Optional list of accepted lowercase extensions (without the dot).
 * @returns The absolute path of the existing regular file.
 * @throws InvalidArgumentError when the path does not exist, is not a regular
 *   file, or its extension is not in `allowedTypes`.
 */
function resolvePath(val: string, allowedTypes?: string[]) {
  const absolutePath = path.resolve(val.trim());

  if (!fs.existsSync(absolutePath))
    throw new InvalidArgumentError("input file does not exist");

  // existsSync is also true for directories; reject them here so the caller
  // gets a clean argument error instead of a later read failure.
  if (!fs.statSync(absolutePath).isFile())
    throw new InvalidArgumentError("input path is not a file");

  if (allowedTypes) {
    // Compare case-insensitively so "PHOTO.JPG" is accepted for "jpg".
    const extension = path.extname(absolutePath).slice(1).toLowerCase();
    if (!allowedTypes.includes(extension))
      throw new InvalidArgumentError(
        "input file must be one of " + allowedTypes.join(", ")
      );
  }

  return absolutePath;
}
|
||||
|
||||
/**
 * Parses the process command line with commander and returns the options in
 * the shape the rest of the program expects.
 *
 * - `-e/--engine` selects the OCR engine ("auto" by default).
 * - `-c/--cookie` names a `.txt` file whose content is used as the cookie value.
 * - the positional argument is the input image (`jpg`, `jpeg` or `png`).
 *
 * @returns Parsed options, with `engines` expanded to the engines to try and
 *   `image` set to the validated absolute image path.
 */
function parseArgs() {
  program
    .name(packageJson.name)
    .version(packageJson.version)
    .addOption(
      // `<engine>` makes the value mandatory. With the optional `[engine]`
      // form a bare `-e` stores the boolean `true`, which bypasses `choices`.
      new Option("-e, --engine <engine>", "ocr engine to use")
        .choices(["auto", "google", "yandex"])
        .default("auto")
    )
    .option(
      // Same reasoning as above: a bare `-c` must not yield `cookie: true`.
      "-c, --cookie <path>",
      "google.com cookie header value file path",
      (val) =>
        // Trim so a trailing newline in the file does not end up in the header.
        fs.readFileSync(resolvePath(val, ["txt"])).toString("utf-8").trim()
    )
    .argument("<image>", "input image file path", (val) =>
      resolvePath(val, ["jpg", "jpeg", "png"])
    )
    .parse();

  const opts = program.opts();

  return {
    ...opts,
    // "auto" tries google first and falls back to yandex.
    engines: opts.engine === "auto" ? ["google", "yandex"] : [opts.engine],
    image: program.processedArgs[0],
  } as Options;
}
|
||||
|
||||
/**
 * Runs OCR on an image through the yandex client.
 *
 * The image is first re-encoded with sharp: width capped at 1000px (never
 * enlarged) and converted to JPEG at quality 95.
 *
 * @param yandex - Initialised yandex OCR client.
 * @param image - Path of the image file to process.
 * @returns The recognised text and detected language ("N/A" when none is reported).
 */
async function processYandex(
  yandex: yandexClient,
  image: string
): Promise<OCRResult> {
  const compressed = await sharp(image)
    .resize({ width: 1000, withoutEnlargement: true })
    .jpeg({ quality: 95 })
    .toBuffer();

  // The buffer is always JPEG after the re-encode above, whatever the input
  // extension was, so the declared MIME type must be image/jpeg as well.
  const result = await yandex.scanByBlob(
    new Blob([compressed], { type: "image/jpeg" })
  );

  return {
    text: result.text,
    language: result.detected_lang ?? "N/A",
    engine: "yandex",
  };
}
|
||||
|
||||
/**
 * Runs OCR on an image file through the google (Lens) client.
 *
 * @param lens - Initialised Lens client.
 * @param image - Path of the image file to scan.
 * @returns Segment texts joined by newlines, plus the reported language
 *   ("N/A" when none is reported).
 */
async function processGoogle(lens: Lens, image: string): Promise<OCRResult> {
  const { segments, language } = await lens.scanByFile(image);
  const lines = segments.map((segment) => segment.text);

  return {
    engine: "google",
    language: language ?? "N/A",
    text: lines.join("\n"),
  };
}
|
||||
|
||||
/**
 * Runs OCR on `opts.image`, trying each engine in `opts.engines` in order and
 * returning the first successful result.
 *
 * When a specific engine was requested, its failure is rethrown immediately.
 * In "auto" mode a failure is logged and the next engine is tried.
 *
 * @param opts - Parsed command-line options.
 * @returns The result of the first engine that succeeds.
 * @throws The engine's own error when a single engine was requested, or a
 *   generic Error once every engine has failed in "auto" mode.
 */
async function processImage(opts: Options): Promise<OCRResult> {
  let lens: Lens | null = null;
  let yandex: yandexClient | null = null;

  const filename = path.basename(opts.image);

  // Only construct the clients for engines that were actually selected.
  if (opts.engines.includes("google"))
    lens = new Lens({
      headers: opts.cookie ? { cookie: opts.cookie } : undefined,
    });
  if (opts.engines.includes("yandex")) yandex = new yandexClient();

  for (const engine of opts.engines) {
    try {
      log(`processing '${filename}' with ${engine}`);

      // `return await` is required below: returning the bare promise would
      // leave the try block before it settles, so a rejection would skip the
      // catch and the fallback to the next engine would never happen.
      switch (engine) {
        case "google":
          if (!lens) throw new Error("google engine not initialized");
          return await processGoogle(lens, opts.image);
        case "yandex":
          if (!yandex) throw new Error("yandex engine not initialized");
          return await processYandex(yandex, opts.image);
      }
    } catch (err) {
      // An explicitly requested engine has no fallback.
      if (opts.engine !== "auto") throw err;

      log(`failed to process '${filename}' with ${engine}`);
      log(err);
    }
  }

  throw new Error(`failed to process '${filename}'`);
}
|
||||
|
||||
/**
 * Program entry point: parses the command line, runs OCR on the image,
 * reports on stderr which engine was used and prints the text to stdout.
 */
async function main() {
  const opts = parseArgs();
  const { engine, text } = await processImage(opts);
  const filename = path.basename(opts.image);

  log(`${filename} processed with ${engine}, outputting to stdout`);
  console.log(text);
}

// Any uncaught failure is logged and ends the process with a non-zero status.
main().catch(bail);
|
||||
|
||||
28
itame.py
Normal file
28
itame.py
Normal file
@ -0,0 +1,28 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
# Launcher for the vendored itame tool: makes sure bun and the node modules are
# installed, then runs index.ts with the arguments given to this script.
# Launcher errors and installer output go to stderr; stdout is left for the
# OCR text written by index.ts.
file_dir = Path(__file__).parent
itame_vendor_dir = file_dir / "itame"

if not itame_vendor_dir.exists():
    print(
        "script must be placed in the parent directory of the vendor itame directory",
        file=sys.stderr,
    )
    sys.exit(1)

os.chdir(itame_vendor_dir)

# One-time bootstrap: install bun through its PowerShell installer. The
# ".initialized" marker file records that this step already succeeded.
if not os.path.exists(".initialized"):
    p = subprocess.run(
        ["powershell", "-c", "irm bun.sh/install.ps1|iex"], stdout=sys.stderr
    )
    if p.returncode != 0:
        print("failed to install bun", file=sys.stderr)
        sys.exit(1)
    open(".initialized", "w").close()

# Install dependencies when they are missing.
if not os.path.exists("node_modules"):
    p = subprocess.run(["bun", "i"], stdout=sys.stderr)
    if p.returncode != 0:
        print("failed to install node modules", file=sys.stderr)
        sys.exit(1)

# Propagate the tool's exit status so callers can detect OCR failures.
sys.exit(subprocess.run(["bun", "run", "index.ts"] + sys.argv[1:]).returncode)
|
||||
24
pack-win.sh
Executable file
24
pack-win.sh
Executable file
@ -0,0 +1,24 @@
|
||||
#!/bin/bash
# Packs the project into dist/itame-win.zip. The archive contains itame.py at
# its root and the project sources (minus VCS, build and dependency
# directories) in a sibling "itame" directory.
# Run from the project root.

# Stop at the first failing step instead of zipping a partial tree.
set -euo pipefail

mkdir -p dist

# itame.py looks for a sibling directory literally named "itame", so the name
# must not depend on what the checkout directory happens to be called.
DIR_NAME="itame"
ZIP_FILE="dist/itame-win.zip"
ROOT_DIR="$PWD"
TMP_DIR=$(mktemp -d)

# Remove the staging directory on success and on failure.
trap 'rm -rf "$TMP_DIR"' EXIT

rsync -av \
  --exclude='.git' \
  --exclude='.gitignore' \
  --exclude='pack-win.sh' \
  --exclude='dist' \
  --exclude='node_modules' \
  --exclude='itame.py' \
  ./ "$TMP_DIR/$DIR_NAME/"

cp itame.py "$TMP_DIR/"

# -FS syncs an existing archive with the staged tree. zip exits with 12
# ("nothing to do") when the archive is already up to date; not a failure here.
(cd "$TMP_DIR" && zip -FSr "$ROOT_DIR/$ZIP_FILE" .) || [ "$?" -eq 12 ]

echo "Packed $DIR_NAME and itame.py into $ZIP_FILE"
|
||||
@ -12,6 +12,7 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"chrome-lens-ocr": "^4.1.1",
|
||||
"commander": "^14.0.1",
|
||||
"sharp": "^0.34.4",
|
||||
"ya-ocr": "^1.1.1"
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user