logic
This commit is contained in:
parent
0cd70df807
commit
fb6187f502
3
bun.lock
3
bun.lock
@ -5,6 +5,7 @@
|
|||||||
"name": "itame",
|
"name": "itame",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"chrome-lens-ocr": "^4.1.1",
|
"chrome-lens-ocr": "^4.1.1",
|
||||||
|
"commander": "^14.0.1",
|
||||||
"sharp": "^0.34.4",
|
"sharp": "^0.34.4",
|
||||||
"ya-ocr": "^1.1.1",
|
"ya-ocr": "^1.1.1",
|
||||||
},
|
},
|
||||||
@ -97,6 +98,8 @@
|
|||||||
|
|
||||||
"color-string": ["color-string@1.9.1", "", { "dependencies": { "color-name": "^1.0.0", "simple-swizzle": "^0.2.2" } }, "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg=="],
|
"color-string": ["color-string@1.9.1", "", { "dependencies": { "color-name": "^1.0.0", "simple-swizzle": "^0.2.2" } }, "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg=="],
|
||||||
|
|
||||||
|
"commander": ["commander@14.0.1", "", {}, "sha512-2JkV3gUZUVrbNA+1sjBOYLsMZ5cEEl8GTFP2a4AVz5hvasAMCQ1D2l2le/cX+pV4N6ZU17zjUahLpIXRrnWL8A=="],
|
||||||
|
|
||||||
"cross-spawn": ["cross-spawn@7.0.6", "", { "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", "which": "^2.0.1" } }, "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA=="],
|
"cross-spawn": ["cross-spawn@7.0.6", "", { "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", "which": "^2.0.1" } }, "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA=="],
|
||||||
|
|
||||||
"csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="],
|
"csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="],
|
||||||
|
|||||||
157
index.ts
Normal file → Executable file
157
index.ts
Normal file → Executable file
@ -1 +1,156 @@
|
|||||||
console.log("Hello via Bun!");
|
import { InvalidArgumentError, Option, program } from "commander";
|
||||||
|
import packageJson from "./package.json";
|
||||||
|
import path from "path";
|
||||||
|
import fs from "fs";
|
||||||
|
import Lens from "chrome-lens-ocr";
|
||||||
|
import yandexClient from "ya-ocr";
|
||||||
|
import sharp from "sharp";
|
||||||
|
|
||||||
|
/** OCR backends this tool can call. */
type Engine = "google" | "yandex";

/** Parsed command-line configuration, as produced by `parseArgs`. */
interface Options {
  /** Engine selected on the command line; "auto" means no specific engine was requested. */
  engine: "auto" | Engine;
  /** Concrete engines to try, in order. */
  engines: Engine[];
  /** Contents of the cookie file passed with `--cookie`, if any. */
  cookie?: string;
  /** Path of the input image. */
  image: string;
}

/** Outcome of a successful OCR run. */
interface OCRResult {
  /** Recognized text. */
  text: string;
  /** Language reported by the engine, or "N/A" when none was reported. */
  language: string;
  /** Engine that produced this result. */
  engine: Engine;
}
|
||||||
|
|
||||||
|
// Diagnostics are written with console.error (stderr); stdout is used by
// `main` for the recognized text only.
const log = console.error;

/**
 * Logs a fatal error and terminates the process with exit code 1.
 *
 * @param err - Any thrown value. When it is an object carrying a truthy
 *   `stack` property the stack is logged, otherwise the value itself is.
 */
function bail(err: unknown): never {
  // Narrow structurally instead of using `any`: anything thrown may reach here.
  const stack =
    typeof err === "object" && err !== null && "stack" in err
      ? (err as { stack?: unknown }).stack
      : undefined;

  log(stack ? stack : err);
  process.exit(1);
}
|
||||||
|
|
||||||
|
/**
 * Validates a user-supplied path and returns it in absolute form.
 *
 * @param val - Path as typed on the command line; surrounding whitespace is ignored.
 * @param allowedTypes - Optional list of accepted lowercase extensions, without the dot.
 * @returns The absolute path of the existing file.
 * @throws InvalidArgumentError if the path does not exist, is not a regular
 *   file, or has an extension outside `allowedTypes`.
 */
function resolvePath(val: string, allowedTypes?: string[]) {
  const absolutePath = path.resolve(val.trim());

  if (!fs.existsSync(absolutePath))
    throw new InvalidArgumentError("input file does not exist");

  // existsSync is also true for directories; reject them here so callers that
  // read the file do not fail later with a raw EISDIR error.
  if (!fs.statSync(absolutePath).isFile())
    throw new InvalidArgumentError("input path is not a file");

  if (allowedTypes) {
    // Compare case-insensitively so "PHOTO.JPG" matches "jpg".
    const extension = path.extname(absolutePath).slice(1).toLowerCase();
    if (!allowedTypes.includes(extension))
      throw new InvalidArgumentError(
        "input file must be one of " + allowedTypes.join(", ")
      );
  }

  return absolutePath;
}
|
||||||
|
|
||||||
|
/**
 * Declares the command-line interface, parses the process arguments and
 * returns them as `Options`.
 *
 * `engines` is derived from `--engine`: "auto" expands to google then yandex,
 * any other choice becomes a single-element list.
 */
function parseArgs(): Options {
  program
    .name(packageJson.name)
    .version(packageJson.version)
    .addOption(
      // `<engine>` makes the value mandatory. With the optional form
      // `[engine]`, a bare `-e` produces boolean `true`, which bypasses
      // `choices` and matches no engine.
      new Option("-e, --engine <engine>", "ocr engine to use")
        .choices(["auto", "google", "yandex"])
        .default("auto")
    )
    .option(
      // Mandatory value for the same reason: a bare `-c` would otherwise
      // yield `true` without ever running the parser below.
      "-c, --cookie <path>",
      "google.com cookie header value file path",
      (val) =>
        // Trim so the file's trailing newline does not end up in the header value.
        fs.readFileSync(resolvePath(val, ["txt"])).toString("utf-8").trim()
    )
    .argument("<image>", "input image file path", (val) =>
      resolvePath(val, ["jpg", "jpeg", "png"])
    )
    .parse();

  const opts = program.opts<{ engine: Options["engine"]; cookie?: string }>();
  const engines: Engine[] =
    opts.engine === "auto" ? ["google", "yandex"] : [opts.engine];

  return {
    ...opts,
    engines,
    // First positional argument, already passed through resolvePath.
    image: program.processedArgs[0],
  };
}
|
||||||
|
|
||||||
|
/**
 * Runs OCR on an image with the Yandex client.
 *
 * The image is re-encoded before upload: limited to 1000px in width (never
 * enlarged) and converted to JPEG at quality 95.
 *
 * @param yandex - Initialized Yandex OCR client.
 * @param image - Path of the image file to read.
 * @returns Recognized text, the detected language ("N/A" when the client
 *   reports none) and the engine tag "yandex".
 */
async function processYandex(
  yandex: yandexClient,
  image: string
): Promise<OCRResult> {
  const compressed = await sharp(image)
    .resize({ width: 1000, withoutEnlargement: true })
    .jpeg({ quality: 95 })
    .toBuffer();

  // The buffer is always JPEG because of `.jpeg()` above, so the MIME type
  // must not be derived from the input file's extension.
  const result = await yandex.scanByBlob(
    new Blob([compressed], { type: "image/jpeg" })
  );

  return {
    text: result.text,
    language: result.detected_lang ?? "N/A",
    engine: "yandex",
  };
}
|
||||||
|
|
||||||
|
/**
 * Runs OCR on an image file with the Google Lens client.
 *
 * @param lens - Initialized Lens client.
 * @param image - Path of the image file to scan.
 * @returns The segment texts joined with newlines, the reported language
 *   ("N/A" when none is reported) and the engine tag "google".
 */
async function processGoogle(lens: Lens, image: string): Promise<OCRResult> {
  const scan = await lens.scanByFile(image);
  const lines = scan.segments.map((segment) => segment.text);
  const language = scan.language ?? "N/A";

  return { text: lines.join("\n"), language, engine: "google" };
}
|
||||||
|
|
||||||
|
/**
 * Runs OCR on `opts.image`, trying each engine in `opts.engines` in order and
 * returning the first successful result.
 *
 * When a specific engine was requested (`opts.engine !== "auto"`) its error is
 * rethrown. In "auto" mode the failure is logged and the next engine is tried.
 *
 * @throws Error when every engine in the list has failed.
 */
async function processImage(opts: Options): Promise<OCRResult> {
  let lens: Lens | null = null;
  let yandex: yandexClient | null = null;

  const filename = path.basename(opts.image);

  // Construct clients only for the engines that were selected.
  if (opts.engines.includes("google"))
    lens = new Lens({
      headers: opts.cookie ? { cookie: opts.cookie } : undefined,
    });
  if (opts.engines.includes("yandex")) yandex = new yandexClient();

  for (const engine of opts.engines) {
    try {
      log(`processing '${filename}' with ${engine}`);

      // `await` inside the try block is essential: returning the bare promise
      // would leave the try before it settles, so a rejection would bypass
      // the catch below and the fallback to the next engine would never run.
      switch (engine) {
        case "google":
          if (!lens) throw new Error("google engine not initialized");
          return await processGoogle(lens, opts.image);
        case "yandex":
          if (!yandex) throw new Error("yandex engine not initialized");
          return await processYandex(yandex, opts.image);
      }
    } catch (err) {
      // An explicitly requested engine has no fallback; surface its error.
      if (opts.engine !== "auto") throw err;

      log(`failed to process '${filename}' with ${engine}`);
      log(err);
    }
  }

  throw new Error(`failed to process '${filename}'`);
}
|
||||||
|
|
||||||
|
/**
 * Program entry point: parses the command line, runs OCR on the image and
 * prints the recognized text to stdout. Progress messages go through `log`.
 */
async function main() {
  const options = parseArgs();
  const { engine, text } = await processImage(options);

  const imageName = path.basename(options.image);
  log(`${imageName} processed with ${engine}, outputting to stdout`);

  console.log(text);
}
|
||||||
|
|
||||||
|
main().catch(bail);
|
||||||
|
|||||||
28
itame.py
Normal file
28
itame.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import shutil

# This launcher must sit next to the vendored "itame" project directory.
# Paths are made absolute because the working directory is deliberately left
# untouched (see the final run below).
file_dir = Path(__file__).resolve().parent
itame_vendor_dir = file_dir / "itame"

if not itame_vendor_dir.exists():
    print(
        "script must be placed in the parent directory of the vendor itame directory",
        file=sys.stderr,
    )
    sys.exit(1)


def _find_bun():
    """Return the bun executable to invoke, or None when it cannot be located."""
    found = shutil.which("bun")
    if found:
        return found
    # A bun installed moments ago is not on this process's PATH yet.
    # NOTE(review): assumes the installer's default location
    # ($BUN_INSTALL or ~/.bun) — confirm against the bun install script.
    install_root = Path(os.environ.get("BUN_INSTALL", Path.home() / ".bun"))
    for name in ("bun.exe", "bun"):
        candidate = install_root / "bin" / name
        if candidate.exists():
            return str(candidate)
    return None


# First run: install bun, then drop a marker file so this step is skipped later.
# Installer output is sent to stderr so stdout carries only the program's output.
initialized_marker = itame_vendor_dir / ".initialized"
if not initialized_marker.exists():
    p = subprocess.run(
        ["powershell", "-c", "irm bun.sh/install.ps1|iex"], stdout=sys.stderr
    )
    if p.returncode != 0:
        print("failed to install bun", file=sys.stderr)
        sys.exit(1)
    initialized_marker.touch()

bun = _find_bun()
if bun is None:
    print("bun executable not found", file=sys.stderr)
    sys.exit(1)

# Install dependencies inside the vendor directory without changing our own cwd.
if not (itame_vendor_dir / "node_modules").exists():
    p = subprocess.run([bun, "i"], cwd=itame_vendor_dir, stdout=sys.stderr)
    if p.returncode != 0:
        print("failed to install node modules", file=sys.stderr)
        sys.exit(1)

# Run from the caller's working directory so relative image/cookie paths given
# on the command line resolve as the user expects, and propagate the exit code.
p = subprocess.run([bun, "run", str(itame_vendor_dir / "index.ts")] + sys.argv[1:])
sys.exit(p.returncode)
|
||||||
24
pack-win.sh
Executable file
24
pack-win.sh
Executable file
@ -0,0 +1,24 @@
|
|||||||
|
#!/bin/bash
# Packs the project (run from the project root) together with the itame.py
# launcher into dist/itame-win.zip.

# Stop at the first failing step instead of zipping a partial copy.
set -euo pipefail

mkdir -p dist

DIR_NAME=$(basename "$PWD")
# itame.py looks for a sibling directory named exactly "itame", so the project
# is packed under that name regardless of what the checkout directory is called.
VENDOR_DIR="itame"
ZIP_FILE="dist/itame-win.zip"
# Absolute path, computed before any directory change.
ZIP_PATH="$PWD/$ZIP_FILE"
TMP_DIR=$(mktemp -d)
# Remove the staging directory on every exit path, including failures.
trap 'rm -rf "$TMP_DIR"' EXIT

rsync -av \
  --exclude='.git' \
  --exclude='.gitignore' \
  --exclude='pack-win.sh' \
  --exclude='dist' \
  --exclude='node_modules' \
  --exclude='itame.py' \
  ./ "$TMP_DIR/$VENDOR_DIR/"

# The launcher goes next to the project directory, not inside it.
cp itame.py "$TMP_DIR/"

(cd "$TMP_DIR" && zip -FSr "$ZIP_PATH" .)

echo "Packed $DIR_NAME and itame.py into $ZIP_FILE"
|
||||||
@ -12,6 +12,7 @@
|
|||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"chrome-lens-ocr": "^4.1.1",
|
"chrome-lens-ocr": "^4.1.1",
|
||||||
|
"commander": "^14.0.1",
|
||||||
"sharp": "^0.34.4",
|
"sharp": "^0.34.4",
|
||||||
"ya-ocr": "^1.1.1"
|
"ya-ocr": "^1.1.1"
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user