From fb6187f50254b04caeb9b20f41551f4f5dad84b4 Mon Sep 17 00:00:00 2001
From: artie
Date: Thu, 18 Sep 2025 14:51:56 +0200
Subject: [PATCH] logic

---
Review notes (everything between the "---" line and the first "diff --git"
line is ignored by git am):

- index.ts: engine calls are awaited inside the try block so that "auto" mode
  can fall back to the next engine; the Yandex upload is labelled image/jpeg
  because it is always re-encoded as JPEG; --engine/--cookie require a value;
  the cookie file content is trimmed.
- itame.py: no longer changes the working directory before running index.ts,
  reports errors on stderr and forwards bun's exit status.
- pack-win.sh: stages the sources under a directory named "itame" (the name
  itame.py looks for), stops on the first error and always cleans up.
- The blob ids on the "index" lines below were not recomputed for the revised
  file contents.

 README.md    |   7 --
 bun.lock     |   3 +
 index.ts     | 182 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 itame.py     |  40 +++++++++++
 pack-win.sh  |  29 ++++++++
 package.json |   1 +
 6 files changed, 254 insertions(+), 8 deletions(-)
 delete mode 100644 README.md
 mode change 100644 => 100755 index.ts
 create mode 100644 itame.py
 create mode 100755 pack-win.sh

diff --git a/README.md b/README.md
deleted file mode 100644
index 7447429..0000000
--- a/README.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# itame
-
-Usage:
-
-```bash
-itame --help
-```
diff --git a/bun.lock b/bun.lock
index 1753160..0e22965 100644
--- a/bun.lock
+++ b/bun.lock
@@ -5,6 +5,7 @@
       "name": "itame",
       "dependencies": {
         "chrome-lens-ocr": "^4.1.1",
+        "commander": "^14.0.1",
         "sharp": "^0.34.4",
         "ya-ocr": "^1.1.1",
       },
@@ -97,6 +98,8 @@
 
     "color-string": ["color-string@1.9.1", "", { "dependencies": { "color-name": "^1.0.0", "simple-swizzle": "^0.2.2" } }, "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg=="],
 
+    "commander": ["commander@14.0.1", "", {}, "sha512-2JkV3gUZUVrbNA+1sjBOYLsMZ5cEEl8GTFP2a4AVz5hvasAMCQ1D2l2le/cX+pV4N6ZU17zjUahLpIXRrnWL8A=="],
+
     "cross-spawn": ["cross-spawn@7.0.6", "", { "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", "which": "^2.0.1" } }, "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA=="],
 
     "csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="],
diff --git a/index.ts b/index.ts
old mode 100644
new mode 100755
index f67b2c6..ae4d115
--- a/index.ts
+++ b/index.ts
@@ -1 +1,181 @@
-console.log("Hello via Bun!");
\ No newline at end of file
+import { InvalidArgumentError, Option, program } from "commander";
+import packageJson from "./package.json";
+import path from "path";
+import fs from "fs";
+import Lens from "chrome-lens-ocr";
+import yandexClient from "ya-ocr";
+import sharp from "sharp";
+
+/** OCR backends this tool can call. */
+type Engine = "google" | "yandex";
+
+/** Parsed command line: the requested engine plus the derived try-order. */
+interface Options {
+  engine: "auto" | Engine;
+  engines: Engine[];
+  cookie?: string;
+  image: string;
+}
+
+/** Normalized result shared by every engine. */
+type OCRResult = {
+  text: string;
+  language: string;
+  engine: Engine;
+};
+
+// Diagnostics go to stderr so stdout carries nothing but the recognized text.
+const log = console.error;
+
+/** Logs a fatal error (with stack when available) and exits with status 1. */
+function bail(err: unknown): never {
+  log(err instanceof Error && err.stack ? err.stack : err);
+  process.exit(1);
+}
+
+/**
+ * Resolves a CLI path argument to an absolute path.
+ *
+ * @param val - path as typed by the user; surrounding whitespace is ignored
+ * @param allowedTypes - lowercase extensions (without dot) to accept
+ * @returns the absolute path
+ * @throws InvalidArgumentError if the file is missing or has another extension
+ */
+function resolvePath(val: string, allowedTypes?: string[]): string {
+  const absolutePath = path.resolve(val.trim());
+  if (!fs.existsSync(absolutePath))
+    throw new InvalidArgumentError("input file does not exist");
+
+  const extension = path.extname(absolutePath).slice(1).toLowerCase();
+  if (allowedTypes && !allowedTypes.includes(extension))
+    throw new InvalidArgumentError(
+      "input file must be one of " + allowedTypes.join(", ")
+    );
+
+  return absolutePath;
+}
+
+/** Declares the CLI, parses process.argv and returns the typed options. */
+function parseArgs(): Options {
+  program
+    .name(packageJson.name)
+    .version(packageJson.version)
+    .addOption(
+      // <engine>: a bare "-e" must be rejected instead of yielding `true`
+      new Option("-e, --engine <engine>", "ocr engine to use")
+        .choices(["auto", "google", "yandex"])
+        .default("auto")
+    )
+    .option(
+      "-c, --cookie <path>",
+      "google.com cookie header value file path",
+      // trim: a trailing newline is not valid inside an HTTP header value
+      (val) => fs.readFileSync(resolvePath(val, ["txt"]), "utf-8").trim()
+    )
+    .argument("<image>", "input image file path", (val) =>
+      resolvePath(val, ["jpg", "jpeg", "png"])
+    )
+    .parse();
+
+  const opts = program.opts();
+
+  return {
+    ...opts,
+    engines: opts.engine === "auto" ? ["google", "yandex"] : [opts.engine],
+    image: program.processedArgs[0],
+  } as Options;
+}
+
+/**
+ * Runs Yandex OCR on a downscaled JPEG copy of the image.
+ *
+ * The upload is always re-encoded by sharp as JPEG, so the blob is labelled
+ * image/jpeg regardless of the extension of the input file.
+ */
+async function processYandex(
+  yandex: yandexClient,
+  image: string
+): Promise<OCRResult> {
+  const compressed = await sharp(image)
+    .resize({ width: 1000, withoutEnlargement: true })
+    .jpeg({ quality: 95 })
+    .toBuffer();
+  const result = await yandex.scanByBlob(
+    new Blob([compressed], { type: "image/jpeg" })
+  );
+  return {
+    text: result.text,
+    language: result.detected_lang ?? "N/A",
+    engine: "yandex",
+  };
+}
+
+/** Runs Google Lens OCR on the file and joins its segments with newlines. */
+async function processGoogle(lens: Lens, image: string): Promise<OCRResult> {
+  const result = await lens.scanByFile(image);
+  return {
+    text: result.segments.map((s) => s.text).join("\n"),
+    language: result.language ?? "N/A",
+    engine: "google",
+  };
+}
+
+/**
+ * Tries each engine in opts.engines and returns the first successful result.
+ *
+ * In "auto" mode a failing engine is logged and the next one is tried; when an
+ * engine was chosen explicitly its error is rethrown unchanged.
+ *
+ * @throws Error when every engine failed
+ */
+async function processImage(opts: Options): Promise<OCRResult> {
+  let lens: Lens | null = null;
+  let yandex: yandexClient | null = null;
+
+  const filename = path.basename(opts.image);
+
+  // only construct the clients that will actually be used
+  if (opts.engines.includes("google"))
+    lens = new Lens({
+      headers: opts.cookie ? { cookie: opts.cookie } : undefined,
+    });
+  if (opts.engines.includes("yandex")) yandex = new yandexClient();
+
+  for (const engine of opts.engines) {
+    try {
+      log(`processing '${filename}' with ${engine}`);
+
+      // `return await`: a rejection has to surface inside this try block,
+      // otherwise the catch below never runs and "auto" cannot fall back
+      switch (engine) {
+        case "google":
+          if (!lens) throw new Error("google engine not initialized");
+          return await processGoogle(lens, opts.image);
+        case "yandex":
+          if (!yandex) throw new Error("yandex engine not initialized");
+          return await processYandex(yandex, opts.image);
+      }
+    } catch (err) {
+      if (opts.engine !== "auto") throw err;
+
+      log(`failed to process '${filename}' with ${engine}`);
+      log(err);
+    }
+  }
+
+  throw new Error(`failed to process '${filename}'`);
+}
+
+/** Entry point: OCR the image, status to stderr, recognized text to stdout. */
+async function main() {
+  const opts = parseArgs();
+  const result = await processImage(opts);
+  log(
+    `${path.basename(opts.image)} processed with ${
+      result.engine
+    }, outputting to stdout`
+  );
+  console.log(result.text);
+}
+
+main().catch(bail);
diff --git a/itame.py b/itame.py
new file mode 100644
index 0000000..fde569e
--- /dev/null
+++ b/itame.py
@@ -0,0 +1,40 @@
+"""Windows launcher: bootstraps bun and dependencies, then runs index.ts."""
+
+import os
+from pathlib import Path
+import subprocess
+import sys
+
+file_dir = Path(__file__).resolve().parent
+itame_vendor_dir = file_dir / "itame"
+
+
+def fail(message):
+    """Print message to stderr (stdout carries the OCR text) and exit 1."""
+    print(message, file=sys.stderr)
+    sys.exit(1)
+
+
+if not itame_vendor_dir.exists():
+    fail("script must be placed in the parent directory of the vendor itame directory")
+
+# One-time bun install, remembered through a marker file in the vendor dir.
+initialized_marker = itame_vendor_dir / ".initialized"
+if not initialized_marker.exists():
+    p = subprocess.run(
+        ["powershell", "-c", "irm bun.sh/install.ps1|iex"], cwd=itame_vendor_dir
+    )
+    if p.returncode != 0:
+        fail("failed to install bun")
+    initialized_marker.touch()
+
+if not (itame_vendor_dir / "node_modules").exists():
+    p = subprocess.run(["bun", "i"], cwd=itame_vendor_dir)
+    if p.returncode != 0:
+        fail("failed to install node modules")
+
+# Run from the caller's working directory (no chdir) so that relative image
+# and cookie paths resolve where the user typed them, and hand bun's exit
+# status back to the caller.
+index_ts = os.fspath(itame_vendor_dir / "index.ts")
+sys.exit(subprocess.run(["bun", "run", index_ts] + sys.argv[1:]).returncode)
diff --git a/pack-win.sh b/pack-win.sh
new file mode 100755
index 0000000..ec07e72
--- /dev/null
+++ b/pack-win.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Builds dist/itame-win.zip: itame.py next to an itame/ copy of the sources.
+
+set -euo pipefail
+
+mkdir -p dist
+
+# itame.py looks for a sibling directory literally named "itame", so do not
+# derive the name from whatever the checkout directory happens to be called.
+DIR_NAME="itame"
+ZIP_FILE="dist/itame-win.zip"
+TMP_DIR=$(mktemp -d)
+# remove the staging directory even when a step below fails
+trap 'rm -rf "$TMP_DIR"' EXIT
+
+rsync -av \
+--exclude='.git' \
+--exclude='.gitignore' \
+--exclude='pack-win.sh' \
+--exclude='dist' \
+--exclude='node_modules' \
+--exclude='itame.py' \
+./ "$TMP_DIR/$DIR_NAME/"
+
+cp itame.py "$TMP_DIR/"
+
+(cd "$TMP_DIR" && zip -FSr "$OLDPWD/$ZIP_FILE" .)
+
+echo "Packed $DIR_NAME and itame.py into $ZIP_FILE"
diff --git a/package.json b/package.json
index 45fe035..45b432d 100644
--- a/package.json
+++ b/package.json
@@ -12,6 +12,7 @@
   },
   "dependencies": {
     "chrome-lens-ocr": "^4.1.1",
+    "commander": "^14.0.1",
     "sharp": "^0.34.4",
     "ya-ocr": "^1.1.1"
   }