This commit is contained in:
artie 2025-09-18 14:51:56 +02:00
parent 0cd70df807
commit fb6187f502
6 changed files with 212 additions and 8 deletions

View File

@ -1,7 +0,0 @@
# itame
Usage:
```bash
itame --help
```

View File

@ -5,6 +5,7 @@
"name": "itame", "name": "itame",
"dependencies": { "dependencies": {
"chrome-lens-ocr": "^4.1.1", "chrome-lens-ocr": "^4.1.1",
"commander": "^14.0.1",
"sharp": "^0.34.4", "sharp": "^0.34.4",
"ya-ocr": "^1.1.1", "ya-ocr": "^1.1.1",
}, },
@ -97,6 +98,8 @@
"color-string": ["color-string@1.9.1", "", { "dependencies": { "color-name": "^1.0.0", "simple-swizzle": "^0.2.2" } }, "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg=="], "color-string": ["color-string@1.9.1", "", { "dependencies": { "color-name": "^1.0.0", "simple-swizzle": "^0.2.2" } }, "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg=="],
"commander": ["commander@14.0.1", "", {}, "sha512-2JkV3gUZUVrbNA+1sjBOYLsMZ5cEEl8GTFP2a4AVz5hvasAMCQ1D2l2le/cX+pV4N6ZU17zjUahLpIXRrnWL8A=="],
"cross-spawn": ["cross-spawn@7.0.6", "", { "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", "which": "^2.0.1" } }, "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA=="], "cross-spawn": ["cross-spawn@7.0.6", "", { "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", "which": "^2.0.1" } }, "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA=="],
"csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="], "csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="],

157
index.ts Normal file → Executable file
View File

@ -1 +1,156 @@
console.log("Hello via Bun!"); import { InvalidArgumentError, Option, program } from "commander";
import packageJson from "./package.json";
import path from "path";
import fs from "fs";
import Lens from "chrome-lens-ocr";
import yandexClient from "ya-ocr";
import sharp from "sharp";
/** OCR backends this tool can send an image to. */
type Engine = "google" | "yandex";
/** Normalized command-line options, as returned by `parseArgs`. */
interface Options {
/** Engine selected on the command line; "auto" means every engine is tried in turn. */
engine: "auto" | Engine;
/** Concrete engines to try, in order; derived from `engine` by `parseArgs`. */
engines: Engine[];
/** Contents of the file passed via `--cookie`, sent as the `cookie` header to the google engine. */
cookie?: string;
/** Absolute path of the input image. */
image: string;
}
/** Outcome of a successful OCR run. */
type OCRResult = {
/** Recognized text. */
text: string;
/** Language reported by the engine, or "N/A" when the engine reported none. */
language: string;
/** Engine that produced this result. */
engine: Engine;
};
// Diagnostics are written with console.error (stderr); the recognized text is
// the only thing main() prints with console.log (stdout).
const log = console.error;
/**
 * Reports a fatal error through `log` and terminates the process with exit
 * code 1.
 *
 * @param err - the value to report. When it carries a truthy `stack`, that
 *   stack is printed; otherwise the value itself is printed.
 */
function bail(err: any) {
  if (err?.stack) {
    log(err.stack);
  } else {
    log(err);
  }
  process.exit(1);
}
/**
 * Validates a path given on the command line and returns it in absolute form.
 *
 * @param val - path as typed by the user; surrounding whitespace is ignored.
 * @param allowedTypes - optional list of accepted file extensions, lowercase
 *   and without the leading dot (e.g. `["jpg", "png"]`). The file's extension
 *   is compared case-insensitively.
 * @returns the absolute path of the file.
 * @throws InvalidArgumentError when the path does not exist, is not a regular
 *   file, or has an extension outside `allowedTypes`.
 */
function resolvePath(val: string, allowedTypes?: string[]) {
  const absolutePath = path.resolve(val.trim());
  if (!fs.existsSync(absolutePath))
    throw new InvalidArgumentError("input file does not exist");
  // existsSync is also true for directories; reject them here so the caller
  // gets an argument error instead of a later read failure.
  if (!fs.statSync(absolutePath).isFile())
    throw new InvalidArgumentError("input path is not a file");
  if (allowedTypes) {
    const extension = path.extname(absolutePath).slice(1).toLowerCase();
    if (!allowedTypes.includes(extension))
      throw new InvalidArgumentError(
        "input file must be one of " + allowedTypes.join(", ")
      );
  }
  return absolutePath;
}
/**
 * Declares the command-line interface, parses `process.argv` and returns the
 * normalized options.
 *
 * - `-e, --engine <engine>`: one of "auto", "google", "yandex" (default "auto").
 * - `-c, --cookie <path>`: `.txt` file whose content is used as the cookie
 *   header value.
 * - `<image>`: required path to a jpg/jpeg/png file.
 *
 * @returns the parsed options, with `engines` expanded from `engine` and
 *   `image` resolved to an absolute path.
 */
function parseArgs(): Options {
  program
    .name(packageJson.name)
    .version(packageJson.version)
    .addOption(
      // The value is mandatory (`<engine>`): with an optional value a bare
      // `-e` would set the option to `true` and skip the choices check.
      new Option("-e, --engine <engine>", "ocr engine to use")
        .choices(["auto", "google", "yandex"])
        .default("auto")
    )
    .option(
      // Mandatory value for the same reason: a bare `-c` must not become `true`.
      "-c, --cookie <path>",
      "google.com cookie header value file path",
      (val) =>
        fs
          .readFileSync(resolvePath(val, ["txt"]))
          .toString("utf-8")
          // Drop the trailing newline editors add; it is not part of the
          // header value.
          .trim()
    )
    // `<image>` is commander's syntax for a required positional argument.
    .argument("<image>", "input image file path", (val) =>
      resolvePath(val, ["jpg", "jpeg", "png"])
    )
    .parse();
  const opts = program.opts();
  // program.opts() is untyped, so the assertion is needed to obtain Options.
  return {
    ...opts,
    engines: opts.engine === "auto" ? ["google", "yandex"] : [opts.engine],
    image: program.processedArgs[0],
  } as Options;
}
/**
 * Runs OCR on an image through the yandex client.
 *
 * The image is first downscaled to at most 1000px wide (never enlarged) and
 * re-encoded as JPEG before being uploaded.
 *
 * @param yandex - client used to perform the scan.
 * @param image - path of the image file to read.
 * @returns the recognized text and the detected language ("N/A" when the
 *   client reports none).
 */
async function processYandex(
  yandex: yandexClient,
  image: string
): Promise<OCRResult> {
  const compressed = await sharp(image)
    .resize({ width: 1000, withoutEnlargement: true })
    .jpeg({ quality: 95 })
    .toBuffer();
  // The buffer is always JPEG because of the .jpeg() step above, whatever the
  // input file's extension is, so the blob must always be labelled as JPEG.
  const result = await yandex.scanByBlob(
    new Blob([compressed], { type: "image/jpeg" })
  );
  return {
    text: result.text,
    language: result.detected_lang ?? "N/A",
    engine: "yandex",
  };
}
/**
 * Runs OCR on an image file through the Lens client.
 *
 * @param lens - client used to perform the scan.
 * @param image - path of the image file to scan.
 * @returns the text of all segments joined with newlines, and the reported
 *   language ("N/A" when none is reported).
 */
async function processGoogle(lens: Lens, image: string): Promise<OCRResult> {
  const scan = await lens.scanByFile(image);
  const lines = scan.segments.map((segment) => segment.text);
  const language = scan.language ?? "N/A";
  return { engine: "google", language, text: lines.join("\n") };
}
/**
 * Runs OCR on `opts.image`, trying each engine in `opts.engines` in order and
 * returning the first successful result.
 *
 * When a specific engine was requested, its failure is rethrown unchanged.
 * In "auto" mode a failure is logged and the next engine is tried.
 *
 * @param opts - parsed command-line options.
 * @returns the result of the first engine that succeeds.
 * @throws the engine's own error when `opts.engine` is not "auto"; otherwise
 *   a generic Error once every engine has failed.
 */
async function processImage(opts: Options): Promise<OCRResult> {
  const filename = path.basename(opts.image);
  for (const engine of opts.engines) {
    try {
      log(`processing '${filename}' with ${engine}`);
      // Clients are created only when their engine is actually tried, and
      // inside the try so a failing constructor also triggers the fallback.
      // The results are awaited here: returning the bare promise would let a
      // rejection escape this try/catch and skip the fallback entirely.
      switch (engine) {
        case "google": {
          const lens = new Lens({
            headers: opts.cookie ? { cookie: opts.cookie } : undefined,
          });
          return await processGoogle(lens, opts.image);
        }
        case "yandex":
          return await processYandex(new yandexClient(), opts.image);
      }
    } catch (err) {
      if (opts.engine !== "auto") throw err;
      log(`failed to process '${filename}' with ${engine}`);
      log(err);
    }
  }
  throw new Error(`failed to process '${filename}'`);
}
/**
 * Program entry point: parses the command line, runs OCR on the requested
 * image, logs which engine produced the result and prints the recognized
 * text with console.log.
 */
async function main() {
  const opts = parseArgs();
  const { engine, text } = await processImage(opts);
  const filename = path.basename(opts.image);
  log(`${filename} processed with ${engine}, outputting to stdout`);
  console.log(text);
}
// Run the CLI; any rejection from main() is handed to bail, which logs it and
// exits with status 1.
main().catch(bail);

28
itame.py Normal file
View File

@ -0,0 +1,28 @@
import os
from pathlib import Path
import subprocess
import sys
# Launcher for the vendored itame CLI: installs bun on first use, installs the
# node modules when they are missing, then runs index.ts with the arguments
# given to this script and exits with the CLI's exit status.
import shutil

# Resolved to an absolute path so it stays valid whatever the caller's working
# directory is.
file_dir = Path(__file__).resolve().parent
itame_vendor_dir = file_dir / "itame"
initialized_marker = itame_vendor_dir / ".initialized"


def _fail(message):
    """Print an error to stderr (stdout carries the OCR text) and exit with 1."""
    print(message, file=sys.stderr)
    sys.exit(1)


def _bun_executable():
    """Return the bun executable to run.

    A bun installed moments ago by the installer is normally not on this
    process's PATH yet, so fall back to the installer's target directory.
    """
    on_path = shutil.which("bun")
    if on_path:
        return on_path
    # NOTE(review): assumes the installer puts bun in %BUN_INSTALL%\bin or
    # ~\.bun\bin -- confirm against the Bun install script.
    bun_root = os.environ.get("BUN_INSTALL") or str(Path.home() / ".bun")
    candidate = Path(bun_root) / "bin" / "bun.exe"
    return str(candidate) if candidate.exists() else "bun"


if not itame_vendor_dir.exists():
    _fail("script must be placed in the parent directory of the vendor itame directory")

if not initialized_marker.exists():
    p = subprocess.run(["powershell", "-c", "irm bun.sh/install.ps1|iex"])
    if p.returncode != 0:
        _fail("failed to install bun")
    initialized_marker.touch()

bun = _bun_executable()

if not (itame_vendor_dir / "node_modules").exists():
    p = subprocess.run([bun, "i"], cwd=itame_vendor_dir)
    if p.returncode != 0:
        _fail("failed to install node modules")

# Run from the caller's working directory (no chdir) so relative image and
# cookie paths on the command line resolve the way the user expects; the
# script itself is addressed by absolute path.
p = subprocess.run([bun, "run", str(itame_vendor_dir / "index.ts")] + sys.argv[1:])
# Propagate the CLI's exit status so callers can detect failures.
sys.exit(p.returncode)

24
pack-win.sh Executable file
View File

@ -0,0 +1,24 @@
#!/bin/bash
# Packs the project into dist/itame-win.zip. The archive contains itame.py at
# its root next to an "itame/" directory holding the project files (without
# .git, .gitignore, dist, node_modules, this script and itame.py itself).
set -euo pipefail

# Work from the directory containing this script so the relative paths below
# do not depend on where the script is invoked from.
ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
cd "$ROOT_DIR"

mkdir -p dist

# itame.py looks for a sibling directory named exactly "itame", so the bundled
# directory must use that name regardless of what the checkout is called.
DIR_NAME="itame"
ZIP_FILE="dist/itame-win.zip"

# Staging area; removed on every exit path, including failures.
TMP_DIR=$(mktemp -d)
trap 'rm -rf "$TMP_DIR"' EXIT

rsync -av \
  --exclude='.git' \
  --exclude='.gitignore' \
  --exclude='pack-win.sh' \
  --exclude='dist' \
  --exclude='node_modules' \
  --exclude='itame.py' \
  ./ "$TMP_DIR/$DIR_NAME/"

cp itame.py "$TMP_DIR/"

# -FS syncs an existing archive with the staged files.
# NOTE(review): zip documents exit status 12 as "nothing to do"; it is treated
# as success here so re-packing an unchanged tree does not fail -- confirm.
zip_status=0
(cd "$TMP_DIR" && zip -FSr "$ROOT_DIR/$ZIP_FILE" .) || zip_status=$?
if [ "$zip_status" -ne 0 ] && [ "$zip_status" -ne 12 ]; then
  echo "zip failed with status $zip_status" >&2
  exit "$zip_status"
fi

echo "Packed $DIR_NAME and itame.py into $ZIP_FILE"

View File

@ -12,6 +12,7 @@
}, },
"dependencies": { "dependencies": {
"chrome-lens-ocr": "^4.1.1", "chrome-lens-ocr": "^4.1.1",
"commander": "^14.0.1",
"sharp": "^0.34.4", "sharp": "^0.34.4",
"ya-ocr": "^1.1.1" "ya-ocr": "^1.1.1"
} }