Add Wikipedia lookup command

This commit is contained in:
artie 2025-02-10 19:00:25 +01:00
parent 9290819374
commit 3044206a52
9 changed files with 262 additions and 17 deletions

View File

@ -52,7 +52,6 @@ export default defineCommand({
await interaction.deferReply(); await interaction.deferReply();
// autocomplete value vs user value
if (term.startsWith(":")) { if (term.startsWith(":")) {
term = term.slice(1); term = term.slice(1);
} else { } else {

View File

@ -8,7 +8,7 @@ import {
import { defineCommand } from ".."; import { defineCommand } from "..";
import { downloadFile } from "../../utils/http"; import { downloadFile } from "../../utils/http";
import { abort } from "../../utils/error"; import { abort } from "../../utils/error";
import { yandexOcr } from "../../utils/ocr"; import { yandexOcr } from "../../utils/yandex";
import sharp from "sharp"; import sharp from "sharp";
export function buildOcrPayload( export function buildOcrPayload(

View File

@ -0,0 +1,132 @@
import { SlashCommandBuilder } from "discord.js";
import { defineCommand } from "..";
import { abort } from "../../utils/error";
import {
getApiClient,
getPageUrl,
getRandomWikipediaPage,
getWikipediaEditions,
getWikipediaPage,
searchWikipedia,
} from "../../utils/wikipedia";
import { trim } from "../../utils/functions";
/** Maximum number of choices sent in one autocomplete response. */
const MAX_AUTOCOMPLETE_CHOICES = 25;

/** Discord caps both the name and a string value of a choice at 100 characters. */
const MAX_CHOICE_LENGTH = 100;

/**
 * Drops choices Discord would reject for being too long and caps the list
 * size. A single over-long choice makes the whole autocomplete response
 * fail, so such entries are filtered out instead of being sent.
 */
function toAutocompleteChoices(choices: { name: string; value: string }[]) {
  return choices
    .filter(
      (choice) =>
        choice.name.length <= MAX_CHOICE_LENGTH &&
        choice.value.length <= MAX_CHOICE_LENGTH
    )
    .slice(0, MAX_AUTOCOMPLETE_CHOICES);
}

/**
 * `/wikipedia [query] [language]` — replies with an embed containing the
 * extract of a Wikipedia article.
 *
 * Values picked from autocomplete are prefixed with ":" so that `execute`
 * can tell an exact pick (used as-is) from free text typed by the user
 * (which still has to be resolved via the editions list / a search).
 * Without a query a random article is shown.
 */
export default defineCommand({
  data: new SlashCommandBuilder()
    .setName("wikipedia")
    .setDescription("Looks up a thing on Wikipedia")
    .addStringOption((option) =>
      option
        .setName("query")
        .setDescription("The thing to look up")
        .setAutocomplete(true)
    )
    .addStringOption((option) =>
      option
        .setName("language")
        .setDescription("The Wikipedia language edition to use")
        .setAutocomplete(true)
    ),

  async autocomplete(interaction) {
    const option = interaction.options.getFocused(true);
    const language = interaction.options.getString("language") ?? "en";

    if (option.name === "language") {
      // Match either the exact subdomain ("de") or part of the language name.
      const value = option.value.toLowerCase();
      const editions = await getWikipediaEditions();
      const choices = editions
        .filter(
          (edition) =>
            edition.subdomain.toLowerCase() === value ||
            edition.language.toLowerCase().includes(value)
        )
        .map((edition) => ({
          name: `${edition.language} (${edition.subdomain})`,
          value: `:${edition.subdomain}`,
        }));

      await interaction.respond(toAutocompleteChoices(choices));
    } else {
      // Too short to produce useful suggestions; skip the API round trip.
      if (option.value.length < 3) {
        await interaction.respond([]);
        return;
      }

      // The language option may already hold an autocomplete pick (":de").
      const suggestions = await searchWikipedia(
        getApiClient(language.replace(/^:/, "")),
        option.value
      );
      const choices = suggestions.map((suggestion) => ({
        name: suggestion,
        value: `:${suggestion}`,
      }));

      await interaction.respond(toAutocompleteChoices(choices));
    }
  },

  async execute(interaction) {
    let query = interaction.options.getString("query");
    let language = interaction.options.getString("language") ?? ":en";

    await interaction.deferReply();

    if (language.startsWith(":")) {
      // Autocomplete pick: the rest is already a subdomain.
      language = language.slice(1);
    } else {
      // Free text: accept a subdomain first, then a full language name.
      // Both comparisons ignore case, matching the autocomplete handler.
      const wanted = language.toLowerCase();
      const editions = await getWikipediaEditions();
      const edition =
        editions.find(
          (endpoint) => endpoint.subdomain.toLowerCase() === wanted
        ) ||
        editions.find((endpoint) => endpoint.language.toLowerCase() === wanted);

      if (!edition) {
        abort("No such Wikipedia language edition");
      }

      language = edition.subdomain;
    }

    const client = getApiClient(language);

    if (query) {
      if (query.startsWith(":")) {
        // Autocomplete pick: the rest is the exact article title.
        query = query.slice(1);
      } else {
        // Free text: take the best search hit, if any.
        const suggestions = await searchWikipedia(client, query);
        query = suggestions[0];
      }
    } else {
      query = await getRandomWikipediaPage(client);
    }

    if (!query) {
      abort("No results found");
    }

    const page = await getWikipediaPage(client, query);

    if (!page || !page.extract) {
      abort("No results found");
    }

    // Only attach an image when the article actually has one; an image
    // object without a URL is not a valid embed image.
    const imageUrl = page.original?.source;

    await interaction.editReply({
      embeds: [
        {
          title: page.title,
          description: trim(page.extract, 4096),
          url: getPageUrl(language, page.title),
          color: 0xfefefe,
          ...(imageUrl ? { image: { url: imageUrl } } : {}),
          author: {
            name: "Wikipedia",
            icon_url:
              "https://en.wikipedia.org/static/apple-touch/wikipedia.png",
          },
        },
      ],
    });
  },
});

View File

@ -1,8 +1,3 @@
import { ArtemisClient } from "../client"; import { getWikipediaEditions } from "../utils/wikipedia";
import { env } from "../env";
const client = new ArtemisClient(); console.log(await getWikipediaEditions());
await client.api.applicationCommands.bulkOverwriteGlobalCommands(
env.APPLICATION_ID,
[]
);

View File

@ -59,3 +59,7 @@ export function lazy<T>(cb: () => T) {
return () => (defaultValue ??= cb()); return () => (defaultValue ??= cb());
} }
/**
 * Shortens `str` so the result is at most `maxLength` UTF-16 code units.
 *
 * Strings that already fit are returned unchanged. Longer strings are cut
 * and end with "…"; the ellipsis counts towards `maxLength`, so the result
 * can be passed to APIs with a hard length limit.
 *
 * @param str The text to shorten.
 * @param maxLength Maximum length of the returned string.
 * @returns `str` itself, a truncated copy ending in "…", or "" when
 * `maxLength` leaves no room for any character.
 */
export function trim(str: string, maxLength: number) {
  if (str.length <= maxLength) return str;
  if (maxLength <= 0) return "";

  // Reserve one code unit for the ellipsis.
  let end = maxLength - 1;

  // Avoid cutting between the two halves of a surrogate pair, which would
  // leave a lone high surrogate at the end of the text.
  if (end > 0) {
    const last = str.charCodeAt(end - 1);
    if (last >= 0xd800 && last <= 0xdbff) end -= 1;
  }

  return str.slice(0, end) + "…";
}

View File

@ -1,7 +1,9 @@
import { fileTypeFromBuffer } from "file-type"; import { fileTypeFromBuffer } from "file-type";
import ky from "ky";
import { FAKE_USER_AGENT } from "./constants";
export async function downloadFile(url: string) { export async function downloadFile(url: string) {
const res = await fetch(url); const res = await ky.get(url, { headers: { "User-Agent": FAKE_USER_AGENT } });
const data = Buffer.from(await res.arrayBuffer()); const data = Buffer.from(await res.arrayBuffer());
const type = await fileTypeFromBuffer(data); const type = await fileTypeFromBuffer(data);
return { data, type }; return { data, type };

View File

@ -1,7 +0,0 @@
import yandexClient from "ya-ocr";
// Single shared OCR client for this module.
const yandex = new yandexClient();

/**
 * Runs Yandex OCR on an in-memory image.
 *
 * @param image Raw image bytes.
 * @param mime MIME type attached to the blob that is handed to the client.
 */
export async function yandexOcr(image: Buffer, mime: string) {
  const blob = new Blob([image], { type: mime }) as Blob;
  return yandex.scanByBlob(blob);
}

101
src/utils/wikipedia.ts Normal file
View File

@ -0,0 +1,101 @@
import ky, { type KyInstance } from "ky";
import { lazy } from "./functions";
import * as cheerio from "cheerio";
/**
 * Payload of the `opensearch` API action. Only the second element (the
 * suggested titles) is read by this module.
 * NOTE(review): element labels follow the OpenSearch suggestions layout —
 * confirm against the MediaWiki API documentation.
 */
type SearchResponse = [
  query: string,
  titles: string[],
  descriptions: string[],
  urls: string[],
];
/** Payload of `action=query&list=random`; only the titles are modelled. */
type RandomResponse = {
  query: {
    random: Array<{ title: string }>;
  };
};
/**
 * Payload of `action=query` with `prop=extracts|pageimages`, keyed by page id.
 * NOTE(review): the wikipedia command guards against a missing `extract` and
 * `original`, so these fields are presumably absent for some pages — confirm
 * and consider marking them optional.
 */
type Page = {
  query: {
    pages: Record<
      string,
      {
        title: string;
        extract: string;
        original: { source: string };
      }
    >;
  };
};
/** A single DNS label made of letters, digits and inner hyphens ("en", "zh-min-nan"). */
const SUBDOMAIN_PATTERN = /^[a-z0-9]+(?:-[a-z0-9]+)*$/i;

/**
 * Validates a Wikipedia subdomain before it is interpolated into a URL.
 *
 * The subdomain can originate from user input, so anything that is not a
 * plain DNS label (dots, slashes, "#", "@", …) is rejected; otherwise the
 * request could be pointed at a host other than wikipedia.org.
 *
 * @returns The subdomain with surrounding whitespace removed.
 * @throws Error when the value is not a valid single DNS label.
 */
function normalizeSubdomain(subdomain: string): string {
  const normalized = subdomain.trim();
  if (!SUBDOMAIN_PATTERN.test(normalized)) {
    throw new Error(`Invalid Wikipedia subdomain: ${JSON.stringify(subdomain)}`);
  }
  return normalized;
}

/**
 * Creates an HTTP client bound to the `api.php` endpoint of one Wikipedia
 * language edition.
 *
 * @param subdomain Language edition subdomain, e.g. "en".
 * @throws Error when `subdomain` is not a valid single DNS label.
 */
export function getApiClient(subdomain: string) {
  const host = normalizeSubdomain(subdomain);
  return ky.create({
    prefixUrl: `https://${host}.wikipedia.org/w/api.php`,
  });
}

/**
 * Builds the public article URL for `title` on the given language edition.
 *
 * @param subdomain Language edition subdomain, e.g. "en".
 * @param title Article title; it is percent-encoded for use in the path.
 * @throws Error when `subdomain` is not a valid single DNS label.
 */
export function getPageUrl(subdomain: string, title: string) {
  const host = normalizeSubdomain(subdomain);
  return `https://${host}.wikipedia.org/wiki/${encodeURIComponent(title)}`;
}
/**
 * Downloads the "List of Wikipedias" article and extracts one entry per row
 * of the table captioned "Wikipedia editions".
 */
async function fetchWikipediaEditions() {
  const res = await ky.get("https://en.wikipedia.org/wiki/List_of_Wikipedias");
  const data = await res.text();
  const $ = cheerio.load(data);

  const editions = $(
    "table:has(> caption:contains('Wikipedia editions')) > tbody > tr"
  );

  return (
    editions
      .map((_, el) => ({
        // Cell text may carry whitespace from the surrounding markup; trim it
        // so the values can be compared and used in URLs directly.
        language: $(el).find("> td:nth-child(3)").text().trim(),
        subdomain: $(el).find("> td:nth-child(5)").text().trim(),
      }))
      .toArray()
      // Rows without both cells (e.g. header rows) yield empty strings.
      .filter((edition) => edition.language && edition.subdomain)
  );
}

// `lazy` memoises the promise itself, so a rejected fetch would otherwise be
// served to every later caller. Keeping the loader in a variable allows it to
// be replaced after a failure.
let loadWikipediaEditions = lazy(fetchWikipediaEditions);

/**
 * Returns the list of Wikipedia language editions (`language` name and
 * `subdomain`), fetched once and cached for subsequent calls.
 *
 * A failed fetch is not cached: the error is rethrown to the caller and the
 * next call tries again.
 */
export const getWikipediaEditions = () => {
  const loader = loadWikipediaEditions;
  return loader().catch((error: unknown) => {
    // Several callers may share the same failed promise; reset only once.
    if (loadWikipediaEditions === loader) {
      loadWikipediaEditions = lazy(fetchWikipediaEditions);
    }
    throw error;
  });
};
/**
 * Asks the `opensearch` API action for titles matching `query`.
 *
 * @param client HTTP client bound to one edition's `api.php`.
 * @param query Search text.
 * @returns The second element of the response payload (the suggested titles).
 */
export async function searchWikipedia(client: KyInstance, query: string) {
  const searchParams = {
    action: "opensearch",
    search: query,
    format: "json",
    redirects: "resolve",
  };

  const response = await client.get("", { searchParams });
  const payload = await response.json<SearchResponse>();

  return payload[1];
}
/**
 * Requests one random page (`list=random`, namespace 0) and returns its title.
 *
 * @param client HTTP client bound to one edition's `api.php`.
 * @returns The title of the first entry in the response.
 */
export async function getRandomWikipediaPage(client: KyInstance) {
  const searchParams = {
    action: "query",
    list: "random",
    rnnamespace: 0,
    redirects: 1,
    format: "json",
  };

  const response = await client.get("", { searchParams });
  const payload = await response.json<RandomResponse>();
  const firstHit = payload.query.random[0];

  return firstHit.title;
}
/**
 * Fetches the intro extract (plain text, up to 5 sentences) and the original
 * page image for `title`, following redirects.
 *
 * @param client HTTP client bound to one edition's `api.php`.
 * @param title Article title to look up.
 * @returns The first page object of the response, or `null` when the
 * response contains no pages at all.
 */
export async function getWikipediaPage(client: KyInstance, title: string) {
  const res = await client.get("", {
    searchParams: {
      action: "query",
      titles: title,
      prop: "extracts|pageimages",
      format: "json",
      exintro: "",
      explaintext: "",
      exsentences: "5",
      piprop: "original",
      redirects: 1,
    },
  });
  const data = await res.json<Page>();

  // The API can answer with a body that has no `query` member (for example
  // an error object); treat that like "no pages" instead of throwing.
  const [firstPage] = Object.values(data.query?.pages ?? {});

  return firstPage ?? null;
}

19
src/utils/yandex.ts Normal file
View File

@ -0,0 +1,19 @@
import yandexClient from "ya-ocr";
import type { ClientType } from "ya-ocr/types";
// Single shared OCR client for this module.
const yandex = new yandexClient();

/**
 * Runs Yandex OCR on an in-memory image.
 *
 * @param image Raw image bytes.
 * @param mime MIME type attached to the blob that is handed to the client.
 */
export async function yandexOcr(
  image: Buffer,
  mime: string
): Promise<ClientType.OCRFullData>;
/**
 * Runs Yandex OCR on an image referenced by URL.
 *
 * @param url Location of the image, passed to the client as-is.
 */
export async function yandexOcr(url: string): Promise<ClientType.OCRFullData>;
export async function yandexOcr(
  resource: string | Buffer,
  mime?: string
): Promise<ClientType.OCRFullData> {
  // Anything that is not a string is the Buffer overload.
  if (typeof resource !== "string") {
    const blob = new Blob([resource], { type: mime }) as Blob;
    return yandex.scanByBlob(blob);
  }

  return yandex.scanByUrl(resource);
}