From 3044206a5218513b797b1b1ac96d0d3d95abff32 Mon Sep 17 00:00:00 2001
From: artie
Date: Mon, 10 Feb 2025 19:00:25 +0100
Subject: [PATCH] add wikipedia

---
 src/commands/language/wiktionary.ts |   1 -
 src/commands/ocr/ocr.ts             |   2 +-
 src/commands/utility/wikipedia.ts   | 136 ++++++++++++++++++++++++++++
 src/scripts/test.ts                 |   9 +-
 src/utils/functions.ts              |  10 ++
 src/utils/http.ts                   |   4 +-
 src/utils/ocr.ts                    |   7 --
 src/utils/wikipedia.ts              | 114 +++++++++++++++++++++++
 src/utils/yandex.ts                 |  21 ++++
 9 files changed, 287 insertions(+), 17 deletions(-)
 create mode 100644 src/commands/utility/wikipedia.ts
 delete mode 100644 src/utils/ocr.ts
 create mode 100644 src/utils/wikipedia.ts
 create mode 100644 src/utils/yandex.ts

diff --git a/src/commands/language/wiktionary.ts b/src/commands/language/wiktionary.ts
index 296d7a7..a65d308 100644
--- a/src/commands/language/wiktionary.ts
+++ b/src/commands/language/wiktionary.ts
@@ -52,7 +52,6 @@ export default defineCommand({
 
     await interaction.deferReply();
 
-    // autocomplete value vs user value
     if (term.startsWith(":")) {
       term = term.slice(1);
     } else {
diff --git a/src/commands/ocr/ocr.ts b/src/commands/ocr/ocr.ts
index 4478bd2..4d56621 100644
--- a/src/commands/ocr/ocr.ts
+++ b/src/commands/ocr/ocr.ts
@@ -8,7 +8,7 @@ import {
 import { defineCommand } from "..";
 import { downloadFile } from "../../utils/http";
 import { abort } from "../../utils/error";
-import { yandexOcr } from "../../utils/ocr";
+import { yandexOcr } from "../../utils/yandex";
 import sharp from "sharp";
 
 export function buildOcrPayload(
diff --git a/src/commands/utility/wikipedia.ts b/src/commands/utility/wikipedia.ts
new file mode 100644
index 0000000..13e3ad9
--- /dev/null
+++ b/src/commands/utility/wikipedia.ts
@@ -0,0 +1,136 @@
+import { SlashCommandBuilder } from "discord.js";
+import { defineCommand } from "..";
+import { abort } from "../../utils/error";
+import {
+  getApiClient,
+  getPageUrl,
+  getRandomWikipediaPage,
+  getWikipediaEditions,
+  getWikipediaPage,
+  searchWikipedia,
+} from "../../utils/wikipedia";
+import { trim } from "../../utils/functions";
+
+export default defineCommand({
+  data: new SlashCommandBuilder()
+    .setName("wikipedia")
+    .setDescription("Looks up a thing on Wikipedia")
+    .addStringOption((option) =>
+      option
+        .setName("query")
+        .setDescription("The thing to look up")
+        .setAutocomplete(true)
+    )
+    .addStringOption((option) =>
+      option
+        .setName("language")
+        .setDescription("The Wikipedia language edition to use")
+        .setAutocomplete(true)
+    ),
+
+  async autocomplete(interaction) {
+    const option = interaction.options.getFocused(true);
+    const language = interaction.options.getString("language") ?? "en";
+
+    if (option.name === "language") {
+      const value = option.value.toLowerCase();
+      const editions = await getWikipediaEditions();
+      const choices = editions
+        .filter(
+          (edition) =>
+            edition.subdomain.toLowerCase() === value ||
+            edition.language.toLowerCase().includes(value)
+        )
+        .map((edition) => ({
+          name: `${edition.language} (${edition.subdomain})`,
+          value: `:${edition.subdomain}`,
+        }))
+        .slice(0, 25);
+      await interaction.respond(choices);
+    } else {
+      if (option.value.length < 3) {
+        await interaction.respond([]);
+        return;
+      }
+      const suggestions = await searchWikipedia(
+        getApiClient(language.replace(/^:/, "")),
+        option.value
+      );
+      const choices = suggestions
+        .map((suggestion) => ({
+          name: suggestion,
+          value: `:${suggestion}`,
+        }))
+        .slice(0, 25);
+      await interaction.respond(choices);
+    }
+  },
+
+  async execute(interaction) {
+    let query = interaction.options.getString("query");
+    let language = interaction.options.getString("language") ?? ":en";
+
+    await interaction.deferReply();
+
+    // Values picked from autocomplete carry a ":" prefix; anything else is
+    // free text typed by the user and still has to be resolved.
+    if (language.startsWith(":")) {
+      language = language.slice(1);
+    } else {
+      const editions = await getWikipediaEditions();
+      const edition =
+        editions.find((endpoint) => endpoint.subdomain === language) ||
+        editions.find(
+          (endpoint) =>
+            endpoint.language.toLowerCase() === language.toLowerCase()
+        );
+      if (!edition) {
+        abort("No such Wikipedia language edition");
+      }
+      language = edition.subdomain;
+    }
+
+    const client = getApiClient(language);
+
+    if (query) {
+      if (query.startsWith(":")) {
+        query = query.slice(1);
+      } else {
+        // Free text: fall back to the top search suggestion.
+        const suggestions = await searchWikipedia(client, query);
+        query = suggestions[0];
+      }
+    } else {
+      query = await getRandomWikipediaPage(client);
+    }
+
+    if (!query) {
+      abort("No results found");
+    }
+
+    const page = await getWikipediaPage(client, query);
+    if (!page || !page.extract) {
+      abort("No results found");
+    }
+
+    await interaction.editReply({
+      embeds: [
+        {
+          title: page.title,
+          description: trim(page.extract, 4096),
+          url: getPageUrl(language, page.title),
+          color: 0xfefefe,
+          // Leave the image out when the page has no original image.
+          image: page.original
+            ? { url: page.original.source }
+            : undefined,
+          author: {
+            name: "Wikipedia",
+            icon_url:
+              "https://en.wikipedia.org/static/apple-touch/wikipedia.png",
+          },
+        },
+      ],
+    });
+  },
+});
diff --git a/src/scripts/test.ts b/src/scripts/test.ts
index 58118c3..f39b39f 100644
--- a/src/scripts/test.ts
+++ b/src/scripts/test.ts
@@ -1,8 +1,3 @@
-import { ArtemisClient } from "../client";
-import { env } from "../env";
+import { getWikipediaEditions } from "../utils/wikipedia";
 
-const client = new ArtemisClient();
-await client.api.applicationCommands.bulkOverwriteGlobalCommands(
-  env.APPLICATION_ID,
-  []
-);
+console.log(await getWikipediaEditions());
diff --git a/src/utils/functions.ts b/src/utils/functions.ts
index ea2fda2..1fe2699 100644
--- a/src/utils/functions.ts
+++ b/src/utils/functions.ts
@@ -59,3 +59,13 @@ export function lazy(cb: () => T) {
 
   return () => (defaultValue ??= cb());
 }
+
+/**
+ * Shortens `str` so the result is at most `maxLength` characters long (for
+ * `maxLength` >= 1), ending it with an ellipsis when text had to be cut off.
+ */
+export function trim(str: string, maxLength: number) {
+  if (str.length <= maxLength) return str;
+  // Reserve one character for the ellipsis so the limit is never exceeded.
+  return str.slice(0, Math.max(0, maxLength - 1)) + "…";
+}
diff --git a/src/utils/http.ts b/src/utils/http.ts
index a93aeef..deb5cf6 100644
--- a/src/utils/http.ts
+++ b/src/utils/http.ts
@@ -1,7 +1,9 @@
 import { fileTypeFromBuffer } from "file-type";
+import ky from "ky";
+import { FAKE_USER_AGENT } from "./constants";
 
 export async function downloadFile(url: string) {
-  const res = await fetch(url);
+  const res = await ky.get(url, { headers: { "User-Agent": FAKE_USER_AGENT } });
   const data = Buffer.from(await res.arrayBuffer());
   const type = await fileTypeFromBuffer(data);
   return { data, type };
diff --git a/src/utils/ocr.ts b/src/utils/ocr.ts
deleted file mode 100644
index b3d67d4..0000000
--- a/src/utils/ocr.ts
+++ /dev/null
@@ -1,7 +0,0 @@
-import yandexClient from "ya-ocr";
-
-const yandex = new yandexClient();
-
-export async function yandexOcr(image: Buffer, mime: string) {
-  return yandex.scanByBlob(new Blob([image], { type: mime }) as Blob);
-}
diff --git a/src/utils/wikipedia.ts b/src/utils/wikipedia.ts
new file mode 100644
index 0000000..01f9a76
--- /dev/null
+++ b/src/utils/wikipedia.ts
@@ -0,0 +1,114 @@
+import ky, { type KyInstance } from "ky";
+import { lazy } from "./functions";
+import * as cheerio from "cheerio";
+
+type SearchResponse = [string, string[], string[], string[]];
+type RandomResponse = {
+  query: {
+    random: {
+      title: string;
+    }[];
+  };
+};
+
+type Page = {
+  query: {
+    pages: {
+      [id: string]: {
+        title: string;
+        // Optional: callers check for a missing extract / original image.
+        extract?: string;
+        original?: {
+          source: string;
+        };
+      };
+    };
+  };
+};
+
+/** Creates an HTTP client bound to `/w/api.php` of the given subdomain. */
+export function getApiClient(subdomain: string) {
+  return ky.create({
+    prefixUrl: `https://${subdomain}.wikipedia.org/w/api.php`,
+  });
+}
+
+/** Builds the article URL for `title` on the given Wikipedia subdomain. */
+export function getPageUrl(subdomain: string, title: string) {
+  return `https://${subdomain}.wikipedia.org/wiki/${encodeURIComponent(title)}`;
+}
+
+/**
+ * Scrapes the English "List of Wikipedias" page and returns the language
+ * name and subdomain of every table row that has both (wrapped in `lazy`).
+ */
+export const getWikipediaEditions = lazy(async () => {
+  const res = await ky.get("https://en.wikipedia.org/wiki/List_of_Wikipedias");
+  const data = await res.text();
+  const $ = cheerio.load(data);
+
+  const editions = $(
+    "table:has(> caption:contains('Wikipedia editions')) > tbody > tr"
+  );
+
+  return editions
+    .map((_, el) => ({
+      language: $(el).find("> td:nth-child(3)").text(),
+      subdomain: $(el).find("> td:nth-child(5)").text(),
+    }))
+    .toArray()
+    .filter((edition) => edition.language && edition.subdomain);
+});
+
+/** Runs an `opensearch` request and returns the suggested page titles. */
+export async function searchWikipedia(client: KyInstance, query: string) {
+  const res = await client.get("", {
+    searchParams: {
+      action: "opensearch",
+      search: query,
+      format: "json",
+      redirects: "resolve",
+    },
+  });
+  const data = await res.json<SearchResponse>();
+  return data[1];
+}
+
+/** Returns the title of a random page from namespace 0, if any came back. */
+export async function getRandomWikipediaPage(client: KyInstance) {
+  const res = await client.get("", {
+    searchParams: {
+      action: "query",
+      list: "random",
+      rnnamespace: 0,
+      redirects: 1,
+      format: "json",
+    },
+  });
+  const data = await res.json<RandomResponse>();
+  return data.query.random[0]?.title;
+}
+
+/**
+ * Fetches the title, plain-text intro extract and original image of `title`.
+ * Returns `null` when the response contains no pages.
+ */
+export async function getWikipediaPage(client: KyInstance, title: string) {
+  const res = await client.get("", {
+    searchParams: {
+      action: "query",
+      titles: title,
+      prop: "extracts|pageimages",
+      format: "json",
+      exintro: "",
+      explaintext: "",
+      exsentences: "5",
+      piprop: "original",
+      redirects: 1,
+    },
+  });
+  const data = await res.json<Page>();
+  const pages = Object.keys(data.query.pages);
+  if (!pages.length) return null;
+  return data.query.pages[pages[0]];
+}
diff --git a/src/utils/yandex.ts b/src/utils/yandex.ts
new file mode 100644
index 0000000..9e5df44
--- /dev/null
+++ b/src/utils/yandex.ts
@@ -0,0 +1,21 @@
+import yandexClient from "ya-ocr";
+import type { ClientType } from "ya-ocr/types";
+
+const yandex = new yandexClient();
+
+// NOTE(review): the `Promise` return types below carry no type argument;
+// presumably one of the `ClientType` types from ya-ocr. Confirm and restore.
+export async function yandexOcr(
+  image: Buffer,
+  mime: string
+): Promise;
+export async function yandexOcr(url: string): Promise;
+export async function yandexOcr(
+  resource: string | Buffer,
+  mime?: string
+): Promise {
+  if (typeof resource === "string") {
+    return yandex.scanByUrl(resource);
+  }
+  return yandex.scanByBlob(new Blob([resource], { type: mime }) as Blob);
+}