# artemis/cogs/media.py

from __future__ import annotations, unicode_literals

import asyncio
import html
import re
import shlex
import struct
import zipfile
from io import BytesIO
from pathlib import Path
from typing import TYPE_CHECKING, Optional
from urllib.parse import quote_plus

import discord
import humanize
import pendulum
import yt_dlp
from bs4 import BeautifulSoup
from discord.ext import commands
from PIL import Image
from pycaption import SRTWriter, WebVTTReader
from yt_dlp.utils import parse_duration

import utils
from utils.common import ArtemisError
from utils.constants import MAX_DISCORD_SIZE, MAX_LITTERBOX_SIZE
from utils.catbox import CatboxError
from utils.flags import DLFlags
from utils.iso_639 import get_language_name
from utils.views import DropdownView

if TYPE_CHECKING:
    from bot import Artemis

# Scratch directory that the yt-dlp output templates in this module point at.
TEMP_DIR = Path("data") / "temp"

# Replace yt-dlp's bug-report hint with an empty string.
yt_dlp.utils.bug_reports_message = lambda: ""

# Baseline YoutubeDL options; the commands below copy these and add their own.
DEFAULT_OPTS = dict(
    quiet=True,
    noprogress=True,
    no_warnings=True,
    socket_timeout=5,
    noplaylist=True,
    playlistend=1,
    nopart=True,
)


def format_ytdlp_error(error: str) -> str:
    """Condense a yt-dlp error message before it is shown to the user.

    The text is first passed through ``utils.silence_url_embeds``, then the
    leading ``"[generic] "`` and ``"None: "`` tags are removed (in that
    order), and finally everything from the first occurrence of each known
    trailing marker onwards is cut off.
    """
    message = utils.silence_url_embeds(error)

    # Order matters: "None: " is only stripped once "[generic] " is gone.
    for prefix in ("[generic] ", "None: "):
        message = message.removeprefix(prefix)

    # Keep only the part that precedes each marker.
    for marker in ("Set --default-search", "(caused by", "You might want to use a VPN"):
        message = message.partition(marker)[0]

    return message


async def run_ytdlp(query: str, opts: dict, download: bool = True) -> dict:
    """Run ``YoutubeDL.extract_info`` in a worker thread.

    Args:
        query: URL (or search query) handed to yt-dlp.
        opts: Options used to construct the ``YoutubeDL`` instance.
        download: Forwarded to ``extract_info``; ``False`` only extracts metadata.

    Returns:
        Whatever ``extract_info`` returns (the info dict).

    Raises:
        ArtemisError: If yt-dlp raises ``YoutubeDLError``; the message is the
            yt-dlp error text cleaned up by ``format_ytdlp_error``.
    """
    try:
        with yt_dlp.YoutubeDL(opts) as ytdl:
            # extract_info is blocking, so keep it off the event loop.
            return await asyncio.to_thread(ytdl.extract_info, query, download=download)
    except yt_dlp.utils.YoutubeDLError as error:
        # format_ytdlp_error operates on text (str methods), so hand it the
        # message rather than the exception object, and keep the cause chained.
        raise ArtemisError(format_ytdlp_error(str(error))) from error


class Media(commands.Cog):
    def __init__(self, bot: Artemis):
        """Keep a reference to the bot; the commands below reach its services through ``self.bot``."""
        self.bot: Artemis = bot

    @commands.command(aliases=["nf"])
    @commands.cooldown(1, 2, commands.BucketType.user)
    async def netflix(self, ctx: commands.Context, *, query: str):
        """Check if and where a show is available on Netflix."""
        # Flow: search via self.bot.unogs, let the user pick when there are
        # several hits, then fetch per-country details and render an embed.

        def field_value(items: list[str], sep: str) -> str:
            # Discord rejects embed fields whose value is empty and caps a
            # value at 1024 characters, so join only as many items as fit and
            # fall back to a placeholder when there is nothing to show.
            ellipsis = " …"
            limit = 1024 - len(ellipsis)
            text = ""
            for item in items:
                candidate = f"{text}{sep}{item}" if text else item
                if len(candidate) > limit:
                    return (text + ellipsis).strip()
                text = candidate
            return text or "N/A"

        await ctx.typing()
        data = await self.bot.unogs.search(query)
        if "total" not in data:
            return await ctx.reply("The API returned no data, weird!")
        elif data["total"] == 0:
            return await ctx.reply("No results found.")
        elif data["total"] == 1:
            data = data["results"][0]
        else:
            view = DropdownView(
                ctx,
                data["results"],
                lambda x: html.unescape(x["title"]),
                placeholder="Choose title...",
            )
            data = await view.prompt()
            if not data:
                return

        title = html.unescape(data["title"])
        synopsis = html.unescape(data["synopsis"])
        nfid = data["nfid"]
        nfurl = f"https://www.netflix.com/title/{nfid}"
        img = data.get("poster") or data.get("img")

        countries = await self.bot.unogs.fetch_details(nfid, "countries")
        flags = [f":flag_{country['cc'].strip().lower()}:" for country in countries]

        # Collect the distinct, non-empty audio/subtitle languages over all countries.
        audio: set[str] = set()
        subtitles: set[str] = set()
        for country in countries:
            audio.update(part.strip() for part in country["audio"].split(","))
            subtitles.update(part.strip() for part in country["subtitle"].split(","))
        audio.discard("")
        subtitles.discard("")

        embed = discord.Embed(title=title, description=synopsis, url=nfurl, color=0xE50914)
        if img and "http" in img:
            embed.set_image(url=img)
        embed.set_author(
            name="Netflix",
            icon_url="https://assets.nflxext.com/us/ffe/siteui/common/icons/nficon2016.png",
        )
        embed.add_field(name="Availability", value=field_value(flags, " "))
        embed.add_field(name="Audio", value=field_value(sorted(audio), ", "), inline=False)
        embed.add_field(name="Subtitles", value=field_value(sorted(subtitles), ", "), inline=False)
        await ctx.reply(embed=embed)

    @commands.command(aliases=["thumb"])
    async def thumbnail(self, ctx: commands.Context, url: str):
        """Gives you a video thumbnail URL for a video from any site supported by YTDL."""
        url = url.strip("<>")
        utils.check_for_ssrf(url)

        await ctx.typing()

        # YouTube links are answered straight from the video ID without
        # running yt-dlp; everything else goes through metadata extraction.
        yt_match = re.search(
            r"(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)([\w-]+)", url
        )
        if yt_match is None:
            info = await run_ytdlp(url, DEFAULT_OPTS, download=False)
            thumb_url = info.get("thumbnail")
            if not thumb_url:
                return await ctx.reply("No thumbnail available.")
        else:
            thumb_url = f"https://i.ytimg.com/vi/{yt_match.group(1)}/maxresdefault.jpg"

        await ctx.reply(thumb_url)

    @commands.command(aliases=["audio"])
    @commands.max_concurrency(1)
    async def dlaudio(self, ctx: commands.Context, url: str, fmt: Optional[str]):
        """
        Downloads audio from a YouTube video in original format or mp3.
        To convert the audio to mp3, pass 'mp3' after the URL.
        """
        url = url.strip("<>")
        utils.check_for_ssrf(url)
        ytdl_opts = {
            **DEFAULT_OPTS,
            "format": "251/140/ba",
            "outtmpl": TEMP_DIR.joinpath("%(id)s.%(ext)s").as_posix(),
            # Videos that fail this filter are not downloaded at all.
            "match_filter": yt_dlp.match_filter_func("duration < 1500"),
        }

        want_mp3 = fmt == "mp3"
        if want_mp3:
            ytdl_opts["postprocessors"] = [
                {"key": "FFmpegExtractAudio", "preferredcodec": "mp3", "preferredquality": "128"}
            ]

        async with ctx.typing():
            info_dict = await run_ytdlp(url, ytdl_opts)

            title = utils.romajify(info_dict.get("title"))
            vid_id = info_dict.get("id")
            ext = "mp3" if want_mp3 else info_dict.get("ext")
            # webm audio is presented to the user with an .ogg extension.
            pretty_filename = f"{title}.ogg" if ext == "webm" else f"{title}.{ext}"
            path = TEMP_DIR / f"{vid_id}.{ext}"

            try:
                if not path.exists():
                    # NOTE(review): a missing file most likely means the
                    # duration filter rejected the video - confirm the wording.
                    return await ctx.reply("ERROR: The file is too big for me to upload!")

                await ctx.reply(file=discord.File(path, pretty_filename))
            finally:
                # Always remove the download, even when the upload fails,
                # so failed uploads do not pile up in the temp directory.
                path.unlink(missing_ok=True)

    @commands.command(usage="<url> <lang>", aliases=["subs", "subtitles"])
    async def dlsubs(self, ctx: commands.Context, url: str, lang: Optional[str]):
        """
        Downloads a subtitle file from any site supported by YTDL.
        Makes you choose the language if more than one detected and no `<lang>` given.
        `<lang>` is optional if the video only has one subtitle file.
        Pass `all` to `<lang>` to get all of the subtitles.
        """
        url = url.strip("<>")
        utils.check_for_ssrf(url)
        ytdl_opts = {
            **DEFAULT_OPTS,
            "writesubtitles": True,
            "subtitleslangs": ["all"],
        }

        async def process_one(data: dict) -> discord.File:
            # Turn a single subtitle entry into an attachment. Inline "data"
            # is used when present, otherwise the track is fetched from "url".
            ext = data["ext"]
            sub_data = data.get("data")
            if sub_data is None:
                async with self.bot.session.get(data.get("url")) as r:
                    sub_data = await r.text()
            if ext == "vtt":
                try:
                    sub_data = str(SRTWriter().write(WebVTTReader().read(sub_data)))
                    ext = "srt"
                except Exception:
                    # Deliberate best effort: if the VTT -> SRT conversion
                    # fails, the original VTT is sent unchanged.
                    pass
            filename = f"{yt_dlp.utils.sanitize_filename(title)}-{data['lang']}.{ext}"
            return discord.File(BytesIO(sub_data.encode("utf-8")), filename)

        async def process(data: list[dict], lang: Optional[str] = None) -> discord.File:
            # One entry (or an explicit language) yields a single subtitle
            # file; several entries are bundled into one zip archive.
            if lang:
                found = discord.utils.find(lambda x: x["lang"] == lang, data)
                if found is None:
                    raise ArtemisError("No subtitles available for that language.")
                return await process_one(found)
            if len(data) == 1:
                return await process_one(data[0])

            files: list[discord.File] = await asyncio.gather(
                *(process_one(entry) for entry in data)
            )

            zip_buffer = BytesIO()
            with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file:
                for file in files:
                    zip_file.writestr(file.filename, file.fp.read())
            zip_buffer.seek(0)

            filename = f"{yt_dlp.utils.sanitize_filename(title)}-subs.zip"
            return discord.File(zip_buffer, filename)

        async with ctx.typing():
            info_dict = await run_ytdlp(url, ytdl_opts, download=False)

        # `title` is read by the two helpers above through their closure.
        title = utils.romajify(info_dict.get("title")).replace(" ", "_")
        subtitles: dict = info_dict.get("requested_subtitles")

        if not subtitles:
            return await ctx.reply("No subtitles available.")

        file = None
        subtitles = [{"lang": k, **v} for k, v in subtitles.items()]

        if lang == "all":
            file = await process(subtitles)
        elif lang:
            try:
                file = await process(subtitles, lang)
            except ArtemisError:
                return await ctx.reply("No subtitles available for that language.")
        elif len(subtitles) == 1:
            file = await process(subtitles)
        else:
            view = DropdownView(
                ctx,
                subtitles,
                lambda item: item["lang"],
                lambda item: item.get("name") or get_language_name(item["lang"].lower()) or None,
                "Choose one or more...",
                25,
                True,
            )
            view.message = await ctx.reply("Which language(s)?", view=view)
            if await view.wait():
                return await view.message.edit(content="You took too long!", view=None)

            result = view.result
            async with ctx.typing():
                file = await process(result)

        await ctx.reply(file=file)

    @commands.command()
    @commands.cooldown(1, 2, commands.BucketType.default)
    async def mediainfo(self, ctx: commands.Context, url: str, format: Optional[str]):
        """Returns MediaInfo output for a media file."""

        url = url.strip("<>")
        utils.check_for_ssrf(url)
        if not format:
            # Default format selector depends on whether the link looks like YouTube.
            is_youtube = any(domain in url for domain in ("youtube", "youtu.be"))
            format = "bv/best" if is_youtube else "b/mp4/b*"
        ytdl_opts = {**DEFAULT_OPTS, "format": format}

        async with ctx.typing():
            info_dict = await run_ytdlp(url, ytdl_opts, download=False)

            title = info_dict.get("title")
            media_url = info_dict["url"]
            # The extracted URL ultimately derives from user input, so quote
            # it properly instead of wrapping it in plain double quotes.
            result = await utils.run_cmd(f"mediainfo {shlex.quote(media_url)}")

            if not result.ok:
                return await ctx.reply(result.decoded)

            lines = result.decoded.split("\n")
            # Drop the second output line (presumably the one echoing the
            # long media URL - TODO confirm); guard against short output.
            if len(lines) > 1:
                lines.pop(1)
            output = "\n".join(lines)

            data = BytesIO(output.encode())
            fp = discord.File(data, f"{utils.romajify(title)}.txt")
        await ctx.reply(f"Media information for `{title}`", file=fp)

    @commands.command(aliases=["screenshot", "ss"])
    @commands.cooldown(1, 2, commands.BucketType.default)
    async def screencap(self, ctx: commands.Context, url: str, timestamp: Optional[str] = "1"):
        """
        Takes a video screencap at a specified timestamp.

        Valid timestamp formats:
        - `SS` or `SS.ms`
        - `HH:MM:SS` or `HH:MM:SS.ms`
        """
        TIMESTAMP_RE = r"\d{2}:\d{2}:\d{2}(?:\.\d{1,3})?"
        SECONDS_RE = r"\d{1,5}(?:\.\d{1,3})?"
        url = url.strip("<>")
        utils.check_for_ssrf(url)
        ytdl_opts = {**DEFAULT_OPTS, "format": "bv*/b"}

        @utils.in_executor
        def to_jpeg(image):
            # Re-encode the PNG frame as JPEG (quality 90) into a fresh buffer.
            im = Image.open(image)
            if im.mode not in ("RGB", "L"):
                # JPEG cannot store alpha or palette data.
                im = im.convert("RGB")
            buff = BytesIO()
            im.save(buff, "JPEG", quality=90)
            buff.seek(0)
            return buff

        # The timestamp is interpolated into the ffmpeg command line below,
        # so only the two strictly numeric shapes are accepted.
        if not (re.fullmatch(TIMESTAMP_RE, timestamp) or re.fullmatch(SECONDS_RE, timestamp)):
            return await ctx.reply("Invalid timestamp format, check out `$help screencap`.")

        async with ctx.typing():
            info_dict = await run_ytdlp(url, ytdl_opts, download=False)

            title = info_dict["title"]
            # The stream URL derives from user input: quote it for the command line.
            source = shlex.quote(info_dict["url"])

            if info_dict.get("is_live"):
                # Live streams ignore the timestamp and grab the current frame.
                args = f"ffmpeg -hide_banner -loglevel warning -i {source} -vframes 1 -c:v png -f image2 -"
            else:
                args = f"ffmpeg -hide_banner -loglevel warning -ss {timestamp} -i {source} -vframes 1 -c:v png -f image2 -"

            result = await utils.run_cmd(args)
            stdout, stderr = result.stdout, result.stderr
            if not result.ok:
                return await ctx.reply(stderr.decode().split("pipe:")[0])

            # Width and height are the two big-endian uint32 values at bytes
            # 16-24 of a PNG (start of the IHDR chunk). ffmpeg can exit
            # cleanly without emitting a frame, e.g. when seeking past the end.
            if len(stdout) < 24:
                return await ctx.reply("No frame could be extracted at that timestamp.")
            w, h = struct.unpack(">II", stdout[16:24])
            msg = f"Resolution: {w}x{h}"
            buff = BytesIO(stdout)
            ext = "png"

            if len(stdout) > MAX_DISCORD_SIZE:
                buff = await to_jpeg(buff)
                ext = "jpg"
                msg += "\nThe image was too big for me to upload so I converted it to JPEG Q90."
            dfile = discord.File(buff, f"{title}.{ext}")
        return await ctx.reply(content=msg, file=dfile)

    @commands.command(usage="[format:] [trim:] <url>", aliases=["dl"])
    @commands.max_concurrency(1)
    @commands.cooldown(1, 30, commands.BucketType.user)
    async def download(self, ctx: commands.Context, *, flags: DLFlags):
        """
        Downloads videos from websites supported by youtube-dl.

        The download fails if the video is more than 1 hour long or its filesize exceeds 1 GB.
        Only one command can run at once and every user has a 30 second cooldown.

        Optional flags:
        `format` or `f` - youtube-dl format choice (only when trim flag is not present)
        `trim` or `t` - Trim selection of the form `start-end`.

        Valid trim selection formats:
        - `SS-SS` or `SS.ms-SS.ms`
        - `MM:SS-MM:SS` or `MM:SS.ms-MM:SS.ms`
        - `HH:MM:SS-HH:MM:SS` or `HH:MM:SS.ms-HH:MM:SS.ms`

        Examples:
        `{prefix}download https://youtu.be/dQw4w9WgXcQ`
        `{prefix}download f:22 https://youtu.be/o6wtDPVkKqI`
        `{prefix}dl trim:41-58 https://youtu.be/uKxyLmbOc0Q`
        `{prefix}dl t:01:15-01:27 https://youtu.be/qUk1ZoCGqsA`
        `{prefix}dl t:120-160 https://www.reddit.com/r/anime/comments/f86otf/`
        """
        path: Optional[Path] = None
        msg: Optional[discord.Message] = None
        finished = False  # flipped in `finally`; tells the monitor task to stop
        state = "downloading"
        template = TEMP_DIR.joinpath("%(id)s.%(ext)s").as_posix()

        url = flags.url
        format = flags.format
        trim = flags.trim
        ss, to = flags.ss, None

        def largest_temp_file_size() -> Optional[int]:
            # Size of the biggest file in the temp directory, used as a
            # progress estimate. Files may vanish between listing and stat(),
            # so filesystem errors are skipped instead of killing the monitor.
            sizes = []
            try:
                for entry in TEMP_DIR.iterdir():
                    try:
                        sizes.append(entry.stat().st_size)
                    except OSError:
                        continue
            except OSError:
                return None
            return max(sizes, default=None)

        async def monitor_download():
            # Background task: post a status message and refresh it once per
            # second until `finished` is set, then delete it.
            nonlocal msg
            while not finished:
                content = "Processing..."
                if state == "downloading":
                    size = largest_temp_file_size()
                    if size is not None:
                        size = humanize.naturalsize(size, binary=True)
                        content = f":arrow_down: `Downloading...` {size}"
                    else:
                        content = ":arrow_down: `Downloading...`"
                elif state == "uploading":
                    content = ":arrow_up: `Uploading...`"

                if not msg:
                    msg = await ctx.reply(content)
                else:
                    msg = await msg.edit(content=content)
                await asyncio.sleep(1)
            if msg:
                await msg.delete()

        try:
            # Reject a missing/empty URL before handing it to the SSRF check.
            url = (url or "").strip("<>")
            if not url:
                raise ArtemisError("No URL provided.")
            utils.check_for_ssrf(url)

            def match_filter(info_dict, incomplete):
                # yt-dlp match filter: returning None accepts the video;
                # unsupported or oversized videos raise ArtemisError.
                if "#_sudo" in url and ctx.author.id == self.bot.owner_id:
                    return None
                duration = info_dict.get("duration")
                filesize = info_dict.get("filesize") or info_dict.get("filesize_approx")
                is_live = info_dict.get("is_live")
                if is_live:
                    raise ArtemisError("Streams are not supported.")
                elif trim:
                    # Trimmed downloads are bounded by the trim length instead.
                    return None
                elif not duration and not filesize:
                    raise ArtemisError("Failed to extract duration and filesize.")
                elif filesize and (filesize < 1 or filesize > MAX_LITTERBOX_SIZE):
                    raise ArtemisError("The video is too big (> 1 GB).")
                elif duration and (duration < 0 or duration > 3600):
                    raise ArtemisError("The video is too long (> 1 hour).")
                else:
                    return None

            ytdl_opts = {**DEFAULT_OPTS, "outtmpl": template, "match_filter": match_filter}

            if "youtube.com" in url or "youtu.be" in url:
                ytdl_opts["format"] = "248+251/247+251/137+140/136+140/bv*+ba/b"
            else:
                ytdl_opts["format_sort"] = ["ext", "+vcodec:avc"]

            if trim:
                dur = tuple(map(parse_duration, trim.strip().split("-")))
                if len(dur) == 2 and all(t is not None for t in dur):
                    ss, to = dur
                else:
                    raise ArtemisError("Invalid trim selection. Must be of the form `start-end`.")

                # Trimming is done by ffmpeg acting as the downloader, which
                # is why DASH/HLS (segmented) protocols are filtered out.
                args = {
                    "ffmpeg": shlex.split("-hide_banner -loglevel error"),
                    "ffmpeg_i": shlex.split(f"-ss {ss} -to {to}"),
                }
                ytdl_opts["format"] = f"({ytdl_opts['format']})[protocol!*=dash][protocol!*=m3u8]"
                ytdl_opts["external_downloader"] = {"default": "ffmpeg"}
                ytdl_opts["external_downloader_args"] = args

                diff = to - ss
                if diff > 3600:
                    raise ArtemisError("The trim selection is too long (> 1 hour).")
                elif diff < 1:
                    raise ArtemisError("The trim selection cannot be negative or zero.")
            if format:
                if trim:
                    raise ArtemisError("Format choice is not supported with a trim selection.")
                ytdl_opts["format"] = format

            # Keep a reference so the task cannot be garbage-collected mid-run.
            monitor_task = asyncio.create_task(monitor_download())  # noqa: F841
            async with ctx.typing():
                info_dict = await run_ytdlp(url, ytdl_opts)
            state = "uploading"

            title = utils.romajify(info_dict.get("title"))
            vid_id = info_dict.get("id")
            ext = info_dict.get("ext")
            filename = f"{vid_id}.{ext}"
            if trim:
                discord_filename = f"{title}_{round(ss)}-{round(to)}.{ext}"
            else:
                discord_filename = f"{title}.{ext}"

            path = TEMP_DIR / filename
            if not path.exists():
                raise ArtemisError(f"Internal Error: File {path} does not exist.")
            size = path.stat().st_size

            async with ctx.typing():
                # Small files go straight to Discord, larger ones to Litterbox.
                if size <= MAX_DISCORD_SIZE:
                    await ctx.reply(file=discord.File(path, discord_filename))
                elif size <= MAX_LITTERBOX_SIZE:
                    try:
                        res = await self.bot.litterbox.upload(path.as_posix(), 24)
                        expiration = discord.utils.format_dt(pendulum.now("UTC").add(hours=24))
                        await ctx.reply(f"This file will expire on {expiration}\n{res}")
                    except CatboxError as err:
                        await ctx.reply(str(err))
                else:
                    raise ArtemisError(
                        "The file passed the initial filesize guesstimation but is still too big to upload (> 1 GB)."
                    )
        except ArtemisError as err:
            # A failed download should not cost the user their cooldown.
            ctx.command.reset_cooldown(ctx)
            # `to` is only set once a trim selection has been parsed; testing
            # it (rather than the truthiness of `ss`) also covers trims that
            # start at 0. Both the old and the current yt-dlp wording match.
            text = str(err).lower()
            format_unavailable = (
                "requested format not available" in text
                or "requested format is not available" in text
            )
            if format_unavailable and to is not None:
                raise ArtemisError(
                    "Segmented streams are not supported with a trim selection."
                ) from err
            raise
        finally:
            finished = True
            if path and path.exists():
                path.unlink()

    @commands.command()
    @commands.cooldown(1, 1, commands.BucketType.default)
    async def dislikes(self, ctx: commands.Context, url: str):
        """Shows some statistics for a YouTube video including dislikes using Return YouTube Dislikes API."""
        YT_RE = r"(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)([\w-]+)"

        # An 11-character argument is taken to be a bare video ID; anything
        # else has to contain a recognisable YouTube URL.
        vid = url
        if len(url) != 11:
            found = re.search(YT_RE, url)
            if found is None:
                raise ArtemisError("Invalid YouTube URL or ID.")
            vid = found.group(1)

        endpoint = "https://returnyoutubedislikeapi.com/votes"

        async with ctx.typing():
            async with self.bot.session.get(endpoint, params={"videoId": vid}) as r:
                if not r.ok:
                    # Two statuses get a friendly message, the rest are reported verbatim.
                    if r.status == 404:
                        reason = "Video not found."
                    elif r.status == 400:
                        reason = "Invalid video ID."
                    else:
                        reason = f"Return YouTube Dislikes API returned {r.status} {r.reason}"
                    raise ArtemisError(reason)
                data = await r.json()

        views, likes, dislikes = (
            humanize.intcomma(data[key]) for key in ("viewCount", "likes", "dislikes")
        )

        await ctx.reply(f"**{views}** views\n**{likes}** likes\n**{dislikes}** dislikes")

    @commands.command(aliases=["lg"])
    @commands.cooldown(1, 2, commands.BucketType.default)
    async def libgen(self, ctx: commands.Context, *, query: str):
        """
        Search and download content from Library Genesis.
        Current mirror: libgen.is
        """
        LIBGEN_SEARCH_URL = "https://libgen.is/search.php?req={query}&column=def"

        if len(query) < 3:
            return await ctx.reply("The search query must contain at least 3 characters.")

        await ctx.typing()

        headers = {"User-Agent": self.bot.user_agent}
        search_url = LIBGEN_SEARCH_URL.format(query=quote_plus(query))

        # `page` rather than `html`, so the imported html module is not shadowed.
        async with self.bot.session.get(search_url, headers=headers) as r:
            page = await r.text()
        soup = BeautifulSoup(page, "lxml")

        # <i> elements are removed before the cell text is read.
        for el in soup.select("i"):
            el.decompose()

        table = soup.select(".c > tr")
        if not table:
            return await ctx.reply(
                "edge case hit, debug dump:\n",
                file=discord.File(BytesIO(page.encode("utf-8")), "search.html"),
            )
        elif len(table) == 1:
            # Only the header row is present.
            return await ctx.reply("No results found.")

        # Each entry: (title, author, mirror page URLs, file extension).
        entries = []
        for row in table[1:]:
            cells = row.select("td")
            if len(cells) < 9:
                # Not a result row in the expected layout; skip it.
                continue
            title = ", ".join([s for s in cells[2].stripped_strings if s])
            year = cells[4].text
            if year:
                title += f" ({year})"
            author = cells[1].text
            mirrors = [
                cell.a["href"] for cell in cells[9:11] if cell.a and cell.a.get("href")
            ]
            ext = cells[8].text
            entries.append((title, author, mirrors, ext))

        if not entries:
            return await ctx.reply("No results found.")

        if len(entries) == 1:
            result = entries[0]
        else:
            view = DropdownView(ctx, entries, lambda x: x[0], lambda x: x[1])
            result = await view.prompt("Which entry?")
            if not result:
                return

        async with ctx.typing():
            # Try each mirror in turn; any failure moves on to the next one.
            for mirror in result[2]:
                try:
                    async with self.bot.session.get(mirror, headers=headers) as r:
                        page = await r.text()
                except Exception:
                    continue

                soup = BeautifulSoup(page, "lxml")
                link = soup.find("a", string="GET")
                url = link.get("href") if link else None
                if not url:
                    # No download link on this mirror page.
                    continue

                try:
                    async with self.bot.session.get(url, headers=headers) as r:
                        filesize = r.headers.get("content-length")
                        disposition = r.content_disposition
                        # Prefer the server-provided name, else build one
                        # from the entry's title and extension.
                        filename = (
                            disposition.filename if disposition else None
                        ) or f"{result[0]}.{result[3]}"

                        content = None
                        if not filesize:
                            # Without a Content-Length header the size is only
                            # known after reading the body.
                            content = await r.read()
                            filesize = len(content)

                        if int(filesize) > MAX_DISCORD_SIZE:
                            msg = "The file is too big to upload, so here's the link:"
                            desc = f"[{filename}]({url})"
                            embed = discord.Embed(description=desc, color=0xFEFEFE)
                            return await ctx.reply(msg, embed=embed)

                        if content is None:
                            content = await r.read()

                        file = discord.File(BytesIO(content), filename)
                        return await ctx.reply(file=file)
                except Exception:
                    continue

            return await ctx.reply("Kernel panic: Could not contact any of the download mirrors.")


async def setup(bot: Artemis):
    """Create the Media cog for ``bot`` and add it via ``bot.add_cog``."""
    cog = Media(bot)
    await bot.add_cog(cog)