artemis/utils/notifiers.py
2024-03-01 20:51:07 +01:00

143 lines
4.5 KiB
Python

from __future__ import annotations
import asyncio
from dataclasses import dataclass
import logging
from typing import TYPE_CHECKING, TypeVar
from collections import deque
from bs4 import BeautifulSoup
if TYPE_CHECKING:
from bot import Artemis
# Placeholder type for a single feed entry; the FeedNotifier hooks
# (get_cache_key, fetch_entries, on_new_entry) annotate entries with it.
T = TypeVar("T")
class FeedNotifier:
    """Base class for background workers that poll feeds and report new entries.

    Subclasses implement three hooks:

    * ``fetch_entries(feed)`` -- return the current entries of one feed,
    * ``get_cache_key(entry)`` -- return a string identifying an entry,
    * ``on_new_entry(entry)`` -- react to an entry whose key is not cached.

    ``start()`` schedules ``_run()`` as an asyncio task. The task first fills
    the per-feed caches (so entries that already exist are not reported), then
    repeatedly polls every feed and hands unseen entries to ``on_new_entry``.
    """

    # Used to build the logger name and the prefix of error reports.
    NAME: str = "Base"
    # Seconds slept between two full passes over all feeds.
    CHECK_INTERVAL: int | float = 60 * 5
    # Seconds slept after processing one feed, before the next one.
    FEED_INTERVAL: int | float = 0.1
    # Maximum number of keys remembered per feed; older keys are dropped.
    CACHE_SIZE: int = 100

    bot: Artemis
    feeds: list[str]
    # Declared as annotations: assigning the type objects here (``= dict[...]``,
    # ``= asyncio.Task``) would create misleading class attributes.
    _cache: dict[str, deque[str]]
    _task: asyncio.Task | None = None

    def __init__(self, bot: Artemis, feeds: list[str]):
        """Store the bot and feed list and create one bounded cache per feed.

        Args:
            bot: Object providing ``session``, ``user_agent``, ``owner_id``,
                ``get_channel`` and ``get_user`` (all used by the helpers below).
            feeds: Feed identifiers; each is passed verbatim to ``fetch_entries``.
        """
        self.bot = bot
        self.feeds = feeds
        self._log = logging.getLogger(f"{self.NAME}Notifier")
        self._task = None
        self._cache = {feed: deque(maxlen=self.CACHE_SIZE) for feed in self.feeds}

    def log(self, msg):
        """Emit ``msg`` at INFO level on this notifier's logger."""
        self._log.info(msg)

    async def _run(self):
        """Worker body: bootstrap the caches, then poll all feeds forever.

        Any ``Exception`` raised while bootstrapping or polling is passed to
        ``on_error`` and ends the worker; it is not restarted automatically.
        """
        try:
            await self._init_cache()
            await asyncio.sleep(self.CHECK_INTERVAL)

            self.log("Starting check loop...")
            while True:
                self.log("Processing feeds...")
                for feed in self.feeds:
                    entries = await self.fetch_entries(feed)

                    for entry in entries:
                        key = self.get_cache_key(entry)
                        if key in self._cache[feed]:
                            continue

                        self.log(f"{feed}: New entry found, handing over to on_new_entry()")
                        # Remember the key before dispatching so the entry is
                        # marked as seen prior to awaiting the handler.
                        self._cache[feed].append(key)
                        await self.on_new_entry(entry)

                    # NOTE(review): placed per feed to match the constant's
                    # name -- confirm against the intended nesting.
                    await asyncio.sleep(self.FEED_INTERVAL)

                await asyncio.sleep(self.CHECK_INTERVAL)
        except Exception as error:
            await self.on_error(error)

    async def _init_cache(self):
        """Seed every feed's cache with the keys of its current entries."""
        self.log("Bootstrapping cache...")
        for feed in self.feeds:
            entries = await self.fetch_entries(feed)
            self._cache[feed].extend(self.get_cache_key(entry) for entry in entries)
            self.log(f"{feed}: Bootstrapped cache with {len(self._cache[feed])} entries.")

    def start(self) -> FeedNotifier:
        """Schedule the worker task and return ``self`` to allow chaining.

        Uses ``asyncio.create_task``, so it must be called while an event loop
        is running.
        """
        self._task = asyncio.create_task(self._run())
        self.log("Worker started.")
        return self

    def stop(self) -> None:
        """Request cancellation of the worker task, if one was started.

        ``Task.cancel()`` only schedules the cancellation and does not raise
        ``CancelledError`` in the caller, so no exception handling is needed.
        Calling this before ``start()`` is a harmless no-op.
        """
        if self._task is not None:
            self._task.cancel()
        self.log("Worker stopped.")

    def get_cache_key(self, entry: T) -> str:
        """Return the string used to recognise ``entry`` as already seen."""
        raise NotImplementedError()

    async def fetch_entries(self, feed: str) -> list[T]:
        """Return the entries currently available for ``feed``."""
        raise NotImplementedError()

    async def on_new_entry(self, entry: T):
        """Handle an entry whose cache key has not been seen before."""
        raise NotImplementedError()

    async def on_error(self, error: Exception):
        """Report ``error`` to the bot owner as ``[<NAME>Notifier] <Type>: <message>``."""
        await self.send_to_user(
            self.bot.owner_id, f"[{self.NAME}Notifier] {type(error).__name__}: {error}"
        )

    async def fetch_html(self, url):
        """GET ``url`` with the bot's user agent and parse the body with lxml.

        Returns:
            A ``BeautifulSoup`` document built from the response text.
        """
        self.log(f"Fetching {url}")
        headers = {"User-Agent": self.bot.user_agent}
        async with self.bot.session.get(url, headers=headers) as r:
            html = await r.text()
        return BeautifulSoup(html, "lxml")

    async def fetch_json(self, url) -> dict:
        """GET ``url`` with the bot's user agent and return the decoded JSON body."""
        # Logged like fetch_html so both fetch helpers leave the same trace.
        self.log(f"Fetching {url}")
        headers = {"User-Agent": self.bot.user_agent}
        async with self.bot.session.get(url, headers=headers) as r:
            return await r.json()

    async def send_to_channel(self, channel_id: int, *args, **kwargs):
        """Look up ``channel_id`` on the bot and forward the arguments to its ``send``."""
        self.log(f"Sending new entry to channel {channel_id}.")
        await self.bot.get_channel(channel_id).send(*args, **kwargs)

    async def send_to_user(self, user_id: int, *args, **kwargs):
        """Look up ``user_id`` on the bot and forward the arguments to its ``send``."""
        self.log(f"Sending new entry to user {user_id}.")
        await self.bot.get_user(user_id).send(*args, **kwargs)
@dataclass
class HNEntry:
    """One Hacker News story: its headline and the link it points to."""

    # Text of the story's title link.
    title: str
    # Target of the title link; also serves as the story's cache key.
    url: str
class HackerNewsNotifier(FeedNotifier):
    """Notifier that scrapes Hacker News listing pages and DMs new stories to the owner.

    Each feed is a path appended to ``BASE_URL``. Stories are identified by
    their URL, and every unseen story is sent to ``bot.owner_id``.
    """

    NAME = "HackerNews"
    CHECK_INTERVAL = 60
    # Root of the site; feeds and site-relative story links are resolved against it.
    BASE_URL = "https://news.ycombinator.com/"

    def get_cache_key(self, entry: HNEntry) -> str:
        """Return the story URL, which identifies an entry in the cache."""
        return entry.url

    async def fetch_entries(self, feed: str) -> list[HNEntry]:
        """Scrape the listing page for ``feed`` and return its stories.

        Rows without a title link (or with an empty ``href``) are skipped so a
        single malformed row cannot abort the whole poll.

        Returns:
            The stories in reverse page order (last row first) -- presumably so
            older stories are reported before newer ones; confirm with the
            page's ordering.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import urljoin

        soup = await self.fetch_html(self.BASE_URL + feed)

        articles = []
        for row in soup.select("tr.athing"):
            link = row.select_one("span.titleline > a")
            if link is None or not link.get("href"):
                continue
            # An href may be site-relative; urljoin resolves it against the
            # site root and leaves absolute URLs unchanged.
            articles.append(HNEntry(link.text, urljoin(self.BASE_URL, link["href"])))

        articles.reverse()
        return articles

    async def on_new_entry(self, entry: HNEntry):
        """Send the story's title and URL, on separate lines, to the bot owner."""
        await self.send_to_user(self.bot.owner_id, f"{entry.title}\n{entry.url}")