import asyncio
import aiohttp
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
import copy
import datetime
import discord
import feedparser
import filetype
import io
import itertools
import logging
import re
import time
import warnings
from typing import Optional, Union
from types import MappingProxyType, SimpleNamespace
from urllib.parse import urlparse

from redbot.core import checks, commands, Config
from redbot.core.utils import can_user_send_messages_in
from redbot.core.utils.chat_formatting import bold, box, escape, humanize_list, pagify

from .color import Color
from .quiet_template import QuietTemplate
from .rss_feed import RssFeed
from .tag_type import INTERNAL_TAGS, VALID_IMAGES, TagType

log = logging.getLogger("red.aikaterna.rss")

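# Used in _valid_url() to redact server IP addresses from feedparser
# error previews before they are shown in Discord.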
IPV4_RE = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}")
IPV6_RE = re.compile(r"([a-f0-9:]+:+)+[a-f0-9]+")
GuildMessageable = Union[discord.TextChannel, discord.VoiceChannel, discord.StageChannel, discord.Thread]


__version__ = "2.1.8"

warnings.filterwarnings(
    "ignore",
    category=DeprecationWarning,
    # Ignore the warning in feedparser module *and* our module to account for the unreleased fix of this warning:
    # https://github.com/kurtmckee/feedparser/pull/278
    module=r"^(feedparser|rss)(\..+)?$",
    message=(
        "To avoid breaking existing software while fixing issue 310, a temporary mapping has been created from"
        " `updated_parsed` to `published_parsed` if `updated_parsed` doesn't exist"
    ),
)
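# BeautifulSoup emits MarkupResemblesLocatorWarning when asked to parse text that
# looks like a URL or filename; some feed tags contain only a bare link, so the
# warning is suppressed for this module.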
warnings.filterwarnings("ignore", module="rss", category=MarkupResemblesLocatorWarning)


class RSS(commands.Cog):
    """RSS feeds for your server."""

    def __init__(self, bot):
        self.bot = bot

        self.config = Config.get_conf(self, 2761331001, force_registration=True)
        self.config.register_channel(feeds={})
        self.config.register_global(use_published=["www.youtube.com"])

        self._post_queue = asyncio.PriorityQueue()
        self._post_queue_size = None

        self._read_feeds_loop = None
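
        # Requests are sent with a common desktop browser User-Agent,
        # since some feed hosts block non-browser user agents.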
        self._headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0"}

    async def red_delete_data_for_user(self, **kwargs):
        """Nothing to delete"""
        return

    def initialize(self):
        self._read_feeds_loop = self.bot.loop.create_task(self.read_feeds())

    def cog_unload(self):
        if self._read_feeds_loop:
            self._read_feeds_loop.cancel()

    def _add_content_images(self, bs4_soup: BeautifulSoup, rss_object: feedparser.util.FeedParserDict):
        """
        $content_images should always be marked as a special tag as the tags will
        be dynamically generated based on the content included in the latest post.
        """
        content_images = bs4_soup.find_all("img")
        if content_images:
            for i, image in enumerate(content_images):
                tag_name = f"content_image{str(i + 1).zfill(2)}"
                try:
                    rss_object[tag_name] = image["src"]
                    rss_object["is_special"].append(tag_name)
                except KeyError:
                    pass
        return rss_object

    async def _add_feed(self, ctx, feed_name: str, channel: GuildMessageable, url: str):
        """Helper for rss add."""
        rss_exists = await self._check_feed_existing(ctx, feed_name, channel)
        if not rss_exists:
            feedparser_obj = await self._fetch_feedparser_object(url)
            # _fetch_feedparser_object returns an always-truthy namespace with an
            # `error` attribute on failure, so check for that instead of falsiness
            if getattr(feedparser_obj, "error", None):
                await ctx.send("Couldn't fetch that feed: there were no feed objects found.")
                return

            # sort everything by time if a time value is present
            if feedparser_obj.entries:
                # this feed has posts
                sorted_feed_by_post_time = await self._sort_by_post_time(feedparser_obj.entries)
            else:
                # this feed does not have posts, but it has a header with channel information
                sorted_feed_by_post_time = [feedparser_obj.feed]

            # add additional tags/images/clean html
            feedparser_plus_obj = await self._add_to_feedparser_object(sorted_feed_by_post_time[0], url)
            rss_object = await self._convert_feedparser_to_rssfeed(feed_name, feedparser_plus_obj, url)

            async with self.config.channel(channel).feeds() as feed_data:
                feed_data[feed_name] = rss_object.to_json()
            msg = (
                f"Feed `{feed_name}` added in channel: {channel.mention}\n"
                f"List the template tags with `{ctx.prefix}rss listtags` "
                f"and modify the template using `{ctx.prefix}rss template`."
            )
            await ctx.send(msg)
        else:
            await ctx.send(f"There is already an existing feed named {bold(feed_name)} in {channel.mention}.")
            return

    def _add_generic_html_plaintext(self, bs4_soup: BeautifulSoup):
        """
        Bs4's .text attribute on a soup strips newlines and spaces.
        This provides newlines and more readable content.
        """
        text = ""
        for element in bs4_soup.descendants:
            if isinstance(element, str):
                text += element
            elif element.name in ("br", "p", "li"):
                text += "\n"
        text = re.sub(r"\n+", "\n", text)
        text = text.replace("*", "\\*")
        text = text.replace("SC_OFF", "").replace("SC_ON", "\n")
        text = text.replace("[link]", "").replace("[comments]", "")

        return escape(text)

    async def _append_bs4_tags(self, rss_object: feedparser.util.FeedParserDict, url: str):
        """Append bs4-discovered tags to an rss_feed/feedparser object."""
        rss_object["is_special"] = []
        soup = None
        tags_list = []
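
        # Note: soup is not reset between iterations of the loop below; it keeps the
        # most recently parsed html tag content and is reused after the loop to pull
        # the $content_image tags out of the post body.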
        temp_rss_object = copy.deepcopy(rss_object)
        for tag_name, tag_content in temp_rss_object.items():
            if tag_name in INTERNAL_TAGS:
                continue

            tag_content_check = await self._get_tag_content_type(tag_content)

            if tag_content_check == TagType.HTML:
                # this is a tag that is only html content
                try:
                    soup = BeautifulSoup(tag_content, "html.parser")
                except TypeError:
                    pass

                # this is a standard html format summary_detail tag
                # the tag was determined to be html through the type attrib that
                # was attached from the feed publisher but it's really a dict.
                try:
                    soup = BeautifulSoup(tag_content["value"], "html.parser")
                except (KeyError, TypeError):
                    pass

                # this is a standard html format content or summary tag
                try:
                    soup = BeautifulSoup(tag_content[0]["value"], "html.parser")
                except (KeyError, TypeError):
                    pass

                if soup:
                    rss_object[f"{tag_name}_plaintext"] = self._add_generic_html_plaintext(soup)

            if tag_content_check == TagType.LIST:
                tags_content_counter = 0

                for list_item in tag_content:
                    list_item_check = await self._get_tag_content_type(list_item)

                    # for common "links" format or when "content" is a list
                    list_html_content_counter = 0
                    if list_item_check == TagType.HTML:
                        list_tags = ["value", "href"]
                        for tag in list_tags:
                            try:
                                url_check = await self._valid_url(list_item[tag], feed_check=False)
                                if not url_check:
                                    # bs4 will cry if you try to give it a url to parse, so let's only
                                    # parse non-url content
                                    tag_content = BeautifulSoup(list_item[tag], "html.parser")
                                    tag_content = self._add_generic_html_plaintext(tag_content)
                                else:
                                    tag_content = list_item[tag]
                                list_html_content_counter += 1
                                name = f"{tag_name}_plaintext{str(list_html_content_counter).zfill(2)}"
                                rss_object[name] = tag_content
                                rss_object["is_special"].append(name)
                            except (KeyError, TypeError):
                                pass

                    if list_item_check == TagType.DICT:
                        authors_content_counter = 0
                        enclosure_content_counter = 0
                        enclosure_url_counter = 0

                        # common "authors" tag format
                        try:
                            authors_content_counter += 1
                            name = f"{tag_name}_plaintext{str(authors_content_counter).zfill(2)}"
                            tag_content = BeautifulSoup(list_item["name"], "html.parser")
                            rss_object[name] = tag_content.get_text()
                            rss_object["is_special"].append(name)
                        except KeyError:
                            pass

                        # common "enclosure" tag image format
                        # note: this is not adhering to RSS feed specifications
                        # proper enclosure tags should have `length`, `type`, `url`
                        # and not `href`, `type`, `rel`
                        # but, this is written for the first feed I have seen with an "enclosure" tag
                        try:
                            image_url = list_item["href"]
                            image_type = list_item["type"]
                            image_rel = list_item["rel"]
                            enclosure_content_counter += 1
                            name = f"media_plaintext{str(enclosure_content_counter).zfill(2)}"
                            rss_object[name] = image_url
                            rss_object["is_special"].append(name)
                        except KeyError:
                            pass

                        # special tag for enclosure["url"] so that users can differentiate them
                        # from image urls found in enclosure["href"]
                        try:
                            image_url = list_item["url"]
                            enclosure_url_counter += 1
                            name = f"media_url{str(enclosure_url_counter).zfill(2)}"
                            rss_object[name] = image_url
                            rss_object["is_special"].append(name)
                        except KeyError:
                            pass

                        # common "tags" tag format
                        try:
                            tag = list_item["term"]
                            tags_content_counter += 1
                            name = f"{tag_name}_plaintext{str(tags_content_counter).zfill(2)}"
                            rss_object[name] = tag
                            rss_object["is_special"].append(name)
                            if tag not in tags_list:
                                tags_list.append(tag)
                        except KeyError:
                            pass

        if len(tags_list) > 0:
            rss_object["tags_list"] = tags_list
            rss_object["tags_plaintext_list"] = humanize_list(tags_list)
            rss_object["is_special"].append("tags_list")
            rss_object["is_special"].append("tags_plaintext_list")

        # if image dict tag exists, check for an image
        try:
            rss_object["image_plaintext"] = rss_object["image"]["href"]
            rss_object["is_special"].append("image_plaintext")
        except KeyError:
            pass

        # if media_thumbnail or media_content exists, return the first friendly url
        try:
            rss_object["media_content_plaintext"] = rss_object["media_content"][0]["url"]
            rss_object["is_special"].append("media_content_plaintext")
        except KeyError:
            pass
        try:
            rss_object["media_thumbnail_plaintext"] = rss_object["media_thumbnail"][0]["url"]
            rss_object["is_special"].append("media_thumbnail_plaintext")
        except KeyError:
            pass

        # change published_parsed and updated_parsed into a datetime object for embed footers
        for time_tag in ["updated_parsed", "published_parsed"]:
            try:
                if isinstance(rss_object[time_tag], time.struct_time):
                    rss_object[f"{time_tag}_datetime"] = datetime.datetime(*rss_object[time_tag][:6])
            except KeyError:
                pass

        if soup:
            rss_object = self._add_content_images(soup, rss_object)

        # add special tag/special site formatter here if needed in the future

        return rss_object

    async def _check_channel_permissions(self, ctx, channel: GuildMessageable, addl_send_messages_check=True):
        """Helper for rss functions."""
        if not channel.permissions_for(ctx.me).read_messages:
            await ctx.send("I don't have permissions to read that channel.")
            return False
        author_perms = channel.permissions_for(ctx.author)
        if not author_perms.read_messages:
            await ctx.send("You don't have permissions to read that channel.")
            return False
        # bot can only see threads that it has permissions to read messages in so no special handling needed
        # if author has read messages perm, they can read all public threads *but also* private threads they are in
        if isinstance(channel, discord.Thread) and channel.is_private() and not author_perms.manage_threads:
            try:
                await channel.fetch_member(ctx.author.id)
            except discord.NotFound:
                # author is not in a private thread
                return False
        if addl_send_messages_check:
            # check for send messages perm if needed, like on an rss add
            # not needed on something like rss delete
            if not can_user_send_messages_in(ctx.me, channel):
                await ctx.send("I don't have permissions to send messages in that channel.")
                return False
        return True

    async def _check_feed_existing(self, ctx, feed_name: str, channel: GuildMessageable):
        """Helper for rss functions."""
        rss_feed = await self.config.channel(channel).feeds.get_raw(feed_name, default=None)
        if not rss_feed:
            return False
        return True

    async def _delete_feed(self, ctx, feed_name: str, channel: GuildMessageable):
        """Helper for rss delete."""
        rss_exists = await self._check_feed_existing(ctx, feed_name, channel)

        if rss_exists:
            async with self.config.channel(channel).feeds() as rss_data:
                rss_data.pop(feed_name, None)
                return True
        return False

    async def _edit_template(self, ctx, feed_name: str, channel: GuildMessageable, template: str):
        """Helper for rss template."""
        rss_exists = await self._check_feed_existing(ctx, feed_name, channel)

        if rss_exists:
            async with self.config.channel(channel).feeds.all() as feed_data:
                if feed_name not in feed_data:
                    feed_data[feed_name] = {}
                feed_data[feed_name]["template"] = template
                return True
        return False

    @staticmethod
    def _find_website(website_url: str):
        """Helper for rss parse."""
        result = urlparse(website_url)
        if result.scheme:
            # https://www.website.com/...
            if result.netloc:
                website = result.netloc
            else:
                return None
        else:
            # www.website.com/...
            if result.path:
                website = result.path.split("/")[0]
            else:
                return None

        return website

    async def _get_channel_object(self, channel_id: int):
        """Helper for rss feed loop."""
        channel = self.bot.get_channel(channel_id)
        if not channel:
            try:
                channel = await self.bot.fetch_channel(channel_id)
            except (discord.errors.Forbidden, discord.errors.NotFound):
                return None
        if channel and can_user_send_messages_in(channel.guild.me, channel):
            return channel
        return None

    async def _get_feed_names(self, channel: GuildMessageable):
        """Helper for rss list/listall."""
        feed_list = []
        space = "\N{SPACE}"
        all_feeds = await self.config.channel(channel).feeds.all()
        if not all_feeds:
            return ["None."]
        longest_name_len = len(max(all_feeds.keys(), key=len))
        for name, data in all_feeds.items():
            extra_spacing = longest_name_len - len(name)
            feed_list.append(f"{name}{space * extra_spacing} {data['url']}")
        return feed_list

    async def _get_tag_content_type(self, tag_content):
        """
        Tag content type can be:
        str, list, dict (FeedParserDict), bool, datetime.datetime object or time.struct_time
        """
        try:
            if tag_content["type"] == "text/html":
                return TagType(2)
        except (KeyError, TypeError):
            html_tags = ["<a>", "<a href", "<img", "<p>", "<b>", "</li>", "</ul>"]
            if any(word in str(tag_content) for word in html_tags):
                return TagType(2)

        if isinstance(tag_content, dict):
            return TagType(3)
        elif isinstance(tag_content, list):
            return TagType(4)
        else:
            return TagType(1)

    async def _get_url_content(self, url):
        """Helper for rss add/_valid_url."""
        try:
            # force github.com to serve us xml instead of json
            # copy the shared headers so the github Accept header doesn't leak
            # into every later request made with self._headers
            headers = dict(self._headers)
            if "github.com" in url:
                headers["Accept"] = "application/vnd.github+xml"

            timeout = aiohttp.ClientTimeout(total=20)
            async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
                async with session.get(url) as resp:
                    if resp.status == 404:
                        friendly_msg = "The server returned 404 Not Found. Check your url and try again."
                        return None, friendly_msg
                    html = await resp.read()
                    return html, None
        except aiohttp.client_exceptions.ClientConnectorError:
            friendly_msg = "There was an OSError or the connection failed."
            msg = f"aiohttp failure accessing feed at url:\n\t{url}"
            log.error(msg, exc_info=True)
            return None, friendly_msg
        except aiohttp.client_exceptions.ClientPayloadError as e:
            friendly_msg = "The website closed the connection prematurely or the response was malformed.\n"
            friendly_msg += f"The error returned was: `{str(e)}`\n"
            friendly_msg += "For more technical information, check your bot's console or logs."
            msg = f"content error while reading feed at url:\n\t{url}"
            log.error(msg, exc_info=True)
            return None, friendly_msg
        except asyncio.exceptions.TimeoutError:
            friendly_msg = "The bot timed out while trying to access that content."
            msg = f"asyncio timeout while accessing feed at url:\n\t{url}"
            log.error(msg, exc_info=True)
            return None, friendly_msg
        except aiohttp.client_exceptions.ServerDisconnectedError:
            friendly_msg = "The target server disconnected early without a response."
            msg = f"server disconnected while accessing feed at url:\n\t{url}"
            log.error(msg, exc_info=True)
            return None, friendly_msg
        except Exception:
            friendly_msg = "There was an unexpected error. Check your console for more information."
            msg = f"General failure accessing feed at url:\n\t{url}"
            log.error(msg, exc_info=True)
            return None, friendly_msg

    async def _fetch_feedparser_object(self, url: str):
        """Get a full feedparser object from a url: channel header + items."""
        html, error_msg = await self._get_url_content(url)
        if not html:
            return SimpleNamespace(entries=None, error=error_msg, url=url)

        feedparser_obj = feedparser.parse(html)
        if feedparser_obj.bozo:
            error_msg = f"Bozo feed: feedparser is unable to parse the response from {url}.\n"
            error_msg += f"Feedparser error message: `{feedparser_obj.bozo_exception}`"
            return SimpleNamespace(entries=None, error=error_msg, url=url)

        return feedparser_obj

    async def _add_to_feedparser_object(self, feedparser_obj: feedparser.util.FeedParserDict, url: str):
        """
        Input: A feedparser object
        Process: Append custom tags to the object from the custom formatters
        Output: A feedparser object with additional attributes
        """
        feedparser_plus_obj = await self._append_bs4_tags(feedparser_obj, url)
        feedparser_plus_obj["template_tags"] = sorted(feedparser_plus_obj.keys())

        return feedparser_plus_obj

    async def _convert_feedparser_to_rssfeed(
        self, feed_name: str, feedparser_plus_obj: feedparser.util.FeedParserDict, url: str
    ):
        """
        Converts any feedparser/feedparser_plus object to an RssFeed object.
        Used in rss add when saving a new feed.
        """
        entry_time = await self._time_tag_validation(feedparser_plus_obj)

        # sometimes there's no title or no link attribute and feedparser doesn't really play nice with that
        try:
            feedparser_plus_obj_title = feedparser_plus_obj["title"]
        except KeyError:
            feedparser_plus_obj_title = ""
        try:
            feedparser_plus_obj_link = feedparser_plus_obj["link"]
        except KeyError:
            feedparser_plus_obj_link = ""

        rss_object = RssFeed(
            name=feed_name.lower(),
            last_title=feedparser_plus_obj_title,
            last_link=feedparser_plus_obj_link,
            last_time=entry_time,
            template="$title\n$link",
            url=url,
            template_tags=feedparser_plus_obj["template_tags"],
            is_special=feedparser_plus_obj["is_special"],
            embed=True,
        )

        return rss_object

    async def _sort_by_post_time(self, feedparser_obj: feedparser.util.FeedParserDict):
        base_url = urlparse(feedparser_obj[0].get("link")).netloc
        use_published_parsed_override = await self.config.use_published()

        if base_url in use_published_parsed_override:
            time_tag = ["published_parsed"]
        else:
            time_tag = ["updated_parsed", "published_parsed"]
        for tag in time_tag:
            try:
                baseline_time = time.struct_time((2021, 1, 1, 12, 0, 0, 4, 1, -1))
                sorted_feed_by_post_time = sorted(feedparser_obj, key=lambda x: x.get(tag, baseline_time), reverse=True)
                break
            except TypeError:
                sorted_feed_by_post_time = feedparser_obj

        return sorted_feed_by_post_time

    async def _time_tag_validation(self, entry: feedparser.util.FeedParserDict):
        """Gets a unix timestamp if it's available from a single feedparser post entry."""
        feed_link = entry.get("link", None)
        if feed_link:
            base_url = urlparse(feed_link).netloc
        else:
            return None

        # check for a feed time override, if a feed is being problematic regarding updated_parsed
        # usage (i.e. a feed entry keeps reposting with no perceived change in content)
        use_published_parsed_override = await self.config.use_published()
        if base_url in use_published_parsed_override:
            entry_time = entry.get("published_parsed", None)
        else:
            entry_time = entry.get("updated_parsed", None)
            if not entry_time:
                entry_time = entry.get("published_parsed", None)

        if isinstance(entry_time, time.struct_time):
            entry_time = time.mktime(entry_time)
        if entry_time:
            return int(entry_time)
        return None

    @staticmethod
    async def _title_case(phrase: str):
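        # e.g. "the lord of the rings" -> "The Lord of the Rings"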
        exceptions = ["a", "and", "in", "of", "or", "on", "the"]
        lowercase_words = re.split(" ", phrase.lower())
        final_words = [lowercase_words[0].capitalize()]
        final_words += [word if word in exceptions else word.capitalize() for word in lowercase_words[1:]]
        return " ".join(final_words)

    async def _update_last_scraped(
        self,
        channel: GuildMessageable,
        feed_name: str,
        current_feed_title: str,
        current_feed_link: str,
        current_feed_time: int,
    ):
        """Updates last title and last link seen for comparison on next feed pull."""
        async with self.config.channel(channel).feeds() as feed_data:
            try:
                feed_data[feed_name]["last_title"] = current_feed_title
                feed_data[feed_name]["last_link"] = current_feed_link
                feed_data[feed_name]["last_time"] = current_feed_time
            except KeyError:
                # the feed was deleted during a _get_current_feed execution
                pass

    async def _valid_url(self, url: str, feed_check=True):
        """Helper for rss add."""
        try:
            result = urlparse(url)
        except Exception as e:
            log.exception(e, exc_info=e)
            return False

        if all([result.scheme, result.netloc, result.path]):
            if feed_check:
                text, error_msg = await self._get_url_content(url)
                if not text:
                    raise NoFeedContent(error_msg)

                rss = feedparser.parse(text)
                if rss.bozo:
                    error_message = rss.feed.get("summary", str(rss))[:1500]
                    error_message = re.sub(IPV4_RE, "[REDACTED IP ADDRESS]", error_message)
                    error_message = re.sub(IPV6_RE, "[REDACTED IP ADDRESS]", error_message)
                    msg = f"Bozo feed: feedparser is unable to parse the response from {url}.\n\n"
                    msg += "Received content preview:\n"
                    msg += box(error_message)
                    raise NoFeedContent(msg)
                else:
                    return True
            else:
                return True
        else:
            return False

    async def _validate_image(self, url: str):
        """Helper for _get_current_feed_embed."""
        try:
            timeout = aiohttp.ClientTimeout(total=20)
            async with aiohttp.ClientSession(headers=self._headers, timeout=timeout) as session:
                async with session.get(url) as resp:
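                    # filetype only needs the first 261 bytes of a file's header to guess its type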
                    image = await resp.content.read(261)
            img = io.BytesIO(image)
            file_type = filetype.guess(img)
            if not file_type:
                return None
            return file_type.extension
        except aiohttp.client_exceptions.InvalidURL:
            return None
        except asyncio.exceptions.TimeoutError:
            log.error(f"asyncio timeout while accessing image at url:\n\t{url}", exc_info=True)
            return None
        except Exception:
            log.error(f"Failure accessing image in embed feed at url:\n\t{url}", exc_info=True)
            return None

    @commands.guild_only()
    @commands.group()
    @checks.mod_or_permissions(manage_channels=True)
    async def rss(self, ctx):
        """RSS feed stuff."""
        pass

    @rss.command(name="add")
    async def _rss_add(self, ctx, feed_name: str, channel: Optional[GuildMessageable] = None, *, url: str):
        """
        Add an RSS feed to a channel.

        Defaults to the current channel if no channel is specified.
        """
        if feed_name.startswith("<#"):
            # someone typed a channel name but not a feed name
            msg = "Try again with a feed name included in the right spot so that you can refer to the feed later.\n"
            msg += f"Example: `{ctx.prefix}rss add feed_name channel_name feed_url`"
            await ctx.send(msg)
            return
        channel = channel or ctx.channel
        channel_permission_check = await self._check_channel_permissions(ctx, channel)
        if not channel_permission_check:
            return

        async with ctx.typing():
            try:
                valid_url = await self._valid_url(url)
            except NoFeedContent as e:
                await ctx.send(str(e))
                return

            if valid_url:
                await self._add_feed(ctx, feed_name.lower(), channel, url)
            else:
                await ctx.send("Invalid or unavailable URL.")

    @rss.group(name="embed")
    async def _rss_embed(self, ctx):
        """Embed feed settings."""
        pass

    @_rss_embed.command(name="color", aliases=["colour"])
    async def _rss_embed_color(
        self, ctx, feed_name: str, channel: Optional[GuildMessageable] = None, *, color: str = None
    ):
        """
        Set an embed color for a feed.

        Use this command with no color to reset to the default.
        `color` must be a hex code like #990000, a [Discord color name](https://discordpy.readthedocs.io/en/latest/api.html#colour),
        or a [CSS3 color name](https://www.w3.org/TR/2018/REC-css-color-3-20180619/#svg-color).
        """
        channel = channel or ctx.channel
        rss_feed = await self.config.channel(channel).feeds.get_raw(feed_name, default=None)
        if not rss_feed:
            await ctx.send("That feed name doesn't exist in this channel.")
            return

        embed_toggle = rss_feed["embed"]
        embed_state_message = ""
        if not embed_toggle:
            embed_state_message += (
                f"{bold(feed_name)} is not currently set to be in an embed. "
                f"Toggle it on with `{ctx.prefix}rss embed toggle`.\n"
            )

        if not color:
            async with self.config.channel(channel).feeds() as feed_data:
                feed_data[feed_name]["embed_color"] = None
            await ctx.send(
                f"{embed_state_message}The color for {bold(feed_name)} has been reset. "
                "Use this command with a color argument to set a color for this feed."
            )
            return

        color = color.replace(" ", "_")
        hex_code = await Color()._color_converter(color)
        if not hex_code:
            await ctx.send(
                "Not a valid color code. Use a hex code like #990000, a "
                "Discord color name or a CSS3 color name.\n"
                "<https://discordpy.readthedocs.io/en/latest/api.html#colour>\n"
                "<https://www.w3.org/TR/2018/REC-css-color-3-20180619/#svg-color>"
            )
            return
        user_facing_hex = hex_code.replace("0x", "#")
        color_name = await Color()._hex_to_css3_name(hex_code)

        # 0xFFFFFF actually doesn't show up as white in an embed
        # so let's make it close enough to count
        if hex_code == "0xFFFFFF":
            hex_code = "0xFFFFFE"

        async with self.config.channel(channel).feeds() as feed_data:
            # data is always a 0xFFFFFF style value
            feed_data[feed_name]["embed_color"] = hex_code

        await ctx.send(f"Embed color for {bold(feed_name)} set to {user_facing_hex} ({color_name}).")

    @_rss_embed.command(name="image")
    async def _rss_embed_image(
        self, ctx, feed_name: str, channel: Optional[GuildMessageable] = None, image_tag_name: str = None
    ):
        """
        Set a tag to be a large embed image.

        This image will be applied to the last embed in the paginated list.
        Use this command with no image_tag_name to clear the embed image.
        """
        channel = channel or ctx.channel
        rss_feed = await self.config.channel(channel).feeds.get_raw(feed_name, default=None)
        if not rss_feed:
            await ctx.send("That feed name doesn't exist in this channel.")
            return

        embed_toggle = rss_feed["embed"]
        embed_state_message = ""
        if not embed_toggle:
            embed_state_message += (
                f"{bold(feed_name)} is not currently set to be in an embed. "
                f"Toggle it on with `{ctx.prefix}rss embed toggle`.\n"
            )

        if image_tag_name is not None:
            if image_tag_name.startswith("$"):
                image_tag_name = image_tag_name.strip("$")
            else:
                msg = "You must use a feed tag for this setting. "
                msg += f"Feed tags start with `$` and can be found by using `{ctx.prefix}rss listtags` "
                msg += "with the saved feed name.\nImages that are scraped from feed content are usually "
                msg += "stored under the tags styled similar to `$content_image01`: subsequent scraped images "
                msg += "will be in tags named `$content_image02`, `$content_image03`, etc. Not every feed entry "
                msg += "will have the same amount of scraped image tags. Images can also be found under tags named "
                msg += "`$media_content_plaintext`, if present.\nExperiment with tags by setting them as your "
                msg += f"template with `{ctx.prefix}rss template` and using `{ctx.prefix}rss force` to view the content."
                await ctx.send(msg)
                return

        async with self.config.channel(channel).feeds() as feed_data:
            feed_data[feed_name]["embed_image"] = image_tag_name

        if image_tag_name:
            await ctx.send(f"{embed_state_message}Embed image set to the ${image_tag_name} tag.")
        else:
            await ctx.send(
                "Embed image has been cleared. Use this command with a tag name if you intended to set an image tag."
            )

    @_rss_embed.command(name="thumbnail")
    async def _rss_embed_thumbnail(
        self, ctx, feed_name: str, channel: Optional[GuildMessageable] = None, thumbnail_tag_name: str = None
    ):
        """
        Set a tag to be a thumbnail image.

        This thumbnail will be applied to the first embed in the paginated list.
        Use this command with no thumbnail_tag_name to clear the embed thumbnail.
        """
        channel = channel or ctx.channel
        rss_feed = await self.config.channel(channel).feeds.get_raw(feed_name, default=None)
        if not rss_feed:
            await ctx.send("That feed name doesn't exist in this channel.")
            return

        embed_toggle = rss_feed["embed"]
        embed_state_message = ""
        if not embed_toggle:
            embed_state_message += (
                f"{bold(feed_name)} is not currently set to be in an embed. "
                f"Toggle it on with `{ctx.prefix}rss embed toggle`.\n"
            )

        if thumbnail_tag_name is not None:
            if thumbnail_tag_name.startswith("$"):
                thumbnail_tag_name = thumbnail_tag_name.strip("$")
            else:
                msg = "You must use a feed tag for this setting. "
                msg += f"Feed tags start with `$` and can be found by using `{ctx.prefix}rss listtags` "
                msg += "with the saved feed name.\nImages that are scraped from feed content are usually "
                msg += "stored under the tags styled similar to `$content_image01`: subsequent scraped images "
                msg += "will be in tags named `$content_image02`, `$content_image03`, etc. Not every feed entry "
                msg += "will have the same amount of scraped image tags. Images can also be found under tags named "
                msg += "`$media_content_plaintext`, if present.\nExperiment with tags by setting them as your "
                msg += f"template with `{ctx.prefix}rss template` and using `{ctx.prefix}rss force` to view the content."
                await ctx.send(msg)
                return

        async with self.config.channel(channel).feeds() as feed_data:
            feed_data[feed_name]["embed_thumbnail"] = thumbnail_tag_name

        if thumbnail_tag_name:
            await ctx.send(f"{embed_state_message}Embed thumbnail set to the ${thumbnail_tag_name} tag.")
        else:
            await ctx.send(
                "Embed thumbnail has been cleared. "
                "Use this command with a tag name if you intended to set a thumbnail tag."
            )

    @_rss_embed.command(name="toggle")
    async def _rss_embed_toggle(self, ctx, feed_name: str, channel: Optional[GuildMessageable] = None):
        """
        Toggle whether a feed is sent in an embed or not.

        If the bot doesn't have permissions to post embeds,
        the feed will always be plain text, even if the embed
        toggle is set.
        """
        channel = channel or ctx.channel
        rss_feed = await self.config.channel(channel).feeds.get_raw(feed_name, default=None)
        if not rss_feed:
            await ctx.send("That feed name doesn't exist in this channel.")
            return

        embed_toggle = rss_feed["embed"]
        toggle_text = "disabled" if embed_toggle else "enabled"

        async with self.config.channel(channel).feeds() as feed_data:
            feed_data[feed_name]["embed"] = not embed_toggle

        await ctx.send(f"Embeds for {bold(feed_name)} are {toggle_text}.")

    @rss.command(name="find")
    async def _rss_find(self, ctx, website_url: str):
        """
        Attempts to find RSS feeds from a URL/website.

        The site must have identified their feed in the html of the page based on RSS feed type standards.
        """
        async with ctx.typing():
            timeout = aiohttp.ClientTimeout(total=20)
            async with aiohttp.ClientSession(headers=self._headers, timeout=timeout) as session:
                try:
                    async with session.get(website_url) as response:
                        soup = BeautifulSoup(await response.text(errors="replace"), "html.parser")
                except (aiohttp.client_exceptions.ClientConnectorError, aiohttp.client_exceptions.ClientPayloadError):
                    await ctx.send("I can't reach that website.")
                    return
                except aiohttp.client_exceptions.InvalidURL:
                    await ctx.send(
                        "That seems to be an invalid URL. Use a full website URL like `https://www.site.com/`."
                    )
                    return
                except aiohttp.client_exceptions.ServerDisconnectedError:
                    await ctx.send("The server disconnected early without a response.")
                    return
                except asyncio.exceptions.TimeoutError:
                    await ctx.send("The site didn't respond in time or there was no response.")
                    return
                except Exception as e:
                    msg = "There was an issue trying to find a feed in that site. "
                    msg += "Please check your console for more information."
                    log.exception(e, exc_info=e)
                    await ctx.send(msg)
                    return

            if "403 Forbidden" in soup.get_text():
                await ctx.send("I received a '403 Forbidden' message while trying to reach that site.")
                return
            if not soup:
                await ctx.send("I didn't find anything at all on that link.")
                return

            msg = ""
            url_parse = urlparse(website_url)
            base_url = url_parse.netloc
            url_scheme = url_parse.scheme
            feed_url_types = ["application/rss+xml", "application/atom+xml", "text/xml", "application/rdf+xml"]
            for feed_type in feed_url_types:
                possible_feeds = soup.find_all("link", rel="alternate", type=feed_type, href=True)
                for feed in possible_feeds:
                    feed_url = feed.get("href", None)
                    # check for an empty href before trying to strip it
                    if not feed_url:
                        continue
                    ls_feed_url = feed_url.lstrip("/")
                    if feed_url.startswith("//"):
                        # protocol-relative url
                        final_url = f"{url_scheme}:{feed_url}"
                    elif (not ls_feed_url.startswith(url_scheme)) and (not ls_feed_url.startswith(base_url)):
                        # relative path on the same site
                        final_url = f"{url_scheme}://{base_url}/{ls_feed_url}"
                    elif ls_feed_url.startswith(base_url):
                        # schemeless absolute url: keep the full path instead of dropping it
                        final_url = f"{url_scheme}://{ls_feed_url}"
                    else:
                        final_url = feed_url
                    msg += f"[Feed Title]: {feed.get('title', None)}\n"
                    msg += f"[Feed URL]: {final_url}\n\n"
            if msg:
                await ctx.send(box(msg, lang="ini"))
            else:
                await ctx.send("No RSS feeds found in the link provided.")

    @rss.command(name="force")
    async def _rss_force(self, ctx, feed_name: str, channel: Optional[GuildMessageable] = None):
        """Forces a feed alert."""
        channel = channel or ctx.channel
        channel_permission_check = await self._check_channel_permissions(ctx, channel)
        if not channel_permission_check:
            return

        feeds = await self.config.all_channels()
        if channel.id not in feeds:
            await ctx.send("There are no feeds in this channel.")
            return

        if feed_name not in feeds[channel.id]["feeds"]:
            await ctx.send("That feed name doesn't exist in this channel.")
            return

        rss_feed = feeds[channel.id]["feeds"][feed_name]
        await self.get_current_feed(channel, feed_name, rss_feed, force=True)

    @rss.command(name="limit")
    async def _rss_limit(
        self, ctx, feed_name: str, channel: Optional[GuildMessageable] = None, character_limit: int = None
    ):
        """
        Set a character limit for feed posts. Use 0 for unlimited.

        RSS posts are naturally split at around 2000 characters to fit within the Discord character limit per message.
        If you only want the first embed or first message in a post feed to show, use 2000 or less characters for this setting.

        Note that this setting applies the character limit to the entire post, for all template values on the feed together.
        For example, if the template is `$title\\n$content\\n$link`, and title + content + link is longer than the limit, the link will not show.
        """
        extra_msg = ""

        if character_limit is None:
            await ctx.send_help()
            return

        if character_limit < 0:
            await ctx.send("Character limit cannot be less than zero.")
            return
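
        # limits above 20000 characters are treated as unlimited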
        if character_limit > 20000:
            character_limit = 0

        if 0 < character_limit < 20:
            extra_msg = "Character limit has a 20 character minimum.\n"
            character_limit = 20

        channel = channel or ctx.channel
        rss_feed = await self.config.channel(channel).feeds.get_raw(feed_name, default=None)
        if not rss_feed:
            await ctx.send("That feed name doesn't exist in this channel.")
            return

        async with self.config.channel(channel).feeds() as feed_data:
            feed_data[feed_name]["limit"] = character_limit

        characters = f"approximately {character_limit}" if character_limit > 0 else "an unlimited amount of"
        await ctx.send(f"{extra_msg}Character limit for {bold(feed_name)} is now {characters} characters.")

    @rss.command(name="list")
    async def _rss_list(self, ctx, channel: GuildMessageable = None):
        """List saved feeds for this channel or a specific channel."""
        channel = channel or ctx.channel
        channel_permission_check = await self._check_channel_permissions(ctx, channel)
        if not channel_permission_check:
            return

        feeds = await self._get_feed_names(channel)
        msg = f"[ Available Feeds for #{channel.name} ]\n\n\t"
        if feeds:
            msg += "\n\t".join(sorted(feeds))
        else:
            msg += "\n\tNone."
        for page in pagify(msg, delims=["\n"], page_length=1800):
            await ctx.send(box(page, lang="ini"))

    @rss.command(name="listall")
    async def _rss_listall(self, ctx):
        """List all saved feeds for this server."""
        all_channels = await self.config.all_channels()
        all_guild_channels = [x.id for x in itertools.chain(ctx.guild.channels, ctx.guild.threads)]
        msg = ""
        for channel_id, data in all_channels.items():
            if channel_id in all_guild_channels:
                channel_obj = ctx.guild.get_channel_or_thread(channel_id)
                feeds = await self._get_feed_names(channel_obj)
                if not feeds:
                    continue
                if feeds == ["None."]:
                    continue
                msg += f"[ Available Feeds for #{channel_obj.name} ]\n\n\t"
                msg += "\n\t".join(sorted(feeds))
                msg += "\n\n"

        for page in pagify(msg, delims=["\n\n", "\n"], page_length=1800):
            await ctx.send(box(page, lang="ini"))

    @rss.command(name="listtags")
    async def _rss_list_tags(self, ctx, feed_name: str, channel: Optional[GuildMessageable] = None):
        """List the tags available from a specific feed."""
        channel = channel or ctx.channel
        channel_permission_check = await self._check_channel_permissions(ctx, channel)
        if not channel_permission_check:
            return

        rss_feed = await self.config.channel(channel).feeds.get_raw(feed_name, default=None)

        if not rss_feed:
            await ctx.send("No feed with that name in this channel.")
            return

        async with ctx.typing():
            await self._rss_list_tags_helper(ctx, rss_feed, feed_name)

    async def _rss_list_tags_helper(self, ctx, rss_feed: dict, feed_name: str):
        """Helper function for rss listtags."""
        msg = f"[ Available Template Tags for {feed_name} ]\n\n\t"
        feedparser_obj = await self._fetch_feedparser_object(rss_feed["url"])

        # a fetch failure returns an always-truthy namespace, so check the error attribute
        if getattr(feedparser_obj, "error", None):
            await ctx.send("Couldn't fetch that feed.")
            return
        if feedparser_obj.entries:
            # this feed has posts
            feedparser_plus_obj = await self._add_to_feedparser_object(feedparser_obj.entries[0], rss_feed["url"])
        else:
            # this feed does not have posts, but it has a header with channel information
            feedparser_plus_obj = await self._add_to_feedparser_object(feedparser_obj.feed, rss_feed["url"])

        for tag_name, tag_content in sorted(feedparser_plus_obj.items()):
            if tag_name in INTERNAL_TAGS:
                # these tags attached to the rss feed object are for internal handling options
                continue

            tag_content_check = await self._get_tag_content_type(tag_content)
            if tag_content_check == TagType.HTML:
                msg += f"[X] ${tag_name}\n\t"
            elif tag_content_check == TagType.DICT:
                msg += f"[\\] ${tag_name} \n\t"
            elif tag_content_check == TagType.LIST:
                msg += f"[-] ${tag_name} \n\t"
            elif tag_name in feedparser_plus_obj["is_special"]:
                msg += f"[*] ${tag_name} \n\t"
            else:
                msg += f"[ ] ${tag_name} \n\t"
        msg += "\n\n\t[X] = html | [\\] = dictionary | [-] = list | [ ] = plain text"
        msg += "\n\t[*] = specially-generated tag, may not be present in every post"

        for msg_part in pagify(msg, delims=["\n\t", "\n\n"]):
            await ctx.send(box(msg_part, lang="ini"))

    @checks.is_owner()
    @rss.group(name="parse")
    async def _rss_parse(self, ctx):
        """
        Change feed parsing for a specific domain.

        This is a global change per website.
        The default is to use the feed's updated_parsed tag, and adding a website to this list will change the check to published_parsed.

        Some feeds may spam feed entries as they are updating the updated_parsed slot on their feed, but not updating feed content.
        In this case we can force specific sites to use the published_parsed slot instead by adding the website to this override list.
        """
        pass

    @_rss_parse.command(name="add")
    async def _rss_parse_add(self, ctx, website_url: str):
        """
        Add a website to the list for a time parsing override.

        Use a website link formatted like `www.website.com` or `https://www.website.com`.
        For more information, use `[p]help rss parse`.
        """
        website = self._find_website(website_url)
        if not website:
            msg = f"I can't seem to find a website in `{website_url}`. "
            msg += "Use something like `https://www.website.com/` or `www.website.com`."
            await ctx.send(msg)
            return

        override_list = await self.config.use_published()
        if website in override_list:
            await ctx.send(f"`{website}` is already in the parsing override list.")
        else:
            override_list.append(website)
            await self.config.use_published.set(override_list)
            await ctx.send(f"`{website}` was added to the parsing override list.")

    @_rss_parse.command(name="list")
    async def _rss_parse_list(self, ctx):
        """
        Show the list for time parsing overrides.

        For more information, use `[p]help rss parse`.
        """
        override_list = await self.config.use_published()
        if not override_list:
            msg = "No site overrides saved."
        else:
            msg = "Active for:\n" + "\n".join(override_list)
        await ctx.send(box(msg))

    @_rss_parse.command(name="remove", aliases=["delete", "del"])
    async def _rss_parse_remove(self, ctx, website_url: str = None):
        """
        Remove a website from the list for a time parsing override.

        Use a website link formatted like `www.website.com` or `https://www.website.com`.
        For more information, use `[p]help rss parse`.
        """
        # guard against a missing argument: _find_website would raise on None
        if not website_url:
            await ctx.send_help()
            return
        website = self._find_website(website_url)
        override_list = await self.config.use_published()
        if website in override_list:
            override_list.remove(website)
            await self.config.use_published.set(override_list)
            await ctx.send(f"`{website}` was removed from the parsing override list.")
        else:
            await ctx.send(f"`{website}` isn't in the parsing override list.")

    @rss.command(name="remove", aliases=["delete", "del"])
    async def _rss_remove(self, ctx, feed_name: str, channel: Optional[GuildMessageable] = None):
        """
        Removes a feed from a channel.

        Defaults to the current channel if no channel is specified.
        """
        channel = channel or ctx.channel
        channel_permission_check = await self._check_channel_permissions(ctx, channel, addl_send_messages_check=False)
        if not channel_permission_check:
            return

        success = await self._delete_feed(ctx, feed_name, channel)
        if success:
            await ctx.send("Feed deleted.")
        else:
            await ctx.send("Feed not found!")

    @rss.command(name="showtemplate")
    async def _rss_show_template(self, ctx, feed_name: str, channel: Optional[GuildMessageable] = None):
        """Show the template in use for a specific feed."""
        channel = channel or ctx.channel
        channel_permission_check = await self._check_channel_permissions(ctx, channel)
        if not channel_permission_check:
            return

        rss_feed = await self.config.channel(channel).feeds.get_raw(feed_name, default=None)
        if not rss_feed:
            await ctx.send("No feed with that name in this channel.")
            return

        space = "\N{SPACE}"
        embed_toggle = f"[ ] Embed:{space*16}Off" if not rss_feed["embed"] else f"[X] Embed:{space*16}On"
        embed_image = (
            f"[ ] Embed image tag:{space*6}None"
            if not rss_feed["embed_image"]
            else f"[X] Embed image tag:{space*6}${rss_feed['embed_image']}"
        )
        embed_thumbnail = (
            f"[ ] Embed thumbnail tag:{space*2}None"
            if not rss_feed["embed_thumbnail"]
            else f"[X] Embed thumbnail tag:{space*2}${rss_feed['embed_thumbnail']}"
        )
        hex_color = rss_feed.get("embed_color", None)
        if hex_color:
            color_name = await Color()._hex_to_css3_name(hex_color)
            # use replace instead of lstrip("0x"): lstrip strips characters, not a
            # prefix, and would eat leading zeros in colors like 0x00FF00
            hex_color = hex_color.replace("0x", "")
        embed_color = (
            f"[ ] Embed hex color:{space*6}None"
            if not hex_color
            else f"[X] Embed hex color:{space*6}{hex_color} ({color_name})"
        )

        allowed_tags = rss_feed.get("allowed_tags", [])
        if not allowed_tags:
            tag_msg = "[ ] No restrictions\n\tAll tags are allowed."
        else:
            tag_msg = "[X] Feed is restricted to posts that include:"
            for tag in allowed_tags:
                tag_msg += f"\n\t{await self._title_case(tag)}"

        character_limit = rss_feed.get("limit", 0)
        if character_limit == 0:
            length_msg = "[ ] Feed length is unlimited."
        else:
            length_msg = f"[X] Feed length is capped at {character_limit} characters."

        embed_settings = f"{embed_toggle}\n{embed_color}\n{embed_image}\n{embed_thumbnail}"
        rss_template = rss_feed["template"].replace("\n", "\\n").replace("\t", "\\t")

        msg = f"Template for {bold(feed_name)}:\n\n`{rss_template}`\n\n{box(embed_settings, lang='ini')}\n{box(tag_msg, lang='ini')}\n{box(length_msg, lang='ini')}"

        for page in pagify(msg, delims=["\n"], page_length=1800):
            await ctx.send(page)

    @rss.group(name="tag")
    async def _rss_tag(self, ctx):
        """RSS post tag qualification."""
        pass

    @_rss_tag.command(name="allow")
    async def _rss_tag_allow(self, ctx, feed_name: str, channel: Optional[GuildMessageable] = None, *, tag: str = None):
        """
        Set an allowed tag for a feed to be posted. The tag must match exactly (without regard to title casing).
        No regex or placeholder qualification.

        Tags can be found in `[p]rss listtags` under `$tags` or `$tags_list` (if tags are present in the feed - not all feeds have tags).
        """
        # guard against a missing tag argument before calling tag.lower() below
        if not tag:
            await ctx.send_help()
            return
        channel = channel or ctx.channel
        rss_feed = await self.config.channel(channel).feeds.get_raw(feed_name, default=None)
        if not rss_feed:
            await ctx.send("That feed name doesn't exist in this channel.")
            return

        async with self.config.channel(channel).feeds() as feed_data:
            allowed_tags = feed_data[feed_name].get("allowed_tags", [])
            if tag.lower() in [x.lower() for x in allowed_tags]:
                return await ctx.send(
                    f"{bold(await self._title_case(tag))} is already in the allowed list for {bold(feed_name)}."
                )
            allowed_tags.append(tag.lower())
            feed_data[feed_name]["allowed_tags"] = allowed_tags

        await ctx.send(
            f"{bold(await self._title_case(tag))} was added to the list of allowed tags for {bold(feed_name)}. "
            "If a feed post's `$tags` does not include this value, the feed will not post."
        )

    @_rss_tag.command(name="allowlist")
    async def _rss_tag_allowlist(self, ctx, feed_name: str, channel: Optional[GuildMessageable] = None):
        """
        List allowed tags for feed post qualification.
        """
        channel = channel or ctx.channel
        rss_feed = await self.config.channel(channel).feeds.get_raw(feed_name, default=None)
        if not rss_feed:
            await ctx.send("That feed name doesn't exist in this channel.")
            return

        msg = f"[ Allowed Tags for {feed_name} ]\n\n\t"
        allowed_tags = rss_feed.get("allowed_tags", [])
        if not allowed_tags:
            msg += "All tags are allowed."
        else:
            for tag in allowed_tags:
                msg += f"{await self._title_case(tag)}\n"

        await ctx.send(box(msg, lang="ini"))

    @_rss_tag.command(name="remove", aliases=["delete"])
    async def _rss_tag_remove(
        self, ctx, feed_name: str, channel: Optional[GuildMessageable] = None, *, tag: str = None
    ):
        """
        Remove a tag from the allow list. The tag must match exactly (without regard to title casing).
        No regex or placeholder qualification.
        """
        # guard against a missing tag argument before calling tag.lower() below
        if not tag:
            await ctx.send_help()
            return
        channel = channel or ctx.channel
        rss_feed = await self.config.channel(channel).feeds.get_raw(feed_name, default=None)
        if not rss_feed:
            await ctx.send("That feed name doesn't exist in this channel.")
            return

        async with self.config.channel(channel).feeds() as feed_data:
            allowed_tags = feed_data[feed_name].get("allowed_tags", [])
            try:
                allowed_tags.remove(tag.lower())
                feed_data[feed_name]["allowed_tags"] = allowed_tags
                await ctx.send(
                    f"{bold(await self._title_case(tag))} was removed from the list of allowed tags for {bold(feed_name)}."
                )
            except ValueError:
                await ctx.send(
                    f"{bold(await self._title_case(tag))} was not found in the allow list for {bold(feed_name)}."
                )

    @rss.command(name="template")
    async def _rss_template(
        self, ctx, feed_name: str, channel: Optional[GuildMessageable] = None, *, template: str = None
    ):
        """
        Set a template for the feed alert.

        Each variable must start with $, valid variables can be found with `[p]rss listtags`.
        """
        channel = channel or ctx.channel
        channel_permission_check = await self._check_channel_permissions(ctx, channel)
        if not channel_permission_check:
            return
        if not template:
            await ctx.send_help()
            return
        template = template.replace("\\t", "\t")
        template = template.replace("\\n", "\n")
        success = await self._edit_template(ctx, feed_name, channel, template)
        if success:
            await ctx.send("Template added successfully.")
        else:
            await ctx.send("Feed not found!")

    @rss.command(name="viewtags")
    async def _rss_view_tags(self, ctx, feed_name: str, channel: Optional[GuildMessageable] = None):
        """View a preview of template tag content available from a specific feed."""
        channel = channel or ctx.channel
        channel_permission_check = await self._check_channel_permissions(ctx, channel)
        if not channel_permission_check:
            return

        rss_feed = await self.config.channel(channel).feeds.get_raw(feed_name, default=None)

        if not rss_feed:
            await ctx.send("No feed with that name in this channel.")
            return

        async with ctx.typing():
            await self._rss_view_tags_helper(ctx, rss_feed, feed_name)

    async def _rss_view_tags_helper(self, ctx, rss_feed: dict, feed_name: str):
        """Helper function for rss viewtags."""
        blue_ansi_prefix = "\u001b[1;40;34m"
        reset_ansi_prefix = "\u001b[0m"
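        # ANSI escape codes render as colored text inside Discord code blocks
        # sent with the "ansi" language, used at the bottom of this function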
        msg = f"{blue_ansi_prefix}[ Template Tag Content Preview for {feed_name} ]{reset_ansi_prefix}\n\n\t"
        feedparser_obj = await self._fetch_feedparser_object(rss_feed["url"])

        # a fetch failure returns an always-truthy namespace, so check the error attribute
        if getattr(feedparser_obj, "error", None):
            await ctx.send("Couldn't fetch that feed.")
            return
        if feedparser_obj.entries:
            # this feed has posts
            feedparser_plus_obj = await self._add_to_feedparser_object(feedparser_obj.entries[0], rss_feed["url"])
        else:
            # this feed does not have posts, but it has a header with channel information
            feedparser_plus_obj = await self._add_to_feedparser_object(feedparser_obj.feed, rss_feed["url"])

        longest_key = max(feedparser_plus_obj, key=len)
        longest_key_len = len(longest_key)
        for tag_name, tag_content in sorted(feedparser_plus_obj.items()):
            if tag_name in INTERNAL_TAGS:
                # these tags attached to the rss feed object are for internal handling options
                continue

            tag_content = str(tag_content).replace("[", "").replace("]", "").replace("\n", " ").replace('"', "")
            tag_content = tag_content.lstrip(" ")

            space = "\N{SPACE}"
            tag_name_padded = (
                f"{blue_ansi_prefix}${tag_name}{reset_ansi_prefix}{space*(longest_key_len - len(tag_name))}"
            )
            if len(tag_content) > 50:
                tag_content = tag_content[:50] + "..."
            msg += f"{tag_name_padded} {tag_content}\n\t"

        for msg_part in pagify(msg, delims=["\n\t", "\n\n"], page_length=1900):
            await ctx.send(box(msg_part.rstrip("\n\t"), lang="ansi"))

    @rss.command(name="version", hidden=True)
    async def _rss_version(self, ctx):
        """Show the RSS version."""
        await ctx.send(f"RSS version {__version__}")

    async def get_current_feed(self, channel: GuildMessageable, name: str, rss_feed: dict, *, force: bool = False):
        """Takes an RSS feed and builds an object with all extra tags"""
        log.debug(f"getting feed {name} on cid {channel.id}")
        url = rss_feed["url"]
        last_title = rss_feed["last_title"]
        # last_link is a get for feeds saved before RSS 1.1.5 which won't have this attrib till it's checked once
        last_link = rss_feed.get("last_link", None)
        # last_time is a get for feeds saved before RSS 1.1.7 which won't have this attrib till it's checked once
        last_time = rss_feed.get("last_time", None)
        template = rss_feed["template"]
        message = None

        feedparser_obj = await self._fetch_feedparser_object(url)
        # a fetch failure returns an always-truthy namespace with an `error` attribute
        error = getattr(feedparser_obj, "error", None)
        if error:
            log.debug(f"{error} Channel: {channel.id}")
            return

        # sorting the entire feedparser object by updated_parsed time if it exists, if not then published_parsed
        # certain feeds can be rearranged by a user, causing all posts to be out of sequential post order
        # or some feeds are out of time order by default
        if feedparser_obj.entries:
            # this feed has posts
            sorted_feed_by_post_time = await self._sort_by_post_time(feedparser_obj.entries)
        else:
            # this feed does not have posts, but it has a header with channel information
            sorted_feed_by_post_time = [feedparser_obj.feed]

        if not force:
            entry_time = await self._time_tag_validation(sorted_feed_by_post_time[0])
            if last_time is not None and entry_time is not None:
                if last_time > entry_time:
                    log.debug("Not posting because new entry is older than last saved entry.")
                    return
            try:
                title = sorted_feed_by_post_time[0].title
            except AttributeError:
                title = ""
            try:
                link = sorted_feed_by_post_time[0].link
            except AttributeError:
                link = ""
            await self._update_last_scraped(channel, name, title, link, entry_time)

        feedparser_plus_objects = []
        for entry in sorted_feed_by_post_time:
            # sometimes there's no title or no link attribute and feedparser doesn't really play nice with that
            try:
                entry_title = entry.title
            except AttributeError:
                entry_title = ""
            try:
                entry_link = entry.link
            except AttributeError:
                entry_link = ""

            # find the updated_parsed (checked first) or a published_parsed tag if they are present
            entry_time = await self._time_tag_validation(entry)

            # we only need one feed entry if this is from rss force
            if force:
                feedparser_plus_obj = await self._add_to_feedparser_object(entry, url)
                feedparser_plus_objects.append(feedparser_plus_obj)
                break

            # TODO: spammy debug logs to vvv

            # there's a post time to compare
            elif entry_time is not None and last_time is not None:
                # this is a post with an updated time with the same link and title, maybe an edited post.
                # if a feed is spamming updated times with no content update, consider adding the full website
                # (www.website.com) to the rss parse command
                if (last_title == entry_title) and (last_link == entry_link) and (last_time < entry_time):
                    log.debug(f"New update found for an existing post in {name} on cid {channel.id}")
                    feedparser_plus_obj = await self._add_to_feedparser_object(entry, url)
                    feedparser_plus_objects.append(feedparser_plus_obj)
                else:
                    # a post from the future, or we are caught up
                    if last_time >= entry_time:
                        log.debug(f"Up to date on {name} on cid {channel.id}")
                        break

                    # a new post
                    if last_link != entry_link:
                        log.debug(f"New entry found via time and link validation for feed {name} on cid {channel.id}")
                        feedparser_plus_obj = await self._add_to_feedparser_object(entry, url)
                        feedparser_plus_objects.append(feedparser_plus_obj)

                    else:
                        # I don't believe this ever should be hit but this is a catch to debug
                        # a feed in case one ever appears that does this
                        log.debug(
                            f"*** This post qualified via timestamp check but has the same link as last: {entry_title[:25]} | {entry_link}"
                        )
|
|
|
|
# this is a post that has no time comparison information because one or both timestamps are None.
|
|
# compare the title and link to see if it's the same post as previous.
|
|
# this may need more definition in the future if there is a feed that provides new titles but not new links etc
|
|
elif entry_time is None or last_time is None:
|
|
if last_title == entry_title and last_link == entry_link:
|
|
log.debug(f"Up to date on {name} on {channel.id} via link match, no time to compare")
|
|
break
|
|
else:
|
|
log.debug(f"New entry found for feed {name} on cid {channel.id} via new link or title")
|
|
feedparser_plus_obj = await self._add_to_feedparser_object(entry, url)
|
|
feedparser_plus_objects.append(feedparser_plus_obj)
|
|
|
|
# we found a match for a previous feed post
|
|
else:
|
|
log.debug(
|
|
f"Breaking rss entry loop for {name} on {channel.id}, we found where we are supposed to be caught up to"
|
|
)
|
|
break
|
|
|
|
        # TODO: fix rss losing its place on store.steampowered.com feeds/post lists

        if len(feedparser_plus_objects) == len(sorted_feed_by_post_time):
            msg = (
                f"Couldn't match anything for feed {name} on cid {channel.id}, or the feed is switching "
                "between a feed header and feed entries; only posting 1 post"
            )
            log.debug(msg)
            feedparser_plus_objects = [feedparser_plus_objects[0]]

        if not feedparser_plus_objects:
            # early exit so that we don't dispatch when there are no updates
            return

        # post oldest first
        feedparser_plus_objects.reverse()

        # list of feedparser_plus_objects wrapped in MappingProxyType,
        # filled during the loop below
        proxied_dicts = []

        sent_message = False
        for feedparser_plus_obj in feedparser_plus_objects:
            try:
                curr_title = feedparser_plus_obj.title
            except AttributeError:
                curr_title = ""
            except IndexError:
                log.debug(f"No entries found for feed {name} on cid {channel.id}")
                return

            # allowed tag verification section
            allowed_tags = rss_feed.get("allowed_tags", [])
            if len(allowed_tags) > 0:
                allowed_post_tags = [x.lower() for x in allowed_tags]
                feed_tag_list = [x.lower() for x in feedparser_plus_obj.get("tags_list", [])]
                intersection = list(set(feed_tag_list).intersection(allowed_post_tags))
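                # e.g. (hypothetical values) allowed_tags of ["news", "updates"] against
                # entry tags of ["News"] intersects as ["news"], so the post is allowed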
                if len(intersection) == 0:
                    log.debug(
                        f"{name} feed post in {channel.name} ({channel.id}) was denied because of an allowed tag mismatch."
                    )
                    continue

            # fill out the template for posts that passed tag verification (if present)
            to_fill = QuietTemplate(template)
            message = to_fill.quiet_safe_substitute(name=bold(name), **feedparser_plus_obj)
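            # For example (hypothetical feed values), a template of "$name: $title - $link"
            # could fill out to "**MyFeed**: Some Post Title - https://example.com/post".
            # QuietTemplate is assumed to substitute unmatched $tags with nothing, which
            # is why the empty-message check below exists.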

            if len(message.strip(" ")) == 0:
                message = None

            if not message:
                log.debug(f"{name} feed in {channel.name} ({channel.id}) has no valid tags, not posting anything.")
                return

            embed_toggle = rss_feed["embed"]
            red_embed_settings = await self.bot.embed_requested(channel)

            rss_limit = rss_feed.get("limit", 0)
            if rss_limit > 0:
                # give pagify rss_limit + 8 characters, since it counts codeblock characters against the page length
                message = list(pagify(message, delims=["\n", " "], priority=True, page_length=(rss_limit + 8)))[0]

            if embed_toggle and red_embed_settings:
                await self._get_current_feed_embed(channel, rss_feed, feedparser_plus_obj, message)
            else:
                for page in pagify(message, delims=["\n"]):
                    await channel.send(page)
            sent_message = True

            # This event can be used by 3rd-party cogs using listeners.
            # This may (and most likely will) get changes in the future,
            # so I suggest accepting **kwargs in the listeners using this event.
            #
            # channel: Union[discord.TextChannel, discord.VoiceChannel, discord.StageChannel, discord.Thread]
            #     The channel the feed alert went to.
            # feed_data: Mapping[str, Any]
            #     Read-only mapping with the feed's data.
            #     The available data depends on what this cog needs,
            #     and there most likely will be changes here in the future.
            #     Available keys include: `name`, `template`, `url`, `embed`, etc.
            # feedparser_dict: Mapping[str, Any]
            #     Read-only mapping with parsed data from the feed.
            #     See the documentation of feedparser.FeedParserDict for more information.
            # force: bool
            #     True if the update was forced (through `[p]rss force`), False otherwise.
            feedparser_dict_proxy = MappingProxyType(feedparser_plus_obj)
            proxied_dicts.append(feedparser_dict_proxy)
            self.bot.dispatch(
                "aikaternacogs_rss_message",
                channel=channel,
                feed_data=MappingProxyType(rss_feed),
                feedparser_dict=feedparser_dict_proxy,
                force=force,
            )
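            # A minimal listener sketch for the event dispatched above; the cog and
            # callback bodies are illustrative assumptions, not part of this cog:
            #
            #     @commands.Cog.listener()
            #     async def on_aikaternacogs_rss_message(self, *, channel, feed_data, feedparser_dict, force, **kwargs):
            #         log.info("RSS post %r in #%s", feedparser_dict.get("title", ""), channel.name)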

        if not sent_message:
            return

        # This event can be used by 3rd-party cogs using listeners.
        # This may (and most likely will) get changes in the future,
        # so I suggest accepting **kwargs in the listeners using this event.
        #
        # channel: Union[discord.TextChannel, discord.VoiceChannel, discord.StageChannel, discord.Thread]
        #     The channel the feed alerts went to.
        # feed_data: Mapping[str, Any]
        #     Read-only mapping with the feed's data.
        #     The available data depends on what this cog needs,
        #     and there most likely will be changes here in the future.
        #     Available keys include: `name`, `template`, `url`, `embed`, etc.
        # feedparser_dicts: List[Mapping[str, Any]]
        #     List of read-only mappings with parsed data
        #     from each **new** entry in the feed.
        #     See the documentation of feedparser.FeedParserDict for more information.
        # force: bool
        #     True if the update was forced (through `[p]rss force`), False otherwise.
        self.bot.dispatch(
            "aikaternacogs_rss_feed_update",
            channel=channel,
            feed_data=MappingProxyType(rss_feed),
            feedparser_dicts=proxied_dicts,
            force=force,
        )
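        # A matching listener sketch (illustrative assumption, not part of this cog):
        #
        #     @commands.Cog.listener()
        #     async def on_aikaternacogs_rss_feed_update(self, *, channel, feed_data, feedparser_dicts, force, **kwargs):
        #         log.info("%d new post(s) for %s in #%s", len(feedparser_dicts), feed_data["name"], channel.name)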

    async def _get_current_feed_embed(
        self,
        channel: GuildMessageable,
        rss_feed: dict,
        feedparser_plus_obj: feedparser.util.FeedParserDict,
        message: str,
    ):
        embed_list = []
        for page in pagify(message, delims=["\n"]):
            embed = discord.Embed(description=page)
            if rss_feed["embed_color"]:
                # embed_color is stored as a hex color string, e.g. "ff0000"
                color = int(rss_feed["embed_color"], 16)
                embed.color = discord.Color(color)
            embed_list.append(embed)

        if len(embed_list) == 0:
            return

        # Add the published timestamp to the last embed if it exists
        time_tags = ["updated_parsed_datetime", "published_parsed_datetime"]
        for time_tag in time_tags:
            try:
                published_time = feedparser_plus_obj[time_tag]
                embed = embed_list[-1]
                embed.timestamp = published_time
                break
            except KeyError:
                pass

        # Add the embed image to the last embed if it's set
        try:
            embed_image_tag = rss_feed["embed_image"]
            embed_image_url = feedparser_plus_obj[embed_image_tag]
            img_type = await self._validate_image(embed_image_url)
            if img_type in VALID_IMAGES:
                embed = embed_list[-1]
                embed.set_image(url=embed_image_url)
        except KeyError:
            pass

        # Add the embed thumbnail to the first embed if it's set
        try:
            embed_thumbnail_tag = rss_feed["embed_thumbnail"]
            embed_thumbnail_url = feedparser_plus_obj[embed_thumbnail_tag]
            img_type = await self._validate_image(embed_thumbnail_url)
            if img_type in VALID_IMAGES:
                embed = embed_list[0]
                embed.set_thumbnail(url=embed_thumbnail_url)
        except KeyError:
            pass

        for embed in embed_list:
            await channel.send(embed=embed)

    async def read_feeds(self):
        """Feed poster loop."""
        await self.bot.wait_until_red_ready()
        await self._put_feeds_in_queue()
        self._post_queue_size = self._post_queue.qsize()

        # TODO: very large queues with a lot of RSS feeds (1000+) cause this to fall behind
        while True:
            try:
                queue_item = await self._get_next_in_queue()
                if not queue_item:
                    # the queue is empty
                    config_data = await self.config.all_channels()
                    if not config_data:
                        # nothing to check
                        log.debug("Sleeping, nothing to do")
                        await asyncio.sleep(30)
                        continue
                    if self._post_queue_size < 300:
                        # fewer than 300 entries to check means 1/sec check times;
                        # the wait is (5 min - entry count) before posting again
                        wait = 300 - self._post_queue_size
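                        # e.g. with 120 queued feeds the loop spends ~120s checking
                        # (1 per second, below) and then waits 300 - 120 = 180s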
                    else:
                        # 300 or more entries means we used the whole 5 min
                        # to check and post feeds, so don't wait any longer to start again
                        wait = 0

                    log.debug(f"Waiting {wait}s before starting...")
                    await asyncio.sleep(wait)
                    await self._put_feeds_in_queue()
                    if self._post_queue.qsize() > self._post_queue_size:
                        # more feeds have been added, so update the total size
                        # so feeds have the proper wait time @ > 300 feeds
                        log.debug(f"Updating total queue size to {self._post_queue.qsize()}")
                        self._post_queue_size = self._post_queue.qsize()
                    continue
                else:
                    try:
                        # queue_item is a list: [channel_priority: int, total_priority: int, feed: SimpleNamespace]
                        await self.get_current_feed(
                            queue_item[2].channel, queue_item[2].feed_name, queue_item[2].feed_data
                        )
                    except aiohttp.client_exceptions.InvalidURL as e:
                        log.debug(f"Feed at {e.url} is bad or took too long to respond.")
                        continue

                if self._post_queue_size < 300:
                    wait = 1
                else:
                    wait = (300 - 10) / self._post_queue_size
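                    # spread a full pass over ~290s regardless of queue size,
                    # e.g. 600 queued feeds -> 290 / 600 ~= 0.48s between checks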
                log.debug(f"sleeping for {wait}...")
                await asyncio.sleep(wait)

            except asyncio.CancelledError:
                break
            except Exception as e:
                log.error("An error has occurred in the RSS cog. Please report it.", exc_info=e)
                continue

    async def _put_feeds_in_queue(self):
        log.debug("Putting feeds in queue")
        try:
            config_data = await self.config.all_channels()
            total_index = 0
            for channel_id, channel_feed_list in config_data.items():
                channel = await self._get_channel_object(channel_id)
                if not channel:
                    continue

                if await self.bot.cog_disabled_in_guild(self, channel.guild):
                    continue

                for feed_key, feed in channel_feed_list.items():
                    for feed_name, feed_data in feed.items():
                        rss_feed = SimpleNamespace(channel=channel, feed_name=feed_name, feed_data=feed_data)
                        keys = list(feed.keys())
                        channel_index = keys.index(feed_name)
                        total_index += 1
                        queue_entry = [channel_index, total_index, rss_feed]
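                        # e.g. the 3rd feed saved on a channel, 17th seen overall, queues as
                        # [2, 17, SimpleNamespace(channel=..., feed_name=..., feed_data=...)]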
                        log.debug(f"Putting {channel_index}-{total_index}-{channel}-{feed_name} in queue")
                        await self._post_queue.put(queue_entry)

        except Exception as e:
            log.exception(e, exc_info=e)

    async def _get_next_in_queue(self):
        try:
            to_check = self._post_queue.get_nowait()
        except asyncio.queues.QueueEmpty:
            return None
        return to_check


class NoFeedContent(Exception):
    def __init__(self, m):
        self.message = m

    def __str__(self):
        return self.message