feedgenerator

A simple tool to create various feeds
git clone https://git.ortlepp.eu/feedgenerator.git/
Log | Files | Refs | README | LICENSE

commit 6695676b0b7804fef8aa2df1890efe1bbf8d2959
Author: Thorsten Ortlepp <post@ortlepp.eu>
Date:   Fri, 10 Dec 2021 22:43:48 +0100

Initial commit

Diffstat:
A LICENSE | 21 +++++++++++++++++++++
A README | 28 ++++++++++++++++++++++++++++
A __main__.py | 12 ++++++++++++
A antenne_muenster.py | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A common.py | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A requirements.txt | 4 ++++
6 files changed, 284 insertions(+), 0 deletions(-)

diff --git a/LICENSE b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright 2021 Thorsten Ortlepp <hello.world@ortlepp.eu> + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/README b/README @@ -0,0 +1,28 @@ +feedgenerator +============= + +A simple tool to create various feeds. + + +Feeds feedgenerator can create +------------------------------ +- Antenne Münster: Create a feed from the news website of a local radio station + + +Requirements +------------ +All you need is a recent version of Python 3. See requirements.txt for further +required libraries. If you are using Python 3.9 or newer, you do not need +backports.zoneinfo. + + +Building & Distribution +----------------------- +I use zipapp to run feedgenerator on my server. 
Building the app is easy: + +pip3 install -r feedgenerator/requirements.txt --target feedgenerator/ +python3 -m zipapp feedgenerator/ + +Executing feedgenerator after building: + +python3 feedgenerator.pyz diff --git a/__main__.py b/__main__.py @@ -0,0 +1,12 @@ +import sys +from antenne_muenster import AntenneMuensterFeed + +OUTPUT_DIR = "." + +returncodes = [] + +am_feed = AntenneMuensterFeed() +returncodes.append(am_feed.create_feed(OUTPUT_DIR + "/antenne_muenster.xml", 20)) + +if False in returncodes: + sys.exit(1) diff --git a/antenne_muenster.py b/antenne_muenster.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- + +import requests +import datetime +from bs4 import BeautifulSoup +from common import AtomFeed, FeedItem + + +class AntenneMuensterFeed(): + + FEED_NAME = "Antenne Münster Newsticker" + FEED_AUTHOR = "Antenne Münster" + FEED_BASEURL = "https://www.antennemuenster.de/" + FEED_ICON = "https://www.antennemuenster.de/assets/images/favicons/mmsantennemuenster/favicon.ico" + FEED_LOGO = "https://www.antennemuenster.de/assets/images/senderlogos/antenne_muenster_sml.png" + WEBSITE_URL = "https://www.antennemuenster.de/artikel/der-antenne-muenster-newsticker-618851.html" + + + def __init__(self): + pass + + + def __parse_date(self, string): + months = {"Januar": 1, "Februar": 2, "März": 3, "April": 4, "Mai": 5, "Juni": 6, "Juli": 7, + "August": 8, "September": 9, "Okober": 10, "November": 11, "Dezember": 12} + parts = string.strip().split(" ") + try: + return datetime.date(datetime.datetime.now().year, months[parts[2]], int(parts[1].replace(".", ""))) + except (IndexError, KeyError, ValueError): + return datetime.date.today() + + + def __parse_time(self, string): + parts = string.strip().split(" ")[0].split(":") + try: + return datetime.time(int(parts[0]), int(parts[1])) + except (IndexError, ValueError): + return datetime.datetime.now().time() + + + def __parse_title(self, string): + if string.find("Uhr:") == -1: + return string.strip() + else: + return 
string.split("Uhr:")[1].strip() + + + def create_feed(self, feedfile, maxitems): + feed = AtomFeed(self.FEED_NAME, self.FEED_AUTHOR, self.FEED_BASEURL, datetime.datetime.now(), self.FEED_ICON, self.FEED_LOGO) + + try: + request = requests.get(self.WEBSITE_URL) + + if request.status_code == 200: + html = BeautifulSoup(request.text, "html.parser") + content = html.select_one(".article__details > div > div").children + + current_date = datetime.date.today() + current_time = datetime.datetime.now().time() + current_title = "" + current_content = "" + + added = 0 + + for element in content: + if element.name == "script" or str(element).startswith("<div class=\"section\">"): + continue + + if element.name == "h3": + current_date = self.__parse_date(element.text) + continue + + if str(element).startswith("<p><strong>"): + if current_content != "": + tmp_datetime = datetime.datetime.combine(current_date, current_time) + feed.add_item(FeedItem(current_title, tmp_datetime, self.FEED_AUTHOR, current_content, self.WEBSITE_URL)) + current_content = "" + added += 1 + if added == maxitems: + break + current_time = self.__parse_time(element.text) + current_title = self.__parse_title(element.text) + continue + + if str(element).startswith("<div class=\"card photoswipe-item\">"): + start = str(element).find("<figcaption>") + end = str(element).find("</figcaption>") + len("</figcaption>") + delete = str(element)[start:end] + current_content += str(element).replace(delete, "") + continue + + current_content += str(element) + + else: + error_title = "Feed creation failed" + error_content = "<p>HTTP status code was " + str(request.status_code) + "</p>" + feed.add_item(FeedItem(error_title, datetime.datetime.now(), self.FEED_AUTHOR, error_content, self.WEBSITE_URL)) + + feed.set_updated(feed.get_item(0).get_date()) + + except: + error_title = "Feed creation failed" + error_content = "<p>Error while fetching the website</p>" + feed.add_item(FeedItem(error_title, datetime.datetime.now(), 
self.FEED_AUTHOR, error_content, self.WEBSITE_URL)) + + return feed.write_feed(feedfile) diff --git a/common.py b/common.py @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- + +import hashlib +import html + +try: + import zoneinfo +except ImportError: + from backports import zoneinfo + + +class FeedItem: + + def __init__(self, title, date, author, content, url): + self.__title = title + self.__date = date + self.__author = author + self.__content = content + self.__url = url + + def __str__(self): + return self.__title + " (" + str(self.__date) + ")" + + def get_title(self): + return self.__title + + def get_date(self): + return self.__date + + def get_author(self): + return self.__author + + def get_content(self): + return self.__content + + def get_url(self): + return self.__url + + + +class AtomFeed: + + def __init__(self, title, author, baseurl, updated, icon, logo): + self.__title = title + self.__author = author + self.__baseurl = baseurl + self.__updated = updated + self.__icon = icon + self.__logo = logo + self.__items = [] + + + def __format_datetime(self, datetime): + utc_datetime = datetime.astimezone(zoneinfo.ZoneInfo('UTC')) + return utc_datetime.isoformat() + + + def __create_element(self, *args): + if len(args) == 2: + return "<{0}>{1}</{0}>\n".format(args[0], args[1]) + elif len(args) == 3: + return "<{0} {1}=\"{2}\" />\n".format(args[0], args[1], args[2]) + elif len(args) == 4: + return "<{0} {1}=\"{2}\">{3}</{0}>\n".format(args[0], args[2], args[3], args[1]) + else: + return "" + + + def __feed_item(self, item): + atom_title = self.__create_element("title", item.get_title()) + atom_link = self.__create_element("link", "href", item.get_url()) + atom_id = self.__create_element("id", "urn:uuid:" + hashlib.sha256(item.get_title().encode()).hexdigest()) + atom_author = self.__create_element("author", self.__create_element("name", item.get_author())) + atom_updated = self.__create_element("updated", self.__format_datetime(item.get_date())) + atom_content = 
self.__create_element("content", html.escape(item.get_content()), "type", "html") + return self.__create_element("entry", atom_title + atom_link + atom_id + atom_author + atom_updated + atom_content) + + + def add_item(self, item): + self.__items.append(item) + + + def get_item(self, index): + return self.__items[index] + + + def set_updated(self, updated): + self.__updated = updated + + + def write_feed(self, feedfile): + atom_title = self.__create_element("title", self.__title) + atom_link = self.__create_element("link", "href", self.__baseurl) + atom_id = self.__create_element("id", "urn:uuid:" + hashlib.sha256(self.__baseurl.encode()).hexdigest()) + atom_author = self.__create_element("author", self.__create_element("name", self.__author)) + atom_updated = self.__create_element("updated", self.__format_datetime(self.__updated)) + atom_icon = self.__create_element("icon", self.__icon) + atom_logo = self.__create_element("logo", self.__logo) + + try: + with open(feedfile, "w", encoding="utf-8") as file: + file.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n") + file.write("<feed xmlns=\"http://www.w3.org/2005/Atom\">\n") + file.write(atom_title + atom_link + atom_id + atom_author + atom_updated + atom_icon + atom_logo) + for item in self.__items: + file.write(self.__feed_item(item)) + self.__format_datetime(item.get_date()) + file.write("</feed>\n") + except PermissionError: + print("No permission to write " + feedfile) + return False + + return True diff --git a/requirements.txt b/requirements.txt @@ -0,0 +1,4 @@ +requests +bs4 +tzdata +backports.zoneinfo