commit 6695676b0b7804fef8aa2df1890efe1bbf8d2959
Author: Thorsten Ortlepp <post@ortlepp.eu>
Date: Fri, 10 Dec 2021 22:43:48 +0100
Initial commit
Diffstat:
6 files changed, 284 insertions(+), 0 deletions(-)
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright 2021 Thorsten Ortlepp <hello.world@ortlepp.eu>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/README b/README
@@ -0,0 +1,28 @@
+feedgenerator
+=============
+
+A simple tool to create various feeds.
+
+
+Feeds feedgenerator can create
+------------------------------
+- Antenne Münster: Create a feed from the news website of a local radio station
+
+
+Requirements
+------------
+All you need is a recent version of Python 3. See requirements.txt for further
+required libraries. If you are using Python 3.9 or newer, you do not need
+backports.zoneinfo.
+
+
+Building & Distribution
+-----------------------
+I use zipapp to run feedgenerator on my server. Building the app is easy:
+
+pip3 install -r feedgenerator/requirements.txt --target feedgenerator/
+python3 -m zipapp feedgenerator/
+
+Executing feedgenerator after building:
+
+python3 feedgenerator.pyz
diff --git a/__main__.py b/__main__.py
@@ -0,0 +1,12 @@
+import sys
+from antenne_muenster import AntenneMuensterFeed
+
+# Directory that receives the generated feed files.
+OUTPUT_DIR = "."
+
+# create_feed() reports success as a boolean; collect one result per feed.
+am_feed = AntenneMuensterFeed()
+returncodes = [am_feed.create_feed(OUTPUT_DIR + "/antenne_muenster.xml", 20)]
+
+if False in returncodes:
+    sys.exit(1)
diff --git a/antenne_muenster.py b/antenne_muenster.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+
+import requests
+import datetime
+from bs4 import BeautifulSoup
+from common import AtomFeed, FeedItem
+
+
+class AntenneMuensterFeed():
+    """Create an Atom feed from the Antenne Münster news ticker web page."""
+
+    FEED_NAME = "Antenne Münster Newsticker"
+    FEED_AUTHOR = "Antenne Münster"
+    FEED_BASEURL = "https://www.antennemuenster.de/"
+    FEED_ICON = "https://www.antennemuenster.de/assets/images/favicons/mmsantennemuenster/favicon.ico"
+    FEED_LOGO = "https://www.antennemuenster.de/assets/images/senderlogos/antenne_muenster_sml.png"
+    WEBSITE_URL = "https://www.antennemuenster.de/artikel/der-antenne-muenster-newsticker-618851.html"
+    # Seconds to wait for the website, so an unattended run cannot hang forever.
+    REQUEST_TIMEOUT = 30
+
+    def __parse_date(self, string):
+        """Parse a date heading (weekday, day, German month name); fall back to today."""
+        months = {"Januar": 1, "Februar": 2, "März": 3, "April": 4, "Mai": 5, "Juni": 6, "Juli": 7,
+                  "August": 8, "September": 9, "Oktober": 10, "November": 11, "Dezember": 12}
+        parts = string.strip().split(" ")
+        today = datetime.date.today()
+        try:
+            parsed = datetime.date(today.year, months[parts[2]], int(parts[1].replace(".", "")))
+            # Headings carry no year: a date in the future must belong to last year.
+            return parsed.replace(year=parsed.year - 1) if parsed > today else parsed
+        except (IndexError, KeyError, ValueError):
+            return today
+
+    def __parse_time(self, string):
+        """Parse the leading "HH:MM" of an entry headline; fall back to the current time."""
+        parts = string.strip().split(" ")[0].split(":")
+        try:
+            return datetime.time(int(parts[0]), int(parts[1]))
+        except (IndexError, ValueError):
+            return datetime.datetime.now().time()
+
+    def __parse_title(self, string):
+        """Return the headline without its time prefix (everything after the first "Uhr:")."""
+        return string.split("Uhr:", 1)[-1].strip()
+
+    def create_feed(self, feedfile, maxitems):
+        """Scrape the news ticker and write at most maxitems entries to feedfile.
+
+        Download or parsing failures are reported as a single error entry in
+        the feed. Returns the boolean result of AtomFeed.write_feed().
+        """
+        feed = AtomFeed(self.FEED_NAME, self.FEED_AUTHOR, self.FEED_BASEURL, datetime.datetime.now(), self.FEED_ICON, self.FEED_LOGO)
+
+        try:
+            request = requests.get(self.WEBSITE_URL, timeout=self.REQUEST_TIMEOUT)
+            if request.status_code == 200:
+                html = BeautifulSoup(request.text, "html.parser")
+                content = html.select_one(".article__details > div > div").children
+                current_date = datetime.date.today()
+                current_time = datetime.datetime.now().time()
+                current_title = ""
+                current_content = ""
+                entry_date = current_date
+                added = 0
+
+                for element in content:
+                    markup = str(element)
+                    if element.name == "script" or markup.startswith("<div class=\"section\">"):
+                        continue
+                    if element.name == "h3":
+                        current_date = self.__parse_date(element.text)
+                        continue
+                    if markup.startswith("<p><strong>"):
+                        # A new headline completes the entry collected so far.
+                        if current_content != "":
+                            tmp_datetime = datetime.datetime.combine(entry_date, current_time)
+                            feed.add_item(FeedItem(current_title, tmp_datetime, self.FEED_AUTHOR, current_content, self.WEBSITE_URL))
+                            current_content = ""
+                            added += 1
+                            if added == maxitems:
+                                break
+                        # Pin the date now; a later date heading must not re-date this entry.
+                        entry_date = current_date
+                        current_time = self.__parse_time(element.text)
+                        current_title = self.__parse_title(element.text)
+                        continue
+                    if markup.startswith("<div class=\"card photoswipe-item\">"):
+                        # Keep the image card but drop its caption.
+                        start = markup.find("<figcaption>")
+                        end = markup.find("</figcaption>") + len("</figcaption>")
+                        markup = markup.replace(markup[start:end], "")
+                    current_content += markup
+
+                # The last entry is not followed by a headline, so add it here.
+                if current_content != "":
+                    tmp_datetime = datetime.datetime.combine(entry_date, current_time)
+                    feed.add_item(FeedItem(current_title, tmp_datetime, self.FEED_AUTHOR, current_content, self.WEBSITE_URL))
+            else:
+                error_content = "<p>HTTP status code was " + str(request.status_code) + "</p>"
+                feed.add_item(FeedItem("Feed creation failed", datetime.datetime.now(), self.FEED_AUTHOR, error_content, self.WEBSITE_URL))
+            feed.set_updated(feed.get_item(0).get_date())
+        except Exception:
+            # Exception instead of a bare except, so Ctrl+C and sys.exit() still work.
+            feed.add_item(FeedItem("Feed creation failed", datetime.datetime.now(), self.FEED_AUTHOR, "<p>Error while fetching the website</p>", self.WEBSITE_URL))
+
+        return feed.write_feed(feedfile)
diff --git a/common.py b/common.py
@@ -0,0 +1,113 @@
+# -*- coding: utf-8 -*-
+
+import hashlib
+import html
+
+try:
+ import zoneinfo
+except ImportError:
+ from backports import zoneinfo
+
+
+class FeedItem:
+    """One feed entry: title, date, author, HTML content and link URL."""
+
+    def __init__(self, title, date, author, content, url):
+        # Double underscores keep the fields private; read them via the getters.
+        self.__title, self.__date, self.__author = title, date, author
+        self.__content, self.__url = content, url
+
+    def __str__(self):
+        """Return the title followed by the date in parentheses."""
+        return "".join((self.__title, " (", str(self.__date), ")"))
+
+    def get_title(self):
+        return self.__title
+
+    def get_date(self):
+        return self.__date
+
+    def get_author(self):
+        return self.__author
+
+    def get_content(self):
+        return self.__content
+
+    def get_url(self):
+        return self.__url
+
+
+
+class AtomFeed:
+    """Collect feed metadata and FeedItem entries and write them as Atom XML."""
+
+    def __init__(self, title, author, baseurl, updated, icon, logo):
+        self.__title = title
+        self.__author = author
+        self.__baseurl = baseurl
+        self.__updated = updated
+        self.__icon = icon
+        self.__logo = logo
+        self.__items = []
+
+    def __format_datetime(self, datetime):
+        """Return the datetime converted to UTC as an ISO 8601 string."""
+        return datetime.astimezone(zoneinfo.ZoneInfo('UTC')).isoformat()
+
+    def __create_element(self, *args):
+        """Build an XML element; text and attribute values must already be escaped."""
+        if len(args) == 2:
+            # (tag, body)
+            return "<{0}>{1}</{0}>\n".format(args[0], args[1])
+        if len(args) == 3:
+            # (tag, attribute, value)
+            return "<{0} {1}=\"{2}\" />\n".format(args[0], args[1], args[2])
+        if len(args) == 4:
+            # (tag, body, attribute, value)
+            return "<{0} {1}=\"{2}\">{3}</{0}>\n".format(args[0], args[2], args[3], args[1])
+        return ""
+
+    def __feed_item(self, item):
+        """Return the <entry> element for one FeedItem, with all text XML-escaped."""
+        atom_title = self.__create_element("title", html.escape(item.get_title()))
+        atom_link = self.__create_element("link", "href", html.escape(item.get_url()))
+        atom_id = self.__create_element("id", "urn:uuid:" + hashlib.sha256(item.get_title().encode()).hexdigest())
+        atom_author = self.__create_element("author", self.__create_element("name", html.escape(item.get_author())))
+        atom_updated = self.__create_element("updated", self.__format_datetime(item.get_date()))
+        atom_content = self.__create_element("content", html.escape(item.get_content()), "type", "html")
+        return self.__create_element("entry", atom_title + atom_link + atom_id + atom_author + atom_updated + atom_content)
+
+    def add_item(self, item):
+        self.__items.append(item)
+
+    def get_item(self, index):
+        return self.__items[index]
+
+    def set_updated(self, updated):
+        self.__updated = updated
+
+    def write_feed(self, feedfile):
+        """Write the feed to feedfile; return True on success, False if writing failed."""
+        atom_title = self.__create_element("title", html.escape(self.__title))
+        atom_link = self.__create_element("link", "href", html.escape(self.__baseurl))
+        atom_id = self.__create_element("id", "urn:uuid:" + hashlib.sha256(self.__baseurl.encode()).hexdigest())
+        atom_author = self.__create_element("author", self.__create_element("name", html.escape(self.__author)))
+        atom_updated = self.__create_element("updated", self.__format_datetime(self.__updated))
+        atom_icon = self.__create_element("icon", html.escape(self.__icon))
+        atom_logo = self.__create_element("logo", html.escape(self.__logo))
+
+        try:
+            with open(feedfile, "w", encoding="utf-8") as file:
+                file.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n")
+                file.write("<feed xmlns=\"http://www.w3.org/2005/Atom\">\n")
+                file.write(atom_title + atom_link + atom_id + atom_author + atom_updated + atom_icon + atom_logo)
+                for item in self.__items:
+                    file.write(self.__feed_item(item))
+                file.write("</feed>\n")
+        except PermissionError:
+            print("No permission to write " + feedfile)
+            return False
+        except OSError as error:
+            print("Could not write " + feedfile + ": " + str(error))
+            return False
+        return True
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,4 @@
+requests
+beautifulsoup4  # provides the "bs4" module; the "bs4" distribution is only a placeholder
+tzdata
+backports.zoneinfo; python_version < "3.9"  # zoneinfo is in the standard library from 3.9 on