feedgenerator

A simple tool to create various feeds
git clone https://git.ortlepp.eu/feedgenerator.git/
Log | Files | Refs | README | LICENSE

antenne_muenster.py (4736B)


      1 # -*- coding: utf-8 -*-
      2 
      3 import requests
      4 import datetime
      5 from bs4 import BeautifulSoup
      6 from common import AtomFeed, FeedItem
      7 
      8 
      9 class AntenneMuensterFeed():
     10 
     11     FEED_NAME = "Antenne Münster Newsticker"
     12     FEED_AUTHOR = "Antenne Münster"
     13     FEED_BASEURL = "https://www.antennemuenster.de/"
     14     FEED_ICON = "https://www.antennemuenster.de/assets/images/favicons/mmsantennemuenster/favicon.ico"
     15     FEED_LOGO = "https://www.antennemuenster.de/assets/images/senderlogos/antenne_muenster_sml.png"
     16     WEBSITE_URL = "https://www.antennemuenster.de/artikel/der-antenne-muenster-newsticker-618851.html"
     17 
     18 
     19     def __init__(self):
     20         pass
     21 
     22 
     23     def __parse_date(self, string):
     24         months = {"Januar": 1, "Februar": 2, "März": 3, "April": 4, "Mai": 5, "Juni": 6, "Juli": 7, 
     25                     "August": 8, "September": 9, "Okober": 10, "November": 11, "Dezember": 12}
     26         parts = string.strip().split(" ")
     27         try:
     28             return datetime.date(datetime.datetime.now().year, months[parts[2]], int(parts[1].replace(".", "")))
     29         except (IndexError, KeyError, ValueError):
     30             return datetime.date.today()
     31 
     32 
     33     def __parse_time(self, string):
     34         parts = string.strip().split(" ")[0].split(":")
     35         try:
     36             return datetime.time(int(parts[0]), int(parts[1]))
     37         except (IndexError, ValueError):
     38             return datetime.datetime.now().time()
     39 
     40 
     41     def __parse_title(self, string):
     42         string = string.strip()
     43         if string.endswith("."):
     44             string = string[0:len(string) - 1]
     45         if string.find("Uhr:") == -1:
     46             if string.find("Uhr") == -1:
     47                 return string
     48             else:
     49                 return string.split("Uhr", 1)[1].strip()
     50         else:
     51             return string.split("Uhr:")[1].strip()
     52 
     53     
     54     def create_feed(self, feedfile, maxitems):
     55         feed = AtomFeed(self.FEED_NAME, self.FEED_AUTHOR, self.FEED_BASEURL, datetime.datetime.now(), self.FEED_ICON, self.FEED_LOGO)
     56 
     57         try:
     58             request = requests.get(self.WEBSITE_URL)
     59 
     60             if request.status_code == 200:
     61                 html = BeautifulSoup(request.text, "html.parser")
     62                 content = html.select_one(".article__details > div > div").children
     63 
     64                 current_date = datetime.date.today()
     65                 current_time = datetime.datetime.now().time()
     66                 current_title = ""
     67                 current_content = ""
     68 
     69                 added = 0
     70 
     71                 for element in content:
     72                     if element.name == "script" or element.name == "h4":
     73                         continue
     74 
     75                     if str(element).startswith("<div class=\"section\">") or str(element).startswith("<div class=\"row carousel-slider"):
     76                         continue
     77 
     78                     if element.name == "h3":
     79                         current_date = self.__parse_date(element.text)
     80                         continue
     81 
     82                     if str(element).startswith("<p><strong>"):
     83                         if current_content != "":
     84                             tmp_datetime = datetime.datetime.combine(current_date, current_time)
     85                             feed.add_item(FeedItem(current_title, tmp_datetime, self.FEED_AUTHOR, current_content, self.WEBSITE_URL))
     86                             current_content = ""
     87                             added += 1
     88                         if added == maxitems:
     89                             break
     90                         current_time = self.__parse_time(element.text)
     91                         current_title = self.__parse_title(element.text)
     92                         continue
     93 
     94                     if str(element).startswith("<div class=\"card photoswipe-item\">"):
     95                         start = str(element).find("<figcaption>")
     96                         end = str(element).find("</figcaption>") + len("</figcaption>")
     97                         delete = str(element)[start:end]
     98                         current_content += str(element).replace(delete, "")
     99                         continue
    100 
    101                     current_content += str(element)
    102 
    103             else:
    104                 error_title = "Feed creation failed"
    105                 error_content = "<p>HTTP status code was " + str(request.status_code) + "</p>"
    106                 feed.add_item(FeedItem(error_title, datetime.datetime.now(), self.FEED_AUTHOR, error_content, self.WEBSITE_URL))
    107 
    108             feed.set_updated(feed.get_item(0).get_date())
    109 
    110         except:
    111             error_title = "Feed creation failed"
    112             error_content = "<p>Error while fetching the website</p>"
    113             feed.add_item(FeedItem(error_title, datetime.datetime.now(), self.FEED_AUTHOR, error_content, self.WEBSITE_URL))
    114 
    115         return feed.write_feed(feedfile)