# Content-extraction rules for heise.de articles.
# Format: one "directive: value" pair per line; a line starting with "#" is a comment.

prune: no
tidy: no

# Publication date (several candidate expressions are listed)
date: //article//time[@itemprop="datePublished"]/@datetime
date: //p[@class='news_datum']

# Author
author: //article//span[@itemprop="author"]
author: //span[@class='author']
author: //div[@class="article__author"]

# print version (if single_page_link is followed)
body: //article[contains(@class, 'printversion')]
# regular
body: //article[@id="meldung"] | //div[@class='meldung_wrapper'] | //section[@id='artikel_text'] | //article[contains(@class, 'xp__article')]
# for heise.de/select (e-paper)
body: //article[contains(concat(' ',normalize-space(@class),' '),' article ')]

# General cleanup
strip: //nav
strip: //time
strip: //h1[@class='article__heading']
strip: //h4[@class='author']
strip: //div[@class='gallery compact']/h3
strip: //div[@class='gallery compact']/figcaption
strip: //div[@class='publish-info']
strip: //p[@class='news_datum']
strip: //p[@class='artikel_datum']
strip: //p[@class='news_navi']
strip: //p[@class='printversion']
strip: //a[contains(@href, 'mailto')]/@href
strip: //div[@class='gallery compact']/h2
strip: //p[@class='themen_foren']
strip: //style
strip: //span[@class='source']
strip: //figure[@class='printversion__logo']
strip: //figure[@class='branding']
strip: //a-ad
strip: //footer
strip: //div/section[@data-component='TeaserList']/ancestor::div[1]
strip_id_or_class: comments
strip_id_or_class: ISI_IGNORE
strip_id_or_class: printversion--hide
strip_id_or_class: linkurl_grossbild
strip_id_or_class: image-num
strip_id_or_class: heisebox_right
strip_id_or_class: dossier
strip_id_or_class: latest_posting_snippet
strip_id_or_class: a-pvgs
strip_id_or_class: a-pvg__body
strip_id_or_class: teaser_adliste
strip_id_or_class: a-article-header__service
strip_id_or_class: opinary-widget-embed
strip_id_or_class: article-sidebar
strip_id_or_class: a-box__header
strip_id_or_class: a-article-teaser
strip_id_or_class: article-header
strip_id_or_class: a-box__target
strip_id_or_class: printversion__untertitel

# Strip the opt-in question shown before loading external content (YouTube, Twitter, Mastodon)
strip: //a-opt-in

# Strip ads
strip_id_or_class: ad_
strip_id_or_class: a-box

# Strip related articles
strip: //a-collapse

# Fix lazy-loaded images (even after JS processing)
# Test URL: https://www.heise.de/hintergrund/James-Bond-im-Kino-60-Jahre-007-Autos-und-verrueckte-Gadgets-7288360.html?seite=all
# NOTE(review): the directive below has an empty search pattern and no replacement. Its
# pattern appears to have been lost (presumably HTML markup that set up the "ff-a-img"
# wrapper and the data-ff-replacement attribute used by the strip rule) - restore it
# from the upstream config or delete the line.
replace_string():
strip: //div[@class="ff-a-img"]//img[not(@data-ff-replacement)]

# Some optimizations: tidy up the page title
# Remove the "Druckversion - " prefix. The pattern includes the opening <title> tag so
# that the replacement puts back exactly one tag instead of inserting a second one.
replace_string(<title>Druckversion - ): <title>
# Remove the site-name suffixes.
# NOTE(review): these patterns are not anchored to the title element, so they match
# anywhere in the page - confirm that is intended.
replace_string( | heise online):
replace_string( | c't Magazin):
replace_string( | Telepolis):
replace_string( | heise Security):
replace_string( | heise Autos):
# NOTE(review): one more replace_string directive followed here whose parentheses
# contained nothing but a line break; that cannot be expressed in a one-directive-per-line
# file, so it is kept disabled until the intended pattern is restored.
# replace_string(<pattern lost>):

# Single-page / print view
single_page_link: //a[contains(@href, '?view=print')]
single_page_link: //a[contains(@title, 'Druck')]
single_page_link: //a[contains(@href, '?seite=all')]/@href

# Pagination
next_page_link: //a[@class='next' and not(contains(text(), 'Artikel'))]
next_page_link: //a[@title='vor']
next_page_link: //a[@rel='next']

#
# for wallabag only. Do NOT set your credentials here, do this in wallabag's UI:
#
# NOTE(review): requires_login is assumed to be an active directive (the login_* settings
# below would otherwise be unused) - confirm it was not meant to stay commented out.
requires_login: yes
login_uri: https://www.heise.de/sso/login/login
login_username_field: username
login_password_field: password
not_logged_in_xpath: //a[@id='heiselogin-link-login']

test_url: https://www.heise.de/select/ct/2020/3/1580493780857147
test_url: https://www.heise.de/news/Ueberwachungstechnik-Die-globale-Handy-Standortueberwachung-2301494.html
test_url: https://www.heise.de/news/Bodenradar-fuer-selbstfahrende-Autos-horcht-unter-die-Strasse-3273941.html
test_url: https://www.heise.de/tp/artikel/49/49473/1.html
test_url: https://www.heise.de/ct/artikel/Die-Neuerungen-von-Linux-3-15-2196231.html
test_url: https://heise.de/-3527918