Revert "drop gocomics and other inactive af_comics filters"

This reverts commit 5f064b4477.
This commit is contained in:
Andrew Dolgov
2025-05-17 08:02:42 +03:00
parent d0d90e4ec8
commit 2e50f96901
5 changed files with 336 additions and 0 deletions

View File

@@ -0,0 +1,71 @@
<?php
/**
 * Comic filter for Dilbert feeds.
 *
 * Fetches the page an article links to and replaces the article content with
 * the comic image link found there; the strip title and the page's hashtags
 * are copied into the article as well.
 */
class Af_Comics_Dilbert extends Af_ComicFilter {

	/**
	 * @return array<int, string> human-readable names of the comics handled by this filter
	 */
	function supported() {
		return array("Dilbert");
	}

	/**
	 * Rewrites a Dilbert article in place.
	 *
	 * @param array $article article being processed; reads "link" and "title",
	 *                       may overwrite "content", "title" and "tags"
	 * @return bool true when the article link looks like a Dilbert link (even if
	 *              nothing could be extracted from the page), false otherwise
	 */
	function process(&$article) {
		if (str_contains($article["link"], "dilbert.com") ||
				str_contains($article["link"], "/DilbertDailyStrip")) {

			$res = UrlHelper::fetch([
				'url' => $article['link'],
				'useragent' => 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0',
			]);

			// Fall back to the content recorded for the last fetch error
			// (presumably the body of a non-2xx response - verify against UrlHelper).
			if (!$res && UrlHelper::$fetch_last_error_content)
				$res = UrlHelper::$fetch_last_error_content;

			$doc = new DOMDocument();

			if ($res && $doc->loadHTML($res)) {
				$xpath = new DOMXPath($doc);

				/** @var DOMElement|null $basenode (image container) */
				$basenode = $xpath->query('(//div[@class="img-comic-container"]/a[@class="img-comic-link"])')->item(0);

				// Get the comic title; the span may be missing, so do not
				// dereference the query result blindly.
				$title_node = $xpath->query('(//span[@class="comic-title-name"])')->item(0);
				$comic_title = $title_node ? $title_node->textContent : '';

				// Get tags from the article
				$matches = $xpath->query('(//p[contains(@class, "comic-tags")][1]//a)');
				$tags = array();

				foreach ($matches as $tag) {
					// Only strings starting with a number sign are considered tags
					if (str_starts_with($tag->textContent, '#')) {
						$tags[] = mb_strtolower(substr($tag->textContent, 1), 'utf-8');
					}
				}

				if ($basenode) {
					// Get the current comic's transcript and set it
					// as the title so it will be visible on mouseover.
					// Only done when the image link exists: there is nothing to
					// attach the attribute to otherwise.
					$transcript = $xpath->query('(//div[starts-with(@id, "js-toggle-transcript-")]//p)')->item(0);

					if ($transcript) {
						$basenode->setAttribute("title", $transcript->textContent);
					}

					$article["content"] = $doc->saveHTML($basenode);
				}

				// Add comic title to article title if not empty (mostly Sunday strips)
				if ($comic_title) {
					$article["title"] = $article["title"] . " - " . $comic_title;
				}

				if (!empty($tags)) {
					// Ignore existing tags and just replace them all
					$article["tags"] = array_unique($tags);
				}
			}

			return true;
		}

		return false;
	}
}
?>

View File

@@ -0,0 +1,28 @@
<?php
/**
 * Comic filter for Cyanide and Happiness (explosm.net).
 *
 * Fetches the linked comic page and replaces the article content with the
 * main comic image found on it.
 */
class Af_Comics_Explosm extends Af_ComicFilter {

	/**
	 * @return array<int, string> human-readable names of the comics handled by this filter
	 */
	function supported() {
		return array("Cyanide and Happiness");
	}

	/**
	 * Rewrites an explosm.net comic article in place.
	 *
	 * @param array $article article being processed; reads "link", may overwrite "content"
	 * @return bool true when the link points at explosm.net/comics (even if no
	 *              image could be extracted), false otherwise
	 */
	function process(&$article) {
		if (str_contains($article["link"], "explosm.net/comics")) {

			$res = UrlHelper::fetch(['url' => $article['link']]);

			$doc = new DOMDocument();

			// Check the fetch result before parsing: a failed fetch would hand
			// loadHTML() an empty source, which it rejects (warning or ValueError
			// depending on the PHP version) and "@" cannot silence an exception.
			if ($res && @$doc->loadHTML($res)) {
				$xpath = new DOMXPath($doc);
				$basenode = $xpath->query('//div[contains(@class, "MainComic__ComicImage")]//img')->item(0);

				if ($basenode) {
					$article["content"] = $doc->saveHTML($basenode);
				}
			}

			return true;
		}

		return false;
	}
}

View File

@@ -0,0 +1,98 @@
<?php
/**
 * Feed generator for GoComics strips.
 *
 * GoComics pages are plain HTML, so instead of filtering articles this class
 * hooks into subscription and fetching and builds a one-entry feed for the
 * strip shown on the comic's page.
 */
class Af_Comics_Gocomics extends Af_ComicFilter {

	/**
	 * @return array<int, string> label shown in the list of supported comics
	 */
	function supported() {
		return ["GoComics (see note below)"];
	}

	/**
	 * Article-level filtering is not used by this class.
	 *
	 * @param array $article unused
	 * @return bool always false
	 */
	function process(&$article) {
		return false;
	}

	/**
	 * Subscription hook for www.gocomics.com/<comic> URLs.
	 *
	 * @param string $url URL being subscribed to
	 * @return string|false a stub XML prolog for matching URLs, false otherwise
	 */
	public function on_subscribe($url) {
		if (!preg_match('#^https?://www\.gocomics\.com/([-a-z0-9]+)$#i', $url))
			return false;

		return '<?xml version="1.0" encoding="utf-8"?>'; // Get _is_html() to return false.
	}

	/**
	 * Basic feed info hook for www.gocomics.com/<comic> URLs.
	 *
	 * @param string $url feed URL
	 * @return array{title: string, site_url: string}|false title derived from
	 *         the comic slug and the matched URL, or false when the URL does not match
	 */
	public function on_basic_info($url) {
		if (!preg_match('#^https?://www\.gocomics\.com/([-a-z0-9]+)$#i', $url, $matches))
			return false;

		return ['title' => ucfirst($matches[1]), 'site_url' => $matches[0]];
	}

	/**
	 * Fetch hook: builds feed data from the comic's HTML page.
	 *
	 * Accepts both gocomics.com URLs and feeds.feedburner.com/uclick URLs.
	 * The generated feed has a single entry when the comic image is found on
	 * the page and no entries otherwise.
	 *
	 * @param string $url feed URL
	 * @return string|false generated feed data, or false when the URL does not
	 *         match or the template output could not be generated
	 */
	public function on_fetch($url) {
		if (!preg_match('#^https?://(?:feeds\.feedburner\.com/uclick|www\.gocomics\.com)/([-a-z0-9]+)$#i', $url, $comic))
			return false;

		$slug = $comic[1];
		$page_url = 'https://www.gocomics.com/' . $slug;

		// no date suffix on the fetched URL since /whatever/$TODAY redirects to /whatever
		$body = UrlHelper::fetch(array('url' => $page_url, 'type' => 'text/html', 'followlocation' => false));

		$feed_title = htmlspecialchars($slug);
		$site_url = htmlspecialchars($page_url);

		// the date suffix uniquely identifies the "article" and provides the permalink
		$article_link = $site_url . date('/Y/m/d');

		$tpl = new Templator();
		$tpl->readTemplateFromFile('templates/generated_feed.txt');

		$tpl->setVariable('FEED_TITLE', $feed_title, true);
		$tpl->setVariable('VERSION', Config::get_version(), true);
		$tpl->setVariable('FEED_URL', htmlspecialchars($url), true);
		$tpl->setVariable('SELF_URL', $site_url, true);

		$doc = new DOMDocument();

		if ($body && @$doc->loadHTML($body)) {
			$xpath = new DOMXPath($doc);

			/** @var DOMElement|null $image */
			$image = $xpath->query('//button[@aria-label="Expand comic"]/img')->item(0);

			if ($image) {
				// Use the first <h1> as the entry title, falling back to today's date.
				$heading = $xpath->query('//h1')->item(0);
				$title = $heading
					? clean(trim($heading->nodeValue))
					: date('l, F d, Y');

				// Strip responsive-image and sizing attributes from the <img>.
				$image->removeAttribute('srcset');
				$image->removeAttribute('sizes');
				$image->removeAttribute('data-srcset');
				$image->removeAttribute('width');

				$tpl->setVariable('ARTICLE_ID', $article_link, true);
				$tpl->setVariable('ARTICLE_LINK', $article_link, true);
				$tpl->setVariable('ARTICLE_UPDATED_ATOM', date('c', mktime(11, 0, 0)), true);
				$tpl->setVariable('ARTICLE_TITLE', htmlspecialchars($title), true);
				$tpl->setVariable('ARTICLE_EXCERPT', '', true);
				$tpl->setVariable('ARTICLE_CONTENT', $doc->saveHTML($image), true);
				$tpl->setVariable('ARTICLE_AUTHOR', '', true);
				$tpl->setVariable('ARTICLE_SOURCE_LINK', $site_url, true);
				$tpl->setVariable('ARTICLE_SOURCE_TITLE', $feed_title, true);

				$tpl->addBlock('entry');
			}
		}

		$tpl->addBlock('feed');

		return $tpl->generateOutputToString($tmp_data) ? $tmp_data : false;
	}
}

View File

@@ -0,0 +1,108 @@
<?php
/**
 * Feed generator for The Far Side (thefarside.com).
 *
 * Builds a one-entry feed from the site's dated daily page and tries to store
 * the strip images in the local "images" disk cache.
 */
class Af_Comics_Gocomics_FarSide extends Af_ComicFilter {

	/** URL pattern shared by all hooks so they always agree on which feeds are handled. */
	private const SITE_URL_PATTERN = '#^https?://www\.thefarside\.com#';

	/**
	 * @return array<int, string> label shown in the list of supported comics
	 */
	function supported() {
		return ["The Far Side (needs cache media)"];
	}

	/**
	 * Article-level filtering is not used by this class.
	 *
	 * @param array $article unused
	 * @return bool always false
	 */
	function process(&$article) {
		return false;
	}

	/**
	 * Subscription hook for thefarside.com URLs.
	 *
	 * @param string $url URL being subscribed to
	 * @return string|false a stub XML prolog for matching URLs, false otherwise
	 */
	public function on_subscribe($url) {
		if (preg_match(self::SITE_URL_PATTERN, $url))
			return '<?xml version="1.0" encoding="utf-8"?>'; // Get _is_html() to return false.
		else
			return false;
	}

	/**
	 * Basic feed info hook for thefarside.com URLs.
	 *
	 * Uses the same pattern as on_subscribe() and on_fetch(), so a URL without
	 * a trailing slash is recognized here too.
	 *
	 * @param string $url feed URL
	 * @return array{title: string, site_url: string}|false fixed title and site
	 *         URL, or false when the URL does not match
	 */
	public function on_basic_info($url) {
		if (preg_match(self::SITE_URL_PATTERN, $url))
			return ['title' => "The Far Side", 'site_url' => 'https://www.thefarside.com'];
		else
			return false;
	}

	/**
	 * Fetch hook: builds feed data from today's daily page.
	 *
	 * The generated feed has a single entry when the daily content container is
	 * found on the page and no entries otherwise.
	 *
	 * @param string $url feed URL
	 * @return string|false generated feed data, or false when the URL does not
	 *         match or the template output could not be generated
	 */
	public function on_fetch($url) {
		if (preg_match(self::SITE_URL_PATTERN, $url)) {

			// Raw URL is used for fetching; it is HTML-escaped once, below, for the template.
			$article_link = "https://www.thefarside.com" . date('/Y/m/d');
			$article_link_escaped = htmlspecialchars($article_link);

			$tpl = new Templator();

			$tpl->readTemplateFromFile('templates/generated_feed.txt');

			$tpl->setVariable('FEED_TITLE', "The Far Side", true);
			$tpl->setVariable('VERSION', Config::get_version(), true);
			$tpl->setVariable('FEED_URL', htmlspecialchars($url), true);
			$tpl->setVariable('SELF_URL', htmlspecialchars($url), true);

			$body = UrlHelper::fetch(['url' => $article_link, 'type' => 'text/html', 'followlocation' => false]);

			if ($body) {
				$doc = new DOMDocument();

				if (@$doc->loadHTML($body)) {
					$xpath = new DOMXPath($doc);

					$content_node = $xpath->query('//*[contains(@class,"js-daily-dose")]')->item(0);

					if ($content_node) {
						// Relative path (".//"): only images inside the content node are
						// rewritten and cached; an absolute "//img" would ignore the
						// context node and match every image in the document.
						$imgs = $xpath->query('.//img[@data-src]', $content_node);

						$cache = DiskCache::instance("images");

						foreach ($imgs as $img) {
							$image_url = $img->getAttribute('data-src');

							if ($image_url) {
								$local_filename = sha1($image_url);

								$img->setAttribute('src', $image_url);

								// try to cache image locally because they just 401 us otherwise
								if (!$cache->exists($local_filename)) {
									Debug::log("[Af_Comics_Gocomics_FarSide] caching: $image_url", Debug::LOG_VERBOSE);
									$res = $cache->download($image_url, $local_filename, ["http_referrer" => $image_url]);
									Debug::log("[Af_Comics_Gocomics_FarSide] cache result: $res", Debug::LOG_VERBOSE);
								}
							}
						}

						// Drop share-popover elements before serializing the content.
						$junk_elems = $xpath->query("//*[@data-shareable-popover]");

						foreach ($junk_elems as $junk)
							$junk->parentNode->removeChild($junk);

						// Use the first <h3> as the entry title, falling back to today's date.
						$title = $xpath->query('//h3')->item(0);

						if ($title) {
							$title = clean(trim($title->nodeValue));
						} else {
							$title = date('l, F d, Y');
						}

						$tpl->setVariable('ARTICLE_ID', $article_link_escaped, true);
						$tpl->setVariable('ARTICLE_LINK', $article_link_escaped, true);
						$tpl->setVariable('ARTICLE_UPDATED_ATOM', date('c', mktime(11, 0, 0)), true);
						$tpl->setVariable('ARTICLE_TITLE', htmlspecialchars($title), true);
						$tpl->setVariable('ARTICLE_EXCERPT', '', true);
						$tpl->setVariable('ARTICLE_CONTENT', "<p> " . $doc->saveHTML($content_node) . "</p>", true);
						$tpl->setVariable('ARTICLE_AUTHOR', '', true);
						$tpl->setVariable('ARTICLE_SOURCE_LINK', $article_link_escaped, true);
						$tpl->setVariable('ARTICLE_SOURCE_TITLE', "The Far Side", true);

						$tpl->addBlock('entry');
					}
				}
			}

			$tpl->addBlock('feed');

			if ($tpl->generateOutputToString($tmp_data))
				return $tmp_data;
		}

		return false;
	}
}

View File

@@ -0,0 +1,31 @@
<?php
/**
 * Comic filter for PvP Online.
 *
 * Fetches the linked page and replaces the article content with the
 * "comic-art" section found on it.
 */
class Af_Comics_Pvp extends Af_ComicFilter {

	/**
	 * @return array<int, string> human-readable names of the comics handled by this filter
	 */
	function supported() {
		return ["PvP Online"];
	}

	/**
	 * Rewrites a PvP Online article in place.
	 *
	 * @param array $article article being processed; reads "guid" and "link",
	 *                       may overwrite "content"
	 * @return bool true when the article GUID mentions pvponline.com (even if
	 *              nothing could be extracted), false otherwise
	 */
	function process(&$article) {
		// Matching is done on the GUID, not the link.
		if (!str_contains($article["guid"], "pvponline.com"))
			return false;

		$html = UrlHelper::fetch([
			'url' => $article['link'],
			'useragent' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)',
		]);

		$dom = new DOMDocument();

		if ($html && $dom->loadHTML($html)) {
			$finder = new DOMXPath($dom);
			$comic_section = $finder->query('//section[@class="comic-art"]')->item(0);

			if ($comic_section)
				$article["content"] = $dom->saveHTML($comic_section);
		}

		return true;
	}
}