Skip to content

Commit 0699d88

Browse files
theScrabi authored and Mynacol committed
refactor and fix some issues with nordbayern bridge
Remove the /region/ URL subpath from the Nordbayern bridge; filter empty articles in Nordbayern; fix and add new filters; don't track most-read articles; fix images not shown in buttons; fix the region path; adapt the setInput() function to be backward compatible.
1 parent e4d3af7 commit 0699d88

1 file changed

Lines changed: 170 additions & 63 deletions

File tree

bridges/NordbayernBridge.php

Lines changed: 170 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -7,59 +7,114 @@ class NordbayernBridge extends BridgeAbstract
77
const CACHE_TIMEOUT = 3600;
88
const URI = 'https://www.nordbayern.de';
99
const DESCRIPTION = 'Bridge for Bavarian regional news site nordbayern.de';
10-
const PARAMETERS = [ [
11-
'region' => [
12-
'name' => 'region',
13-
'type' => 'list',
14-
'exampleValue' => 'Nürnberg',
15-
'title' => 'Select a region',
16-
'values' => [
17-
'Ansbach' => 'ansbach',
18-
'Bamberg' => 'bamberg',
19-
'Bayreuth' => 'bayreuth',
20-
'Erlangen' => 'erlangen',
21-
'Forchheim' => 'forchheim',
22-
'Fürth' => 'fuerth',
23-
'Gunzenhausen' => 'gunzenhausen',
24-
'Herzogenaurach' => 'herzogenaurach',
25-
'Höchstadt' => 'hoechstadt',
26-
'Neumarkt' => 'neumarkt',
27-
'Neustadt/Aisch-Bad Windsheim' => 'neustadt-aisch-bad-windsheim',
28-
'Nürnberg' => 'nuernberg',
29-
'Nürnberger Land' => 'nuernberger-land',
30-
'Regensburg' => 'regensburg',
31-
'Roth' => 'roth',
32-
'Schwabach' => 'schwabach',
33-
'Weißenburg' => 'weissenburg'
10+
const PARAMETERS = [
11+
[
12+
'region' => [
13+
'name' => 'region',
14+
'type' => 'list',
15+
'exampleValue' => 'Nürnberg',
16+
'title' => 'Select a region',
17+
'values' => [
18+
'Ansbach' => 'ansbach',
19+
'Bamberg' => 'bamberg',
20+
'Bayreuth' => 'bayreuth',
21+
'Erlangen' => 'erlangen',
22+
'Forchheim' => 'forchheim',
23+
'Fürth' => 'fuerth',
24+
'Gunzenhausen' => 'gunzenhausen',
25+
'Herzogenaurach' => 'herzogenaurach',
26+
'Höchstadt' => 'hoechstadt',
27+
'Neumarkt' => 'neumarkt',
28+
'Neustadt/Aisch-Bad Windsheim' => 'neustadt-aisch-bad-windsheim',
29+
'Nürnberg' => 'nuernberg',
30+
'Nürnberger Land' => 'nuernberger-land',
31+
'Regensburg' => 'regensburg',
32+
'Roth' => 'roth',
33+
'Schwabach' => 'schwabach',
34+
'Weißenburg-Gunzenhausen' => 'weissenburg-gunzenhausen'
35+
]
36+
],
37+
'hideGenussShopping' => [
38+
'name' => 'Hide Genuss & Shopping',
39+
'type' => 'checkbox',
40+
'exampleValue' => 'unchecked',
41+
'title' => 'Hide articles categorized as Genuss & Shopping'
42+
],
43+
'hideSport' => [
44+
'name' => 'Hide Sport',
45+
'type' => 'checkbox',
46+
'exampleValue' => 'unchecked',
47+
'title' => 'Hide articles categorized as Sport'
48+
],
49+
'hidePromiesTrends' => [
50+
'name' => 'Hide Promies & Trends',
51+
'type' => 'checkbox',
52+
'exampleValue' => 'unchecked',
53+
'title' => 'Hide articles categorized as Promies & Trends'
54+
],
55+
'hideService' => [
56+
'name' => 'Hide Service',
57+
'type' => 'checkbox',
58+
'exampleValue' => 'unchecked',
59+
'title' => 'Hide articles categorized as Service'
60+
],
61+
'hideFranken' => [
62+
'name' => 'Hide Franken',
63+
'type' => 'checkbox',
64+
'exampleValue' => 'unchecked',
65+
'title' => 'Hide articles categorized as Franken'
66+
],
67+
'hideBayern' => [
68+
'name' => 'Hide Bayern',
69+
'type' => 'checkbox',
70+
'exampleValue' => 'unchecked',
71+
'title' => 'Hide articles categorized as Bayern'
72+
],
73+
'hidePanorama' => [
74+
'name' => 'Hide Panorama',
75+
'type' => 'checkbox',
76+
'exampleValue' => 'unchecked',
77+
'title' => 'Hide articles categorized as Panorama'
78+
],
79+
'hidePolizeiberichte' => [
80+
'name' => 'Hide Polizeiberichte',
81+
'type' => 'checkbox',
82+
'exampleValue' => 'unchecked',
83+
'title' => 'Hide articles categorized as Polizeiberichte'
84+
],
85+
'hideNN' => [
86+
'name' => 'Hide Nürnberger Nachrichten',
87+
'type' => 'checkbox',
88+
'exampleValue' => 'checked',
89+
'defaultValue' => 'checked',
90+
'title' => 'Hide articles hosted on www.nn.de'
3491
]
35-
],
36-
'policeReports' => [
37-
'name' => 'Police Reports',
38-
'type' => 'checkbox',
39-
'exampleValue' => 'checked',
40-
'title' => 'Include Police Reports',
41-
],
42-
'hideNNPlus' => [
43-
'name' => 'Hide NN+ articles',
44-
'type' => 'checkbox',
45-
'exampleValue' => 'unchecked',
46-
'title' => 'Hide all paywall articles on NN'
47-
],
48-
'hideDPA' => [
49-
'name' => 'Hide dpa articles',
50-
'type' => 'checkbox',
51-
'exampleValue' => 'unchecked',
52-
'title' => 'Hide external articles from dpa'
5392
]
54-
]];
93+
];
94+
95+
/**
 * Translate parameters from older feed URLs, then delegate to the parent.
 *
 * Legacy handling:
 *  - `hideNNPlus` is renamed to `hideNN`; its value is carried over.
 *  - `policeReports` used to be an opt-in. When it is present with a falsy
 *    value, `hidePolizeiberichte` is switched on instead.
 *  - `hideDPA` has no replacement and is discarded.
 *
 * All three legacy keys are always stripped, because none of them is
 * declared in PARAMETERS any more.
 * NOTE(review): presumably the parent validates $input against PARAMETERS and
 * rejects undeclared names, which is why they must not be passed on - confirm.
 *
 * @param array $input Request parameters keyed by parameter name.
 */
public function setInput(array $input)
{
    if (isset($input['hideNNPlus'])) {
        $input['hideNN'] = $input['hideNNPlus'];
    }

    // FILTER_VALIDATE_BOOLEAN yields false for '0', 'off', 'false', 'no' and ''.
    if (isset($input['policeReports']) && !filter_var($input['policeReports'], FILTER_VALIDATE_BOOLEAN)) {
        $input['hidePolizeiberichte'] = 'on';
    }

    // unset() silently ignores keys that are not present, so no guards are needed.
    unset($input['hideNNPlus'], $input['policeReports'], $input['hideDPA']);

    parent::setInput($input);
}
55110

56111
public function collectData()
57112
{
58113
$region = $this->getInput('region');
59-
if ($region === 'rothenburg-o-d-t') {
60-
$region = 'rothenburg-ob-der-tauber';
114+
// These slugs are requested directly below the site root; every other
// region gets the 'region/' path prefix. The slugs must match the values
// of the 'region' list parameter exactly (Nürnberg is 'nuernberg').
if (!in_array($region, ['nuernberg', 'fuerth', 'erlangen'], true)) {
    $region = 'region/' . $region;
}
62-
$url = self::URI . '/region/' . $region;
117+
$url = self::URI . '/' . $region;
63118
$listSite = getSimpleHTMLDOM($url);
64119

65120
$this->handleNewsblock($listSite);
@@ -105,6 +160,8 @@ private function getUseFullContent($rawContent)
105160
$content .= $this->getUseFullContent($element);
106161
} elseif ($element->tag === 'picture') {
107162
$content .= $this->getValidImage($element);
163+
} elseif ($element->tag === 'button') {
164+
$content .= $this->getUseFullContent($element);
108165
} elseif ($element->tag === 'ul') {
109166
$content .= $element;
110167
}
@@ -151,7 +208,7 @@ private function getArticle($link)
151208

152209
if ($article->find('section[class*=article__richtext]', 0) === null) {
153210
$content = $article->find('div[class*=modul__teaser]', 0)
154-
->find('p', 0);
211+
->find('p', 0);
155212
$item['content'] .= $content;
156213
} else {
157214
$content = $article->find('article', 0);
@@ -175,34 +232,84 @@ private function getArticle($link)
175232
return $item;
176233
}
177234

235+
/**
 * Locate the section whose header text contains "Meistgelesen in Nürnberg".
 *
 * Walks every <section> below $main and inspects its first
 * div[class=modul__header] element, if it has one.
 *
 * @param object $main DOM node to search (must offer find()).
 * @return object|null The first matching section node, or null when none matches.
 */
private function findMostReadSection($main)
{
    $needle = 'Meistgelesen in Nürnberg';

    foreach ($main->find('section') as $candidate) {
        $heading = $candidate->find('div[class=modul__header]', 0);

        // Sections without a header block cannot be the one we are looking for.
        if ($heading === null) {
            continue;
        }

        if (str_contains($heading->plaintext, $needle)) {
            return $candidate;
        }
    }

    return null;
}
245+
246+
/**
 * Tell whether $article is a descendant of $section.
 *
 * Follows the chain of ->parent references upwards from $article and
 * compares each ancestor to $section by identity.
 *
 * @param object      $article Node whose ancestors are examined.
 * @param object|null $section Candidate ancestor; null always yields false.
 * @return bool True if $section is found among the ancestors of $article.
 */
private function isInsideSection($article, $section)
{
    if ($section !== null) {
        // Climb until the top of the tree (parent === null) is reached.
        for ($node = $article->parent; $node !== null; $node = $node->parent) {
            if ($node === $section) {
                return true;
            }
        }
    }

    return false;
}
260+
178261
private function handleNewsblock($listSite)
179262
{
180263
$main = $listSite->find('main', 0);
264+
$meistgelesenSection = $this->findMostReadSection($main);
181265
foreach ($main->find('article') as $article) {
182-
$url = $article->find('a', 0)->href;
183-
$url = urljoin(self::URI, $url);
184-
// exclude nn+ articles if desired
185-
if (
186-
$this->getInput('hideNNPlus') &&
187-
str_contains($url, 'www.nn.de')
188-
) {
266+
// skip articles inside the "Meistgelesen in Nürnberg" section
267+
if ($this->isInsideSection($article, $meistgelesenSection)) {
189268
continue;
190269
}
191270

192-
$item = $this->getArticle($url);
271+
// skip empty articles
272+
if (is_null($article->find('a', 0))) {
273+
continue;
274+
}
193275

194-
// exclude police reports if desired
195-
if (
196-
!$this->getInput('policeReports') &&
197-
str_contains($item['content'], 'Hier geht es zu allen aktuellen Polizeimeldungen.')
198-
) {
276+
$url = $article->find('a', 0)->href;
277+
$url = urljoin(self::URI, $url);
278+
279+
// skip articles based on category segment in URL
280+
if ($this->getInput('hideGenussShopping') && str_contains($url, '/genuss-shopping/')) {
281+
continue;
282+
}
283+
if ($this->getInput('hideSport') && str_contains($url, '/sport/')) {
284+
continue;
285+
}
286+
if ($this->getInput('hidePromiesTrends') && str_contains($url, '/promis-trends/')) {
287+
continue;
288+
}
289+
if ($this->getInput('hideService') && str_contains($url, '/service/')) {
199290
continue;
200291
}
292+
if ($this->getInput('hideFranken') && str_contains($url, '/franken/')) {
293+
continue;
294+
}
295+
if ($this->getInput('hideBayern') && str_contains($url, '/bayern/')) {
296+
continue;
297+
}
298+
if ($this->getInput('hidePanorama') && str_contains($url, '/panorama/')) {
299+
continue;
300+
}
301+
if ($this->getInput('hidePolizeiberichte') && str_contains($url, '/polizeibericht')) {
302+
continue;
303+
}
304+
if ($this->getInput('hideNN') && str_contains($url, 'www.nn.de')) {
305+
continue;
306+
}
307+
308+
$item = $this->getArticle($url);
201309

202-
// exclude dpa articles
203310
if (
204-
$this->getInput('hideDPA') &&
205-
str_contains($item['author'], 'dpa')
311+
$this->getInput('hidePolizeiberichte')
312+
&& str_contains($item['content'], 'Hier geht es zu allen aktuellen Polizeimeldungen.')
206313
) {
207314
continue;
208315
}

0 commit comments

Comments
 (0)