1
+ <?php
2
+
3
/**
 * Bridge for lfc.pl that builds feed items from the site's monthly archive page.
 *
 * For each of up to ten archive entries the article page is fetched and turned
 * into an item with title, URI, content (lead image + article body, optionally
 * followed by the reader comments), author and timestamp.
 */
class LfcPlBridge extends BridgeAbstract
{
    const NAME = 'LFC (lfc.pl)';
    const DESCRIPTION = 'LFC.pl - największa polska strona o Liverpool FC';
    const URI = 'https://lfc.pl';
    const MAINTAINER = 'brtsos';
    const PARAMETERS = [
        [
            'comments' => [
                'type' => 'list',
                'name' => 'Include comments',
                'title' => 'Include comments in the article content',
                'values' => [
                    'No' => 'no',
                    'Yes' => 'yes',
                ],
            ],
        ],
    ];

    /**
     * Populates $this->items from the archive page of the current year and month.
     *
     * Articles whose content contains the marker "Artykuł sponsorowany" are skipped.
     * Missing author/date markup no longer aborts the run: the affected item is
     * simply emitted without that field.
     */
    public function collectData()
    {
        $dom = getSimpleHTMLDOM(self::URI . '/Archiwum/' . date('Ym'));

        // Reverse the archive list and keep the first ten entries of the reversed order.
        // NOTE(review): presumably the archive lists oldest first, so this keeps the newest ten - confirm.
        $list = array_slice(array_reverse($dom->find('#page .list-vertical li')), 0, 10);

        foreach ($list as $li) {
            $link = $li->find('a', 0);
            if (!$link) {
                // List entry without a link: nothing to fetch.
                continue;
            }

            $url = self::URI . $link->href;
            $title = $link->plaintext;

            $articleDom = getSimpleHTMLDOM($url);

            $description = $this->getContent($articleDom);
            if (mb_strpos($description, 'Artykuł sponsorowany') !== false) {
                continue;
            }

            // Only emit an <img> when an image URL was actually found.
            $image = '';
            $imageUrl = $this->getImage($articleDom);
            if ($imageUrl !== null) {
                $image = '<img src="' . htmlspecialchars($imageUrl, ENT_QUOTES, 'UTF-8')
                    . '" alt="' . htmlspecialchars($title, ENT_QUOTES, 'UTF-8') . '" /><br />';
            }

            $tagsToRemove = ['script', 'iframe', 'input', 'form'];
            $content = sanitize($image . $description, $tagsToRemove);

            $item = [
                'title' => $title,
                'uri' => $url,
                'content' => $content,
            ];

            $meta = $this->getFooterMeta($articleDom);
            if ($meta['timestamp'] !== null) {
                $item['timestamp'] = $meta['timestamp'];
            }
            if ($meta['author'] !== null) {
                $item['author'] = $meta['author'];
            }

            $this->items[] = $item;
        }
    }

    /**
     * Reads author and publication time from the article footer.
     *
     * @param object $article Parsed article page (as returned by getSimpleHTMLDOM).
     * @return array{author: ?string, timestamp: ?int} Fields are null when the
     *         expected markup is missing or the date does not match "d.m.Y H:i".
     */
    private function getFooterMeta($article): array
    {
        $meta = ['author' => null, 'timestamp' => null];

        // Walk .footer -> first .item -> second div, stopping at the first missing node.
        $footer = $article->find('.footer', 0);
        $item = $footer ? $footer->find('.item', 0) : null;
        $details = $item ? $item->find('div', 1) : null;
        if (!$details) {
            return $meta;
        }

        $authorLink = $details->find('a', 0);
        if ($authorLink) {
            $meta['author'] = trim($authorLink->plaintext);
        }

        $dateNode = $details->find('div', 0);
        if ($dateNode) {
            // createFromFormat() returns false on mismatch; surrounding whitespace
            // would cause such a mismatch, hence the trim().
            // NOTE(review): parsed in PHP's default timezone - confirm this matches the site.
            $date = DateTime::createFromFormat('d.m.Y H:i', trim($dateNode->plaintext));
            if ($date !== false) {
                $meta['timestamp'] = $date->getTimestamp();
            }
        }

        return $meta;
    }

    /**
     * Returns the article body HTML, followed by the rendered comments when the
     * "comments" option is enabled.
     *
     * @param object $article Parsed article page.
     * @return string
     */
    private function getContent($article)
    {
        $body = $article->find('.news-body', 0);
        $content = $body ? $body->innertext : '';

        // The comments are built in a separate method with their own variables so
        // the article body can never be overwritten by a comment's text.
        return $content . '<br /> <br />' . $this->getComments($article);
    }

    /**
     * Renders the comments found under #comments as a simple HTML fragment.
     *
     * @param object $article Parsed article page.
     * @return string Empty string when comments are disabled or none are present.
     */
    private function getComments($article): string
    {
        if (!$this->withComment()) {
            return '';
        }

        $container = $article->find('#comments', 0);
        if (!$container) {
            return '';
        }

        $commentNodes = $container->find('.comment');
        if (count($commentNodes) === 0) {
            return '';
        }

        $html = '<h3>Komentarze:</h3>';
        foreach ($commentNodes as $comment) {
            $header = $comment->find('.header', 0);
            $text = $comment->find('.content', 0);

            $html .= ($header ? $header->plaintext : '')
                . '<br />'
                . ($text ? $text->plaintext : '')
                . '<br /><br />';
        }

        return $html;
    }

    /**
     * Extracts the lead image URL from the inline background-image style of
     * the "#news .img" element.
     *
     * @param object $article Parsed article page.
     * @return string|null Absolute image URL, or null when no image is declared.
     */
    private function getImage($article): ?string
    {
        $imgElement = $article->find('#news .img', 0);
        if (!$imgElement) {
            return null;
        }

        // The cast guards against a missing style attribute yielding a non-string value.
        $style = (string) $imgElement->style;
        if (!preg_match('/background-image:\s*url\(([^)]+)\)/i', $style, $matches)) {
            return null;
        }

        // Strip optional quotes and whitespace around the url(...) argument.
        $path = trim($matches[1], " \t'\"");
        if ($path === '') {
            return null;
        }

        // Leave absolute and protocol-relative URLs alone; only site-relative
        // paths get the base URI prepended.
        if (preg_match('#^(?:https?:)?//#i', $path)) {
            return $path;
        }

        return self::URI . '/' . ltrim($path, '/');
    }

    /**
     * Tells whether the user selected "yes" for the "comments" parameter.
     */
    private function withComment(): bool
    {
        return $this->getInput('comments') === 'yes';
    }
}
0 commit comments