24
24
"Lindsey Smith (maintainer)" , "Erik Hetzner" , "Aaron Swartz (original author)" ]
25
25
26
26
import urllib2
27
+ import BeautifulSoup
27
28
urllib2 .install_opener (urllib2 .build_opener ())
28
29
29
30
### Vaguely Customizable Options ###
@@ -382,7 +383,8 @@ def getContent(entry, HTMLOK=0):
382
383
if not HTMLOK : # Only need to convert to text if HTML isn't OK
383
384
for c in conts :
384
385
if contains (c .type , 'html' ):
385
- return html2text (c .value )
386
+ cleanerhtml = BeautifulSoup .BeautifulSoup (c .value )
387
+ return html2text (unicode (cleanerhtml ))
386
388
387
389
for c in conts :
388
390
if c .type == 'text/plain' : return c .value
@@ -392,7 +394,8 @@ def getContent(entry, HTMLOK=0):
392
394
return ""
393
395
394
396
def getID (entry ):
395
- """Get best ID from an entry."""
397
+ """Get best ID from an entry.
398
+ NEEDS UNIT TESTS"""
396
399
if TRUST_GUID :
397
400
if 'id' in entry and entry .id :
398
401
# Newer versions of feedparser could return a dictionary
@@ -406,17 +409,17 @@ def getID(entry):
406
409
if 'link' in entry : return entry .link
407
410
if 'title' in entry : return hash (unu (entry .title )).hexdigest ()
408
411
409
- def getName (r , entry ):
412
+ def getName (fullfeed , entry ):
410
413
"""Get the best name.
411
414
NEEDS UNIT TESTS"""
412
415
413
416
if NO_FRIENDLY_NAME : return ''
414
417
415
- feed = r .feed
416
- if hasattr (r , "url" ) and r .url in OVERRIDE_FROM .keys ():
417
- return OVERRIDE_FROM [r .url ]
418
+ feedinfo = fullfeed .feed
419
+ if hasattr (fullfeed , "url" ) and fullfeed .url in OVERRIDE_FROM .keys ():
420
+ return OVERRIDE_FROM [fullfeed .url ]
418
421
419
- name = feed .get ('title' , '' )
422
+ name = feedinfo .get ('title' , '' )
420
423
421
424
if 'name' in entry .get ('author_detail' , []): # normally {} but py2.1
422
425
if entry .author_detail .name :
@@ -427,10 +430,10 @@ def getName(r, entry):
427
430
except UnicodeDecodeError :
428
431
name += unicode (entry .author_detail .name , 'utf-8' )
429
432
430
- elif 'name' in feed .get ('author_detail' , []):
431
- if feed .author_detail .name :
433
+ elif 'name' in feedinfo .get ('author_detail' , []):
434
+ if feedinfo .author_detail .name :
432
435
if name : name += ", "
433
- name += feed .author_detail .name
436
+ name += feedinfo .author_detail .name
434
437
435
438
return name
436
439
@@ -469,6 +472,21 @@ def getEmail(r, entry):
469
472
return DEFAULT_EMAIL [r .url ]
470
473
return DEFAULT_FROM
471
474
475
def getTags(entry):
    """Build a comma-separated tag line for a feed entry.

    entry -- a mapping-like feed entry; its optional 'tags' value is
             expected to be a sequence of mappings that may carry a
             'term' key.

    Returns the non-empty 'term' values joined with "," in their original
    order, or the empty string when the entry has no usable tags.
    """
    # .get() covers both a missing 'tags' key and an explicit None/empty
    # value, so no separate membership test is needed.
    tags = entry.get('tags')
    if not tags:
        return ""

    # Skip tags with a missing or empty term: joining a None term would
    # raise TypeError, and an empty term would only add a stray comma.
    # .get() is used instead of has_key(), which exists only on Python 2
    # dictionaries.
    taglist = [tag['term'] for tag in tags if tag.get('term')]
    return ",".join(taglist)
488
+
489
+
472
490
### Simple Database of Feeds ###
473
491
474
492
class Feed :
@@ -689,16 +707,8 @@ def run(num=None):
689
707
useragenthdr = "rss2email"
690
708
691
709
# Add post tags, if available
692
- tagline = ""
693
- if 'tags' in entry :
694
- tags = entry .get ('tags' )
695
- taglist = []
696
- if tags :
697
- for tag in tags :
698
- taglist .append (tag ['term' ])
699
- if taglist :
700
- tagline = "," .join (taglist )
701
-
710
+ tagline = getTags (entry )
711
+
702
712
extraheaders = {'Date' : datehdr , 'User-Agent' : useragenthdr , 'X-RSS-Feed' : f .url , 'X-RSS-ID' : id , 'X-RSS-URL' : link , 'X-RSS-TAGS' : tagline }
703
713
if BONUS_HEADER != '' :
704
714
for hdr in BONUS_HEADER .strip ().splitlines ():
0 commit comments