Skip to content

Commit a558096

Browse files
committed
Better attribute handling. Factored out tag handling into getTags()
1 parent 190a3b4 commit a558096

File tree

1 file changed

+30
-20
lines changed

1 file changed

+30
-20
lines changed

rss2email.py

+30-20
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
"Lindsey Smith (maintainer)", "Erik Hetzner", "Aaron Swartz (original author)" ]
2525

2626
import urllib2
27+
import BeautifulSoup
2728
urllib2.install_opener(urllib2.build_opener())
2829

2930
### Vaguely Customizable Options ###
@@ -382,7 +383,8 @@ def getContent(entry, HTMLOK=0):
382383
if not HTMLOK: # Only need to convert to text if HTML isn't OK
383384
for c in conts:
384385
if contains(c.type, 'html'):
385-
return html2text(c.value)
386+
cleanerhtml = BeautifulSoup.BeautifulSoup(c.value)
387+
return html2text(unicode(cleanerhtml))
386388

387389
for c in conts:
388390
if c.type == 'text/plain': return c.value
@@ -392,7 +394,8 @@ def getContent(entry, HTMLOK=0):
392394
return ""
393395

394396
def getID(entry):
395-
"""Get best ID from an entry."""
397+
"""Get best ID from an entry.
398+
NEEDS UNIT TESTS"""
396399
if TRUST_GUID:
397400
if 'id' in entry and entry.id:
398401
# Newer versions of feedparser could return a dictionary
@@ -406,17 +409,17 @@ def getID(entry):
406409
if 'link' in entry: return entry.link
407410
if 'title' in entry: return hash(unu(entry.title)).hexdigest()
408411

409-
def getName(r, entry):
412+
def getName(fullfeed, entry):
410413
"""Get the best name.
411414
NEEDS UNIT TESTS"""
412415

413416
if NO_FRIENDLY_NAME: return ''
414417

415-
feed = r.feed
416-
if hasattr(r, "url") and r.url in OVERRIDE_FROM.keys():
417-
return OVERRIDE_FROM[r.url]
418+
feedinfo = fullfeed.feed
419+
if hasattr(fullfeed, "url") and fullfeed.url in OVERRIDE_FROM.keys():
420+
return OVERRIDE_FROM[fullfeed.url]
418421

419-
name = feed.get('title', '')
422+
name = feedinfo.get('title', '')
420423

421424
if 'name' in entry.get('author_detail', []): # normally {} but py2.1
422425
if entry.author_detail.name:
@@ -427,10 +430,10 @@ def getName(r, entry):
427430
except UnicodeDecodeError:
428431
name += unicode(entry.author_detail.name, 'utf-8')
429432

430-
elif 'name' in feed.get('author_detail', []):
431-
if feed.author_detail.name:
433+
elif 'name' in feedinfo.get('author_detail', []):
434+
if feedinfo.author_detail.name:
432435
if name: name += ", "
433-
name += feed.author_detail.name
436+
name += feedinfo.author_detail.name
434437

435438
return name
436439

@@ -469,6 +472,21 @@ def getEmail(r, entry):
469472
return DEFAULT_EMAIL[r.url]
470473
return DEFAULT_FROM
471474

475+
def getTags(entry):
    """Build a comma-separated tagline from an entry's tags.

    Collects the 'term' value of every item in entry['tags'] and joins
    them with ",". Tags that have no 'term' key, or whose term is None or
    empty, are skipped so the join cannot fail on a non-string value and
    the result never contains empty fields.

    Returns an empty string when the entry has no 'tags' key, when the
    tags value is empty or None, or when no tag carries a usable term.
    """
    # entry.get() covers both a missing key and an explicit None value.
    tags = entry.get('tags') or []
    # "in" replaces dict.has_key(), which is deprecated and gone in Python 3.
    taglist = [tag['term'] for tag in tags if 'term' in tag and tag['term']]
    return ",".join(taglist)
488+
489+
472490
### Simple Database of Feeds ###
473491

474492
class Feed:
@@ -689,16 +707,8 @@ def run(num=None):
689707
useragenthdr = "rss2email"
690708

691709
# Add post tags, if available
692-
tagline = ""
693-
if 'tags' in entry:
694-
tags = entry.get('tags')
695-
taglist = []
696-
if tags:
697-
for tag in tags:
698-
taglist.append(tag['term'])
699-
if taglist:
700-
tagline = ",".join(taglist)
701-
710+
tagline = getTags(entry)
711+
702712
extraheaders = {'Date': datehdr, 'User-Agent': useragenthdr, 'X-RSS-Feed': f.url, 'X-RSS-ID': id, 'X-RSS-URL': link, 'X-RSS-TAGS' : tagline}
703713
if BONUS_HEADER != '':
704714
for hdr in BONUS_HEADER.strip().splitlines():

0 commit comments

Comments
 (0)