From 97d679cea618533cbf1f4caba4705c848fb12894 Mon Sep 17 00:00:00 2001
From: Struan Donald
Date: Fri, 17 Mar 2017 12:39:28 +0000
Subject: [PATCH] handle times with a newline between hours and minutes

---
 pyscraper/new_hansard.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyscraper/new_hansard.py b/pyscraper/new_hansard.py
index ee818bbc4..941690751 100755
--- a/pyscraper/new_hansard.py
+++ b/pyscraper/new_hansard.py
@@ -979,7 +979,7 @@ def parse_time(self, tag):
         time_txt = u''.join(tag.xpath('.//text()'))
         if time_txt == '':
             return
-        matches = re.match('(\d+)(?:[:.](\d+))?[\xa0\s]*(am|pm)', time_txt)
+        matches = re.match('(\d+)(?:[:.\n](\d+))?[\xa0\s]*(am|pm)', time_txt)
         if matches:
             hours = int(matches.group(1))
             minutes = int(matches.group(2) or 0)