From 9f0e0a47d8fca2af7a7d456a15bfc8202b41bafd Mon Sep 17 00:00:00 2001 From: Struan Donald Date: Mon, 6 Mar 2017 17:28:08 +0000 Subject: [PATCH] handle multiple para tags in a debate question Fix for the parser failing to pick up all the text if there is more than one hs_Para element inside a Question tag --- pyscraper/new_hansard.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pyscraper/new_hansard.py b/pyscraper/new_hansard.py index 056d0bf08..a101e4fb5 100755 --- a/pyscraper/new_hansard.py +++ b/pyscraper/new_hansard.py @@ -617,6 +617,15 @@ def parse_question(self, question): p.text = re.sub('\n', ' ', text) tag.append(p) + + if len(para) > 1: + for p in para: + text = self.get_single_line_text_from_element(p) + if text != '': + p = etree.Element('p') + p.text = re.sub('\n', ' ', text) + tag.append(p) + self.root.append(tag) def parse_indent(self, tag):