From d511b3eac886b1e37afdff14e8d4b89657ce1909 Mon Sep 17 00:00:00 2001
From: Struan Donald <struan@exo.org.uk>
Date: Wed, 15 Mar 2017 17:37:58 +0000
Subject: [PATCH] better parsing for Lords Amendments

Rather than just parsing it all into a single line of text, parse all the
paragraphs and indents so that we try to retain a bit more structure.
---
 pyscraper/new_hansard.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/pyscraper/new_hansard.py b/pyscraper/new_hansard.py
index 05a3f9a9c..2f7265bdc 100755
--- a/pyscraper/new_hansard.py
+++ b/pyscraper/new_hansard.py
@@ -1663,12 +1663,17 @@ def parse_tabledby(self, tabledby):
         )
 
     def parse_amendment(self, amendment):
-        self.parse_para_with_member(
-            amendment,
-            None,
-            css_class='italic',
-            pwmotiontext='unrecognized'
-        )
+        # Amendments are often things like:
+        #
+        # <Amendment><hs_quote><B>54:</B>
+        # Clause 67, page 30, line 9, leave out “high” and insert
+        # “higher”</hs_quote></Amendment>
+        #
+        # so we need to parse the tags to make sure we get the
+        # indenting etc
+        for tag in amendment.getchildren():
+            tag_name = self.get_tag_name_no_ns(tag)
+            self.handle_tag(tag_name, tag)
 
     def parse_clause_heading(self, heading):
         tag = etree.Element('p')