Skip to content

Commit a2dc19a

Browse files
committed
Revised attempt to escape quotes in content, using xml.sax.saxutils.escape. Results for existing configs are valid XML.
1 parent e879849 commit a2dc19a

File tree

1 file changed

+12
-7
lines changed

1 file changed

+12
-7
lines changed

build/buildxml.py

+12-7
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@
1111
# One extension can be generated per run of the script, with the extension's name and
1212
# destination file as parameters (see main() for syntax).
1313
#
14-
__version__ = '2023-07-09T12:46-03:00'
14+
__version__ = '2023-07-09T20:21-03:00'
1515

1616
import csv
1717
import sys
1818
import argparse
19+
from xml.sax.saxutils import escape
1920

2021
class CSVtoXMLConverter:
2122
'''
@@ -566,13 +567,17 @@ def get_xml(self, extension_name):
566567
xml += ' xmlns:dc="http://purl.org/dc/terms/"\n'
567568
xml += ' xsi:schemaLocation="http://rs.gbif.org/extension/ http://rs.gbif.org/schema/extension.xsd"\n'
568569
xml += f' dc:title="{extension.get("title")}"\n'
569-
xml += f' name="{extension_name}" namespace="{extension.get("namespace")}" rowType="{extension.get("rowType")}"\n'
570+
xml += f' name="{extension_name}"\n'
571+
xml += f' namespace="{extension.get("namespace")}"\n'
572+
xml += f' rowType="{extension.get("rowType")}"\n'
570573
xml += f' dc:issued="{extension.get("dc:issued")}"\n'
571574
subject = extension.get("dc:subject")
572575
if subject is not None:
573576
xml += f' dc:subject="{extension.get("dc:subject")}"\n'
574577
xml += f' dc:relation="{extension.get("dc:relation")}"\n'
575-
xml += f' dc:description="{extension.get("dc:description")}">\n'
578+
description = extension.get("dc:description")
579+
description = escape(description, {'"':'"'})
580+
xml += f' dc:description="{description}">\n'
576581
xml += '\n'
577582
with open(self.csv_file_path, 'r') as csv_file:
578583
reader = csv.reader(csv_file)
@@ -603,18 +608,21 @@ def get_xml(self, extension_name):
603608
description = row_dict["definition"]
604609
if row_dict.get("comments") is not None and len(row_dict.get("comments"))>0:
605610
description += f' {row_dict["comments"]}'
611+
description = escape(description, {'"':'"'})
606612
term_xml += f'dc:description="{description}" '
607613
examples = row_dict.get("examples") or ""
614+
examples = escape(examples, {'"':'"'})
608615
term_xml += f'examples="{examples}" '
609616
if row_dict["term_localName"] in extension.get("required"):
610617
term_xml += f'required="true"/>'
611618
else:
612619
term_xml += f'required="false"/>'
613620
xml += f' {term_xml}\n'
614621
for addition in extension.get("gbif_additions"):
622+
addition = escape(addition,{'"':'"'})
615623
xml += f' {addition}'
616624
xml += "</extension>"
617-
return encoded_quotes(xml)
625+
return xml
618626

619627
def write_xml(self, extension_name, filename):
620628
'''
@@ -624,9 +632,6 @@ def write_xml(self, extension_name, filename):
624632
with open(filename, 'w') as xml_file:
625633
xml_file.write(self.get_xml(extension_name))
626634

627-
def encoded_quotes(s):
628-
return s.replace('"', '&quot;')
629-
630635
def _getoptions():
631636
''' Parse command line options and return them.'''
632637
parser = argparse.ArgumentParser()

0 commit comments

Comments
 (0)