Skip to content

Commit 9175e5a

Browse files
authored
Merge pull request #253 from transifex/support-rtl-on-docx
Support RTL in ms formats
2 parents ee680df + e5e81c3 commit 9175e5a

File tree

8 files changed

+125
-8
lines changed

8 files changed

+125
-8
lines changed

openformats/formats/customizable_xml.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,9 @@ def parse(self, content, **kwargs):
173173
template = transcriber.get_destination()
174174
return template, stringset
175175

176-
def compile(self, template, stringset, is_source=True, language_info=None):
176+
def compile(
177+
self, template, stringset, is_source=True, language_info=None, **kwargs
178+
):
177179
"""Compile the given `template` by replacing all hash placeholders
178180
with the translations found in `stringset`.
179181

openformats/formats/docx.py

+24-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from openformats.handlers import Handler
1313
from openformats.formats.office_open_xml.parser import OfficeOpenXmlHandler
1414

15+
1516
class DocxFile(object):
1617
"""
1718
A class used to wrap and expose the internals of a .docx file
@@ -81,6 +82,7 @@ class DocxFile(object):
8182
</Relationships>
8283
```
8384
"""
85+
8486
def __init__(self, content):
8587
self.__tmp_folder = "{}/{}".format(
8688
tempfile.gettempdir(), uuid.uuid4().hex
@@ -230,6 +232,24 @@ def remove_text_element(cls, text_element):
230232
else:
231233
text_element.decompose()
232234

235+
@classmethod
def set_rtl_orientation(cls, paragraph):
    """Mark the given `<w:p>` paragraph and its runs as right-to-left.

    Each `<w:pPr>` found under `paragraph` gets a `<w:bidi w:val="1"/>`
    child appended and each `<w:rPr>` gets a `<w:rtl w:val="1"/>` child
    appended. If a `bidi`/`rtl` tag is already reachable from the
    properties tag, that tag is removed first so the flag is re-created
    rather than stacked on top of the old one.

    The tree is modified in place; nothing is returned.

    :param paragraph: the parsed paragraph element to update
    """
    # Empty document that is only used as a factory for the new tags.
    tag_factory = BeautifulSoup("", "xml")

    # Paragraph-level properties.
    for paragraph_props in paragraph.find_all("w:pPr"):
        existing_bidi = paragraph_props.bidi
        if existing_bidi is not None:
            existing_bidi.decompose()
        paragraph_props.append(
            tag_factory.new_tag("w:bidi", **{"w:val": "1"})
        )

    # Run-level properties.
    for run_props in paragraph.find_all("w:rPr"):
        existing_rtl = run_props.rtl
        if existing_rtl is not None:
            existing_rtl.decompose()
        run_props.append(
            tag_factory.new_tag("w:rtl", **{"w:val": "1"})
        )
252+
233253
def parse(self, content, **kwargs):
234254
"""
235255
We will segment the text by paragraph `<w:p>` as this
@@ -272,9 +292,12 @@ def compile(self, template, stringset, **kwargs):
272292
docx = DocxFile(template)
273293
soup = BeautifulSoup(docx.get_document(), 'xml')
274294
rels_soup = BeautifulSoup(docx.get_document_rels(), 'xml')
295+
is_rtl = kwargs.get('is_rtl', False)
275296

276297
for paragraph in soup.find_all('w:p'):
277-
self.compile_paragraph(paragraph, rels_soup, stringset)
298+
self.compile_paragraph(
299+
paragraph, rels_soup, stringset, is_rtl=is_rtl
300+
)
278301

279302
docx.set_document(six.text_type(soup))
280303
docx.set_document_rels(six.text_type(rels_soup))

openformats/formats/office_open_xml/parser.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ def remove_hyperlink(cls, text_element):
2828
def remove_text_element(cls, text_element):
2929
raise NotImplementedError
3030

31+
@classmethod
def set_rtl_orientation(cls, paragraph):
    """Apply right-to-left orientation to `paragraph`.

    There is no generic implementation: this version always raises, so a
    concrete format handler has to provide its own. `compile_paragraph`
    calls this hook when it is invoked with a truthy `is_rtl`.

    :param paragraph: the parsed paragraph element to update
    :raises NotImplementedError: always
    """
    raise NotImplementedError
34+
3135
@classmethod
3236
def swap_hyperlink_elements(
3337
cls, added_hl_text_elements, deleted_hl_text_elements
@@ -158,7 +162,7 @@ def parse_paragraph(cls, paragraph, rels_soup):
158162

159163
return open_string
160164

161-
def compile_paragraph(cls, paragraph, rels_soup, stringset):
165+
def compile_paragraph(cls, paragraph, rels_soup, stringset, is_rtl=False):
162166
text_elements = paragraph.find_all(cls.TEXT_ELEMENT_TAG)
163167
if not text_elements:
164168
return
@@ -188,6 +192,9 @@ def compile_paragraph(cls, paragraph, rels_soup, stringset):
188192

189193
# First of all try to replace each element translation
190194
# this is the happiest path
195+
if is_rtl:
196+
cls.set_rtl_orientation(paragraph)
197+
191198
for index, text_element in enumerate(text_elements):
192199
text = six.text_type(text_element.text)
193200

openformats/formats/pptx.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,14 @@ def remove_hyperlink(cls, text_element):
288288
def remove_text_element(cls, text_element):
289289
text_element.decompose()
290290

291+
@classmethod
def set_rtl_orientation(cls, paragraph):
    """Flag every `<a:pPr>` under `paragraph` as right-to-left.

    Each paragraph-properties tag gets `rtl="1"`. An explicit left
    alignment (`algn="l"`) is switched to right (`algn="r"`); any other
    alignment value, or a missing one, is left as it is.

    The tree is modified in place; nothing is returned.

    :param paragraph: the parsed paragraph element to update
    """
    for paragraph_props in paragraph.find_all("a:pPr"):
        paragraph_props["rtl"] = "1"
        if paragraph_props.get("algn") != "l":
            # Only an explicit left alignment is mirrored.
            continue
        paragraph_props["algn"] = "r"
298+
291299
def parse(self, content, **kwargs):
292300
"""
293301
We will segment the text by paragraph `<w:p>` as this
@@ -334,14 +342,16 @@ def compile(self, template, stringset, **kwargs):
334342
string.string_hash: string for string in stringset
335343
}
336344
pptx = PptxFile(template)
337-
345+
is_rtl = kwargs.get('is_rtl', False)
338346
for slide in pptx.get_slides():
339347
soup = BeautifulSoup(pptx.get_slide(slide), 'xml')
340348
rels_soup = BeautifulSoup(pptx.get_slide_rels(slide), 'xml')
341349

342350
for parent in soup.find_all('p:sp'):
343351
for paragraph in parent.find_all('a:p'):
344-
self.compile_paragraph(paragraph, rels_soup, stringset)
352+
self.compile_paragraph(
353+
paragraph, rels_soup, stringset, is_rtl=is_rtl
354+
)
345355

346356
pptx.set_slide(slide, six.text_type(soup))
347357
pptx.set_slide_rels(slide, six.text_type(rels_soup))

openformats/handlers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def parse(self, content, is_source=False):
7474

7575
raise NotImplementedError('Abstract method') # pragma: no cover
7676

77-
def compile(self, template, stringset):
77+
def compile(self, template, stringset, **kwargs):
7878
"""
7979
Parses the template, finds the hashes, replaces them with strings from
8080
the stringset and returns the compiled file. If a hash in the template

openformats/tests/formats/docx/test_docx.py

+34-2
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,10 @@ def test_hyperlink_reorder(self):
221221
paragraph = soup.find_all('w:p')[0]
222222
text_elements = paragraph.find_all('w:t')
223223

224-
self.assertEqual(text_elements[3].parent.rPr.color, text_elements_bf_reorder[1].parent.rPr.color)
225-
self.assertEqual(text_elements[3].parent.rPr.u, text_elements_bf_reorder[1].parent.rPr.u)
224+
self.assertEqual(text_elements[3].parent.rPr.color,
225+
text_elements_bf_reorder[1].parent.rPr.color)
226+
self.assertEqual(text_elements[3].parent.rPr.u,
227+
text_elements_bf_reorder[1].parent.rPr.u)
226228
self.assertEqual(text_elements[1].parent.rPr.color, None)
227229
self.assertEqual(text_elements[1].parent.rPr.u, None)
228230

@@ -865,3 +867,33 @@ def test_lt(self):
865867
self.assertEqual(openstring.order, 0)
866868
self.assertEqual(openstring.string, translation)
867869
self.assertEqual(openstring.string, openstring.key)
870+
871+
def test_rtl(self):
    """Compiling with `is_rtl=True` adds RTL markers to the document.

    Parses `hello_world.docx`, compiles it with a replacement translation
    and `is_rtl=True`, then checks that:

    * the document holds exactly one `<w:bidi>` and every `<w:pPr>` holds
      exactly one, with `w:val="1"`;
    * the document holds at least one `<w:rtl>` and every `<w:rPr>` holds
      exactly one, with `w:val="1"`.
    """
    path = '{}/hello_world.docx'.format(self.TESTFILE_BASE)
    with open(path, 'rb') as f:
        content = f.read()
    handler = DocxHandler()
    template, stringset = handler.parse(content)
    openstring = stringset[0]

    # Compile with altered translation
    translation = u'<tx>Καλημέρα κόσμε </tx><tx href="https://el.transifex.com/">αυτός είναι ένας κρίκος</tx>'  # noqa
    stringset = [
        OpenString(openstring.key, translation, order=1)
    ]

    content = handler.compile(template, stringset, is_rtl=True)
    docx = DocxFile(content)
    soup = BeautifulSoup(docx.get_document(), 'xml')
    self.assertEqual(len(stringset), 1)
    self.assertEqual(len(soup.find_all("w:bidi")), 1)
    for pPr in soup.find_all("w:pPr"):
        self.assertEqual(len(pPr.find_all("w:bidi")), 1)
        for bidi in pPr.find_all("w:bidi"):
            self.assertEqual(bidi["w:val"], "1")

    # The previous `assertTrue(len(...), 1)` passed `1` as the failure
    # message, so it only ever verified "at least one"; say so explicitly.
    self.assertGreaterEqual(len(soup.find_all("w:rtl")), 1)
    for rPr in soup.find_all("w:rPr"):
        self.assertEqual(len(rPr.find_all("w:rtl")), 1)
        for rtl in rPr.find_all("w:rtl"):
            self.assertEqual(rtl["w:val"], "1")
31.4 KB
Binary file not shown.

openformats/tests/formats/pptx/test_pptx.py

+43
Original file line numberDiff line numberDiff line change
@@ -594,3 +594,46 @@ def test_pptx_file_with_autofield(self):
594594
slide = u'/ppt/slides/slide1.xml'
595595
for text in [u'Title', u'text']:
596596
self.assertTrue(text in pptx.get_slide(slide))
597+
598+
def test_rtl(self):
    """Compiling with `is_rtl=True` flags and re-aligns slide paragraphs.

    Records the `algn` value of every `<a:pPr>` in slide 1 of `rtl.pptx`,
    compiles the file with `is_rtl=True`, and then checks that every
    `<a:pPr>` carries `rtl="1"`, that no left alignment remains, and that
    each recorded alignment became the expected one (left turns into
    right; right, centre and justified are unchanged).
    """
    path = '{}/rtl.pptx'.format(self.TESTFILE_BASE)
    with open(path, 'rb') as fixture:
        content = fixture.read()

    slide = u'/ppt/slides/slide1.xml'

    # Alignment of each paragraph-properties tag before compiling,
    # keyed by its position in the slide.
    source_soup = BeautifulSoup(PptxFile(content).get_slide(slide), 'xml')
    algn_before = {}
    for index, pPr in enumerate(source_soup.find_all("a:pPr")):
        self.assertTrue(pPr["algn"] in ["just", "r", "l", "ctr"])
        algn_before[index] = pPr["algn"]

    handler = PptxHandler()
    template, stringset = handler.parse(content)

    content = handler.compile(template, stringset, is_rtl=True)

    # Alignment expected after an RTL compile, per original alignment.
    algn_after = {"l": "r", "r": "r", "ctr": "ctr", "just": "just"}

    compiled_soup = BeautifulSoup(PptxFile(content).get_slide(slide), 'xml')
    for index, pPr in enumerate(compiled_soup.find_all("a:pPr")):
        self.assertEqual(pPr["rtl"], "1")
        self.assertTrue(pPr["algn"] in ["just", "r", "ctr"])
        if index in algn_before:
            self.assertEqual(pPr["algn"], algn_after[algn_before[index]])

0 commit comments

Comments
 (0)