Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 30 additions & 10 deletions tabulate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1638,7 +1638,13 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"):
return rows, headers, headers_pad


def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long_words=_BREAK_LONG_WORDS, break_on_hyphens=_BREAK_ON_HYPHENS):
def _wrap_text_to_colwidths(
list_of_lists,
colwidths,
numparses=True,
break_long_words=_BREAK_LONG_WORDS,
break_on_hyphens=_BREAK_ON_HYPHENS,
):
if len(list_of_lists):
num_cols = len(list_of_lists[0])
else:
Expand All @@ -1655,7 +1661,11 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long
continue

if width is not None:
wrapper = _CustomTextWrap(width=width, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens)
wrapper = _CustomTextWrap(
width=width,
break_long_words=break_long_words,
break_on_hyphens=break_on_hyphens,
)
casted_cell = str(cell)
wrapped = [
"\n".join(wrapper.wrap(line))
Expand Down Expand Up @@ -2258,7 +2268,11 @@ def tabulate(

numparses = _expand_numparse(disable_numparse, num_cols)
list_of_lists = _wrap_text_to_colwidths(
list_of_lists, maxcolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens
list_of_lists,
maxcolwidths,
numparses=numparses,
break_long_words=break_long_words,
break_on_hyphens=break_on_hyphens,
)

if maxheadercolwidths is not None:
Expand All @@ -2272,7 +2286,11 @@ def tabulate(

numparses = _expand_numparse(disable_numparse, num_cols)
headers = _wrap_text_to_colwidths(
[headers], maxheadercolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens
[headers],
maxheadercolwidths,
numparses=numparses,
break_long_words=break_long_words,
break_on_hyphens=break_on_hyphens,
)[0]

# empty values in the first column of RST tables should be escaped (issue #82)
Expand Down Expand Up @@ -2737,15 +2755,17 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
space_left = width - cur_len

# If we're allowed to break long words, then do so: put as much
# of the next chunk onto the current line as will fit.
if self.break_long_words:
# of the next chunk onto the current line as will fit. Be careful
# of chunks that become empty once ANSI codes are removed.
chunk = reversed_chunks[-1]
chunk_noansi = _strip_ansi(chunk)
if self.break_long_words and chunk_noansi:
# Tabulate Custom: Build the string up piece-by-piece in order to
# take each character's width into account
chunk = reversed_chunks[-1]
i = 1
# Only count printable characters, so strip_ansi first, index later.
while len(_strip_ansi(chunk)[:i]) <= space_left:
i = i + 1
for i in range(1, len(chunk_noansi) + 1):
if self._len(chunk_noansi[:i]) > space_left:
break
# Consider escape codes when breaking words up
total_escape_len = 0
last_group = 0
Expand Down
38 changes: 38 additions & 0 deletions test/test_textwrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,44 @@ def test_wrap_color_line_longword():
assert_equal(expected, result)


def test_wrap_color_line_longword_zerowidth():
    """Lines with zero-width symbols (accents) must keep those symbols with the prior symbol.

    Let's exercise the calculation where the available symbols never satisfy the
    available width, and ensure chunk calculation succeeds and ANSI colors are
    maintained.

    Most combining marks combine with the preceding character (even in
    right-to-left alphabets):
    - "e\u0301" → "é" (e + combining acute accent)
    - "a\u0308" → "ä" (a + combining diaeresis)
    - "n\u0303" → "ñ" (n + combining tilde)
    Enclosing Marks: Some combining marks enclose the base character:
    - "A\u20DD" → Ⓐ Combining enclosing circle
    Multiple Combining Marks: You can stack multiple combining marks on a single
    base character:
    - "e\u0301\u0308" → e with both acute accent and diaeresis
    Zero-width space → renders as "ab", with an invisible zero-width space
    between the two letters:
    - "a\u200Bb"

    """
    # The test is skipped when the optional wcwidth package cannot be imported.
    try:
        import wcwidth  # noqa
    except ImportError:
        # Report this test's own name so skip reports point at the right test.
        skip("test_wrap_color_line_longword_zerowidth is skipped")

    # Exactly filled, with a green zero-width segment at the end.
    data = (
        "This_is_A\u20DD_\033[31mte\u0301st_string_\u200b"
        "to_te\u0301\u0308st_a\u0308ccent\033[32m\u200b\033[0m"
    )

    # Each expected line is wrapped to width 12; the red color opened in the
    # first line is closed at each line end and re-opened on the next line.
    expected = [
        "This_is_A\u20DD_\033[31mte\u0301\033[0m",
        "\033[31mst_string_\u200bto\033[0m",
        "\033[31m_te\u0301\u0308st_a\u0308ccent\033[32m\u200b\033[0m",
    ]
    wrapper = CTW(width=12)
    result = wrapper.wrap(data)
    assert_equal(expected, result)


def test_wrap_color_line_multiple_escapes():
data = "012345(\x1b[32ma\x1b[0mbc\x1b[32mdefghij\x1b[0m)"
expected = [
Expand Down
Loading