Skip to content

Commit

Permalink
bpo-43950: handle wide unicode characters in tracebacks (python#28150)
Browse files Browse the repository at this point in the history
  • Loading branch information
isidentical authored and aisk committed Feb 11, 2024
1 parent 035681b commit dd48f67
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 14 deletions.
59 changes: 57 additions & 2 deletions Lib/test/test_traceback.py
Original file line number Diff line number Diff line change
Expand Up @@ -924,8 +924,63 @@ def f():
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
" callable()",
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 4}, in f",
" print(1, www(",
" ^^^^",
f" print(1, www(",
f" ^^^^^^^",
]
self.assertEqual(actual, expected)

def test_byte_offset_with_wide_characters_term_highlight(self):
# Checks the marker row of a traceback when the failing expression uses
# non-ASCII identifiers.
#
# f() divides by a variable that is set to 0, so calling it raises
# ZeroDivisionError on its `return` line.
# NOTE: the expected line number below is computed relative to
# `def f():`, so no lines may be added or removed inside f's body.
def f():
说明说明 = 1
şçöğıĤellö = 0 # not wide but still non-ascii
return 说明说明 / şçöğıĤellö

# get_exception is defined outside this view; presumably it calls f and
# returns the formatted traceback as a list of lines -- confirm.
actual = self.get_exception(f)
# `co_firstlineno + 3` is the `return` line (three lines below `def f():`).
# The last entry is the marker row: `~` under both operands and `^` under
# the `/` operator.
# NOTE(review): leading whitespace inside these expected strings looks
# collapsed in this rendering -- verify against the real file.
expected = [
f"Traceback (most recent call last):",
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
f" callable()",
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 3}, in f",
f" return 说明说明 / şçöğıĤellö",
f" ~~~~~~~~~^~~~~~~~~~~~",
]
self.assertEqual(actual, expected)

def test_byte_offset_with_emojis_term_highlight(self):
# Checks the caret row of a traceback when the failing line contains emoji
# string literals and a call to a non-ASCII-named function.
#
# f() calls `func_说明说明`, which is not defined anywhere in this view;
# presumably the call fails with NameError -- confirm.
# NOTE: the expected line number below is computed relative to
# `def f():`, so no lines may be added or removed inside f's body.
def f():
return "✨🐍" + func_说明说明("📗🚛",
"📗🚛") + "🐍"

# get_exception is defined outside this view; presumably it calls f and
# returns the formatted traceback as a list of lines -- confirm.
actual = self.get_exception(f)
# `co_firstlineno + 1` is the first physical line of the two-line `return`
# expression; only that line and its `^` caret row are expected.
expected = [
f"Traceback (most recent call last):",
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
f" callable()",
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 1}, in f",
f' return "✨🐍" + func_说明说明("📗🚛",',
f" ^^^^^^^^^^^^^",
]
self.assertEqual(actual, expected)

def test_byte_offset_wide_chars_subscript(self):
# Checks the marker row of a traceback for a failing chained subscript
# whose keys are emoji / CJK string literals.
#
# The innermost dict only has the key "🐍🐍🐍", so the `["🐍"]` lookup in
# the `return` line raises KeyError when f() is called.
# NOTE: the expected line number below is computed relative to
# `def f():`, so no lines may be added or removed inside f's body.
def f():
my_dct = {
"✨🚛✨": {
"说明": {
"🐍🐍🐍": None
}
}
}
return my_dct["✨🚛✨"]["说明"]["🐍"]["说明"]["🐍🐍"]

# get_exception is defined outside this view; presumably it calls f and
# returns the formatted traceback as a list of lines -- confirm.
actual = self.get_exception(f)
# `co_firstlineno + 8` is the `return` line (eight lines below `def f():`).
# The last entry is the marker row, a run of `~` followed by a run of `^`.
expected = [
f"Traceback (most recent call last):",
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
f" callable()",
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 8}, in f",
f' return my_dct["✨🚛✨"]["说明"]["🐍"]["说明"]["🐍🐍"]',
f" ~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^",
]
self.assertEqual(actual, expected)

Expand Down
53 changes: 41 additions & 12 deletions Lib/traceback.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,39 +485,49 @@ def format_frame_summary(self, frame_summary):
stripped_line = frame_summary.line.strip()
row.append(' {}\n'.format(stripped_line))

orig_line_len = len(frame_summary._original_line)
line = frame_summary._original_line
orig_line_len = len(line)
frame_line_len = len(frame_summary.line.lstrip())
stripped_characters = orig_line_len - frame_line_len
if (
frame_summary.colno is not None
and frame_summary.end_colno is not None
):
start_offset = _byte_offset_to_character_offset(
frame_summary._original_line, frame_summary.colno) + 1
line, frame_summary.colno)
end_offset = _byte_offset_to_character_offset(
frame_summary._original_line, frame_summary.end_colno) + 1
line, frame_summary.end_colno)
code_segment = line[start_offset:end_offset]

anchors = None
if frame_summary.lineno == frame_summary.end_lineno:
with suppress(Exception):
anchors = _extract_caret_anchors_from_line_segment(
frame_summary._original_line[start_offset - 1:end_offset - 1]
)
anchors = _extract_caret_anchors_from_line_segment(code_segment)
else:
end_offset = stripped_characters + len(stripped_line)
# Don't count the newline since the anchors only need to
# go up until the last character of the line.
end_offset = len(line.rstrip())

# show indicators if primary char doesn't span the frame line
if end_offset - start_offset < len(stripped_line) or (
anchors and anchors.right_start_offset - anchors.left_end_offset > 0):
# When showing this on a terminal, some of the non-ASCII characters
# might be rendered as double-width characters, so we need to take
# that into account when calculating the length of the line.
dp_start_offset = _display_width(line, start_offset) + 1
dp_end_offset = _display_width(line, end_offset) + 1

row.append(' ')
row.append(' ' * (start_offset - stripped_characters))
row.append(' ' * (dp_start_offset - stripped_characters))

if anchors:
row.append(anchors.primary_char * (anchors.left_end_offset))
row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset))
row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset))
dp_left_end_offset = _display_width(code_segment, anchors.left_end_offset)
dp_right_start_offset = _display_width(code_segment, anchors.right_start_offset)
row.append(anchors.primary_char * dp_left_end_offset)
row.append(anchors.secondary_char * (dp_right_start_offset - dp_left_end_offset))
row.append(anchors.primary_char * (dp_end_offset - dp_start_offset - dp_right_start_offset))
else:
row.append('^' * (end_offset - start_offset))
row.append('^' * (dp_end_offset - dp_start_offset))

row.append('\n')

Expand Down Expand Up @@ -638,6 +648,25 @@ def _extract_caret_anchors_from_line_segment(segment):

return None

_WIDE_CHAR_SPECIFIERS = "WF"

def _display_width(line, offset):
"""Calculate the extra amount of width space the given source
code segment might take if it were to be displayed on a fixed
width output device. Supports wide unicode characters and emojis."""

# Fast track for ASCII-only strings
if line.isascii():
return offset

import unicodedata

return sum(
2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1
for char in line[:offset]
)



class _ExceptionPrintContext:
def __init__(self):
Expand Down

0 comments on commit dd48f67

Please sign in to comment.