Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: Do line breaking in WeasyPrint using Harfbuzz data #1840

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 23 additions & 12 deletions weasyprint/css/computed_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from tinycss2.color3 import parse_color

from ..logger import LOGGER
from ..text.ffi import ffi, pango, units_to_double
from ..text.line_break import Layout, first_line_metrics
from ..text.ffi import ffi, harfbuzz, pango, units_to_double, PANGO_SCALE
from ..text.line_break import Layout, shape_string
from ..urls import get_link_attribute
from .properties import (
INHERITED, INITIAL_NOT_COMPUTED, INITIAL_VALUES, Dimension)
Expand Down Expand Up @@ -719,13 +719,15 @@ def word_spacing(style, name, value):


def strut_layout(style, context=None):
"""Return a tuple of the used value of ``line-height`` and the baseline.
"""Return a tuple of the strut value of ``line-height`` and the baseline.

The baseline is given from the top edge of line height.

Used in CSS Inline Layout Module Level 3's "Logical Height Contributions of
Inline Boxes", and is technically only valid for inline boxes without
directly-contained glyphs or where the line-height value is not "normal":
other boxes should use their maximum ascender and descender values.
"""
# TODO: always get the real value for `context`? (if we really care…)

if style['font_size'] == 0:
return 0, 0

Expand All @@ -737,20 +739,29 @@ def strut_layout(style, context=None):
if key in context.strut_layouts:
return context.strut_layouts[key]

layout = Layout(context, style['font_size'], style)
layout.set_text(' ')
line, _ = layout.get_first_line()
_, _, _, _, text_height, baseline = first_line_metrics(
line, '', layout, resume_at=None, space_collapse=False, style=style)
_, _, _, shaping_results = shape_string(context, style, ' ')
if style['direction'] == 'rtl':
direction = harfbuzz.HB_DIRECTION_RTL
else:
direction = harfbuzz.HB_DIRECTION_LTR
extents = ffi.new('hb_font_extents_t *')

harfbuzz.hb_font_get_extents_for_direction(
shaping_results[0].hb_font, direction, extents)

text_height = (extents.ascender - extents.descender) / PANGO_SCALE

if style['line_height'] == 'normal':
result = text_height, baseline
result = text_height + (extents.line_gap / PANGO_SCALE), \
(extents.ascender + (extents.line_gap / 2)) / PANGO_SCALE
if context:
context.strut_layouts[key] = result
return result
type_, line_height = style['line_height']
if type_ == 'NUMBER':
line_height *= style['font_size']
result = line_height, baseline + (line_height - text_height) / 2
result = line_height, (extents.ascender / PANGO_SCALE) + \
(line_height - text_height) / 2
if context:
context.strut_layouts[key] = result
return result
Expand Down
227 changes: 113 additions & 114 deletions weasyprint/draw.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import contextlib
import operator
import traceback
from colorsys import hsv_to_rgb, rgb_to_hsv
from io import BytesIO
from math import ceil, cos, floor, pi, sin, sqrt, tan
Expand All @@ -15,8 +16,7 @@
from .layout.background import BackgroundLayer
from .matrix import Matrix
from .stacking import StackingContext
from .text.ffi import ffi, harfbuzz, pango, units_from_double, units_to_double
from .text.line_break import get_last_word_end
from .text.ffi import ffi, harfbuzz, pango, PANGO_SCALE

SIDES = ('top', 'right', 'bottom', 'left')

Expand Down Expand Up @@ -976,7 +976,6 @@ def draw_text(stream, textbox, offset_x, text_overflow, block_ellipsis):
stream.set_color_rgb(*textbox.style['color'][:3])
stream.set_alpha(textbox.style['color'][3])

textbox.pango_layout.reactivate(textbox.style)
stream.begin_text()
emojis = draw_first_line(
stream, textbox, text_overflow, block_ellipsis, x, y)
Expand All @@ -989,27 +988,26 @@ def draw_text(stream, textbox, offset_x, text_overflow, block_ellipsis):
color = textbox.style['text_decoration_color']
if color == 'currentColor':
color = textbox.style['color']
if 'overline' in values:
thickness = textbox.pango_layout.underline_thickness
offset_y = (
textbox.baseline - textbox.pango_layout.ascent + thickness / 2)
draw_text_decoration(
stream, textbox, offset_x, offset_y, thickness, color)
if 'underline' in values:
thickness = textbox.pango_layout.underline_thickness
offset_y = (
textbox.baseline - textbox.pango_layout.underline_position +
thickness / 2)
draw_text_decoration(
stream, textbox, offset_x, offset_y, thickness, color)
if 'line-through' in values:
thickness = textbox.pango_layout.strikethrough_thickness
offset_y = (
textbox.baseline - textbox.pango_layout.strikethrough_position)
draw_text_decoration(
stream, textbox, offset_x, offset_y, thickness, color)

textbox.pango_layout.deactivate()
# TODO: Render these correctly
# if 'overline' in values:
# thickness = textbox.pango_layout.underline_thickness
# offset_y = (
# textbox.baseline - textbox.pango_layout.ascent + thickness / 2)
# draw_text_decoration(
# stream, textbox, offset_x, offset_y, thickness, color)
# if 'underline' in values:
# thickness = textbox.pango_layout.underline_thickness
# offset_y = (
# textbox.baseline - textbox.pango_layout.underline_position +
# thickness / 2)
# draw_text_decoration(
# stream, textbox, offset_x, offset_y, thickness, color)
# if 'line-through' in values:
# thickness = textbox.pango_layout.strikethrough_thickness
# offset_y = (
# textbox.baseline - textbox.pango_layout.strikethrough_position)
# draw_text_decoration(
# stream, textbox, offset_x, offset_y, thickness, color)


def draw_emojis(stream, font_size, x, y, emojis):
Expand All @@ -1027,54 +1025,45 @@ def draw_first_line(stream, textbox, text_overflow, block_ellipsis, x, y,
if font_size < 1e-6: # Default float precision used by pydyf
return []

pango.pango_layout_set_single_paragraph_mode(
textbox.pango_layout.layout, True)

if text_overflow == 'ellipsis' or block_ellipsis != 'none':
assert textbox.pango_layout.max_width is not None
max_width = textbox.pango_layout.max_width
pango.pango_layout_set_width(
textbox.pango_layout.layout, units_from_double(max_width))
if text_overflow == 'ellipsis':
pango.pango_layout_set_ellipsize(
textbox.pango_layout.layout, pango.PANGO_ELLIPSIZE_END)
else:
if block_ellipsis == 'auto':
ellipsis = '…'
else:
assert block_ellipsis[0] == 'string'
ellipsis = block_ellipsis[1]

# Remove last word if hyphenated
new_text = textbox.pango_layout.text
if new_text.endswith(textbox.style['hyphenate_character']):
last_word_end = get_last_word_end(
new_text[:-len(textbox.style['hyphenate_character'])],
textbox.style['lang'])
if last_word_end:
new_text = new_text[:last_word_end]

textbox.pango_layout.set_text(new_text + ellipsis)

first_line, index = textbox.pango_layout.get_first_line()

if block_ellipsis != 'none':
while index:
last_word_end = get_last_word_end(
textbox.pango_layout.text[:-len(ellipsis)],
textbox.style['lang'])
if last_word_end is None:
break
new_text = textbox.pango_layout.text[:last_word_end]
textbox.pango_layout.set_text(new_text + ellipsis)
first_line, index = textbox.pango_layout.get_first_line()

utf8_text = textbox.pango_layout.text.encode()
previous_utf8_position = 0

runs = [first_line.runs[0]]
while runs[-1].next != ffi.NULL:
runs.append(runs[-1].next)
# TODO: handle ellipses (but maybe in layout.inline instead?)
# if text_overflow == 'ellipsis' or block_ellipsis != 'none':
# assert textbox.pango_layout.max_width is not None
# max_width = textbox.pango_layout.max_width
# pango.pango_layout_set_width(
# textbox.pango_layout.layout, units_from_double(max_width))
# if text_overflow == 'ellipsis':
# pango.pango_layout_set_ellipsize(
# textbox.pango_layout.layout, pango.PANGO_ELLIPSIZE_END)
# else:
# if block_ellipsis == 'auto':
# ellipsis = '…'
# else:
# assert block_ellipsis[0] == 'string'
# ellipsis = block_ellipsis[1]

# # Remove last word if hyphenated
# new_text = textbox.pango_layout.text
# if new_text.endswith(textbox.style['hyphenate_character']):
# last_word_end = get_last_word_end(
# new_text[:-len(textbox.style['hyphenate_character'])],
# textbox.style['lang'])
# if last_word_end:
# new_text = new_text[:last_word_end]

# textbox.pango_layout.set_text(new_text + ellipsis)

# first_line, index = textbox.pango_layout.get_first_line()

# if block_ellipsis != 'none':
# while index:
# last_word_end = get_last_word_end(
# textbox.pango_layout.text[:-len(ellipsis)],
# textbox.style['lang'])
# if last_word_end is None:
# break
# new_text = textbox.pango_layout.text[:last_word_end]
# textbox.pango_layout.set_text(new_text + ellipsis)
# first_line, index = textbox.pango_layout.get_first_line()

matrix = Matrix(1, 0, 0, -1, x, y)
if angle:
Expand All @@ -1085,22 +1074,25 @@ def draw_first_line(stream, textbox, text_overflow, block_ellipsis, x, y,
string = ''
x_advance = 0
emojis = []
for run in runs:
# Pango objects
glyph_item = ffi.cast('PangoGlyphItem *', run.data)
glyph_string = glyph_item.glyphs
glyphs = glyph_string.glyphs
num_glyphs = glyph_string.num_glyphs
offset = glyph_item.item.offset
clusters = glyph_string.log_clusters

if not hasattr(textbox, 'parent_linebox'):
# TODO: a text box without a parent line box is unexpected; find out how it can happen.
return []

parent_linebox = textbox.parent_linebox
for shaping in parent_linebox.shaping_results:
if shaping.end < textbox.render_range[0]:
# Skip to the first shaping result we can use.
continue
if shaping.start > textbox.render_range[1]:
# We're past the shaping results we need to render; we're done.
break

# Font content
pango_font = glyph_item.item.analysis.font
font = stream.add_font(pango_font)
font = stream.add_font(shaping.pango_font)

# Positions of the glyphs in the UTF-8 string
utf8_positions = [offset + clusters[i] for i in range(1, num_glyphs)]
utf8_positions.append(offset + glyph_item.item.length)
last_cluster = -1

# Go through the run glyphs
if font != last_font:
Expand All @@ -1110,58 +1102,65 @@ def draw_first_line(stream, textbox, text_overflow, block_ellipsis, x, y,
last_font = font
stream.set_font_size(font.hash, 1 if font.bitmap else font_size)
string += '<'
for i in range(num_glyphs):
glyph_info = glyphs[i]
glyph = glyph_info.glyph
width = glyph_info.geometry.width
if (glyph == pango.PANGO_GLYPH_EMPTY or
glyph & pango.PANGO_GLYPH_UNKNOWN_FLAG):
string += f'>{-width / font_size}<'
for i, glyph_info in enumerate(shaping.glyph_infos):
if glyph_info.cluster < textbox.render_range[0]:
continue
utf8_position = utf8_positions[i]
if glyph_info.cluster >= textbox.render_range[1]:
break
glyph = glyph_info.codepoint # It's a glyph ID, oddity of HarfBuzz

width = shaping.glyph_positions[i].x_advance

offset = glyph_info.geometry.x_offset / font_size
offset = shaping.glyph_positions[i].x_offset / font_size
if offset:
string += f'>{-offset}<'
string += f'{glyph:02x}' if font.bitmap else f'{glyph:04x}'

# Ink bounding box and logical widths in font
if glyph not in font.widths:
pango.pango_font_get_glyph_extents(
pango_font, glyph, stream.ink_rect, stream.logical_rect)
harfbuzz.hb_font_get_glyph_extents(
shaping.hb_font, glyph, stream.hb_extents)
extents = stream.hb_extents
x1, y1, x2, y2 = (
stream.ink_rect.x,
-stream.ink_rect.y - stream.ink_rect.height,
stream.ink_rect.x + stream.ink_rect.width,
-stream.ink_rect.y)
extents.x_bearing,
extents.y_bearing + extents.height,
extents.x_bearing + extents.width,
extents.y_bearing)
if x1 < font.bbox[0]:
font.bbox[0] = int(units_to_double(x1 * 1000) / font_size)
font.bbox[0] = int(x1 * 1000 / font_size / PANGO_SCALE)
if y1 < font.bbox[1]:
font.bbox[1] = int(units_to_double(y1 * 1000) / font_size)
font.bbox[1] = int(y1 * 1000 / font_size / PANGO_SCALE)
if x2 > font.bbox[2]:
font.bbox[2] = int(units_to_double(x2 * 1000) / font_size)
font.bbox[2] = int(x2 * 1000 / font_size / PANGO_SCALE)
if y2 > font.bbox[3]:
font.bbox[3] = int(units_to_double(y2 * 1000) / font_size)
font.bbox[3] = int(y2 * 1000 / font_size / PANGO_SCALE)
font.widths[glyph] = int(round(
units_to_double(stream.logical_rect.width * 1000) /
font_size))
abs(width) * 1000 / font_size / PANGO_SCALE))

# Kerning, word spacing, letter spacing
kerning = int(
font.widths[glyph] -
units_to_double(width * 1000) / font_size +
width * 1000 / font_size / PANGO_SCALE +
offset)
if kerning:
string += f'>{kerning}<'

# Mapping between glyphs and characters
if glyph not in font.cmap:
utf8_slice = slice(previous_utf8_position, utf8_position)
font.cmap[glyph] = utf8_text[utf8_slice].decode()
previous_utf8_position = utf8_position
if glyph not in font.cmap and \
shaping.glyph_infos[i].cluster != last_cluster:
# If there is a multi-glyph output for a given run of text
# (for example, a multi-glyph ligature, or a multi-glyph
# expansion), this assigns all of the text to the first glyph.
# This is not ideal; we should investigate adding a PDF span
# and ActualText entry over these instead.
utf8_slice = slice(
last_cluster + 1,
shaping.glyph_infos[i].cluster + 1)
font.cmap[glyph] = parent_linebox.shaping_string[utf8_slice]
last_cluster = shaping.glyph_infos[i].cluster

if font.svg:
hb_font = pango.pango_font_get_hb_font(pango_font)
hb_font = pango.pango_font_get_hb_font(shaping.pango_font)
hb_face = harfbuzz.hb_font_get_face(hb_font)
hb_blob = ffi.gc(
harfbuzz.hb_ot_color_glyph_reference_svg(hb_face, glyph),
Expand All @@ -1174,7 +1173,7 @@ def draw_first_line(stream, textbox, text_overflow, block_ellipsis, x, y,
a = d = font.widths[glyph] / 1000 / font.upem * font_size
emojis.append([image, font, a, d, x_advance, 0])
elif font.png:
hb_font = pango.pango_font_get_hb_font(pango_font)
hb_font = pango.pango_font_get_hb_font(shaping.pango_font)
hb_blob = ffi.gc(
harfbuzz.hb_ot_color_glyph_reference_png(hb_font, glyph),
harfbuzz.hb_blob_destroy)
Expand All @@ -1190,8 +1189,8 @@ def draw_first_line(stream, textbox, text_overflow, block_ellipsis, x, y,
pango.pango_font_get_glyph_extents(
pango_font, glyph, stream.ink_rect,
stream.logical_rect)
f = units_to_double(
(-stream.logical_rect.y - stream.logical_rect.height))
f = ((-stream.logical_rect.y - stream.logical_rect.height)
/ PANGO_SCALE)
f = f / font_size - font_size
emojis.append([image, font, a, d, x_advance, f])

Expand Down
4 changes: 4 additions & 0 deletions weasyprint/formatting_structure/boxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ class Box:
cached_counter_values = None
missing_link = None

# Positioning
position_x = 0
position_y = 0

# Default, overridden on some subclasses
def all_children(self):
return ()
Expand Down
Loading