Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: add render_links for Styler.to_html formatting #45058

Merged
merged 7 commits into from
Dec 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ Additionally there are specific enhancements to the HTML specific rendering:
- :meth:`.Styler.to_html` introduces keyword arguments ``sparse_index``, ``sparse_columns``, ``bold_headers``, ``caption``, ``max_rows`` and ``max_columns`` (:issue:`41946`, :issue:`43149`, :issue:`42972`).
- :meth:`.Styler.to_html` omits CSSStyle rules for hidden table elements as a performance enhancement (:issue:`43619`)
- Custom CSS classes can now be directly specified without string replacement (:issue:`43686`)
- Ability to render hyperlinks automatically via a new ``hyperlinks`` formatting keyword argument (:issue:`45058`)

There are also some LaTeX specific enhancements:

Expand Down
1 change: 1 addition & 0 deletions pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,7 @@ def to_latex(
| \\sisetup{detect-all = true} *(within {document})*
environment \\usepackage{longtable} if arg is "longtable"
| or any other relevant environment package
hyperlinks \\usepackage{hyperref}
===================== ==========================================================

**Cell Styles**
Expand Down
42 changes: 40 additions & 2 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,7 @@ def format(
decimal: str = ".",
thousands: str | None = None,
escape: str | None = None,
hyperlinks: str | None = None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a reason we prefer this to render_links?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My arguments: hyperlink is a bit more specific whereas link can be a general term in certain contexts. And, technically, everything is rendered so why not add render to every kwarg. And rendering is somewhat of an esoteric concept outside of graphics processing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok fair

) -> StylerRenderer:
r"""
Format the text display value of cells.
Expand Down Expand Up @@ -842,6 +843,13 @@ def format(

.. versionadded:: 1.3.0

hyperlinks : {"html", "latex"}, optional
Convert string patterns containing https://, http://, ftp:// or www. to
HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href
commands if "latex".

.. versionadded:: 1.4.0

Returns
-------
self : Styler
Expand Down Expand Up @@ -958,6 +966,7 @@ def format(
thousands is None,
na_rep is None,
escape is None,
hyperlinks is None,
)
):
self._display_funcs.clear()
Expand All @@ -980,6 +989,7 @@ def format(
decimal=decimal,
thousands=thousands,
escape=escape,
hyperlinks=hyperlinks,
)
for ri in ris:
self._display_funcs[(ri, ci)] = format_func
Expand All @@ -996,6 +1006,7 @@ def format_index(
decimal: str = ".",
thousands: str | None = None,
escape: str | None = None,
hyperlinks: str | None = None,
) -> StylerRenderer:
r"""
Format the text display value of index labels or column headers.
Expand Down Expand Up @@ -1027,6 +1038,10 @@ def format_index(
``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
LaTeX-safe sequences.
Escaping is done before ``formatter``.
hyperlinks : {"html", "latex"}, optional
Convert string patterns containing https://, http://, ftp:// or www. to
HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href
commands if "latex".

Returns
-------
Expand Down Expand Up @@ -1128,6 +1143,7 @@ def format_index(
thousands is None,
na_rep is None,
escape is None,
hyperlinks is None,
)
):
display_funcs_.clear()
Expand All @@ -1149,6 +1165,7 @@ def format_index(
decimal=decimal,
thousands=thousands,
escape=escape,
hyperlinks=hyperlinks,
)

for idx in [(i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))]:
Expand Down Expand Up @@ -1391,13 +1408,28 @@ def _str_escape(x, escape):
return x


def _render_href(x, format):
"""uses regex to detect a common URL pattern and converts to href tag in format."""
if isinstance(x, str):
if format == "html":
href = '<a href="{0}" target="_blank">{0}</a>'
elif format == "latex":
href = r"\href{{{0}}}{{{0}}}"
else:
raise ValueError("``hyperlinks`` format can only be 'html' or 'latex'")
pat = r"(https?:\/\/|ftp:\/\/|www.)[\w/\-?=%.]+\.[\w/\-&?=%.]+"
mroeschke marked this conversation as resolved.
Show resolved Hide resolved
return re.sub(pat, lambda m: href.format(m.group(0)), x)
return x


def _maybe_wrap_formatter(
formatter: BaseFormatter | None = None,
na_rep: str | None = None,
precision: int | None = None,
decimal: str = ".",
thousands: str | None = None,
escape: str | None = None,
hyperlinks: str | None = None,
) -> Callable:
"""
Allows formatters to be expressed as str, callable or None, where None returns
Expand Down Expand Up @@ -1431,11 +1463,17 @@ def _maybe_wrap_formatter(
else:
func_2 = func_1

# Render links
if hyperlinks is not None:
func_3 = lambda x: func_2(_render_href(x, format=hyperlinks))
else:
func_3 = func_2

# Replace missing values if na_rep
if na_rep is None:
return func_2
return func_3
else:
return lambda x: na_rep if isna(x) else func_2(x)
return lambda x: na_rep if isna(x) else func_3(x)


def non_reducing_slice(slice_: Subset):
Expand Down
40 changes: 40 additions & 0 deletions pandas/tests/io/formats/style/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -764,3 +764,43 @@ def test_hiding_index_columns_multiindex_trimming():
)

assert result == expected


@pytest.mark.parametrize("type", ["data", "index"])
@pytest.mark.parametrize(
"text, exp, found",
[
("no link, just text", False, ""),
("subdomain not www: sub.web.com", False, ""),
("www subdomain: www.web.com other", True, "www.web.com"),
("scheme full structure: http://www.web.com", True, "http://www.web.com"),
("scheme no top-level: http://www.web", True, "http://www.web"),
("no scheme, no top-level: www.web", False, "www.web"),
("https scheme: https://www.web.com", True, "https://www.web.com"),
("ftp scheme: ftp://www.web", True, "ftp://www.web"),
("subdirectories: www.web.com/directory", True, "www.web.com/directory"),
("Multiple domains: www.1.2.3.4", True, "www.1.2.3.4"),
mroeschke marked this conversation as resolved.
Show resolved Hide resolved
],
)
def test_rendered_links(type, text, exp, found):
if type == "data":
df = DataFrame([text])
styler = df.style.format(hyperlinks="html")
else:
df = DataFrame([0], index=[text])
styler = df.style.format_index(hyperlinks="html")

rendered = '<a href="{0}" target="_blank">{0}</a>'.format(found)
result = styler.to_html()
assert (rendered in result) is exp
assert (text in result) is not exp # test conversion done when expected and not


def test_multiple_rendered_links():
links = ("www.a.b", "http://a.c", "https://a.d", "ftp://a.e")
df = DataFrame(["text {} {} text {} {}".format(*links)])
result = df.style.format(hyperlinks="html").to_html()
href = '<a href="{0}" target="_blank">{0}</a>'
for link in links:
assert href.format(link) in result
assert href.format("text") not in result
8 changes: 8 additions & 0 deletions pandas/tests/io/formats/style/test_to_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -845,3 +845,11 @@ def test_latex_hiding_index_columns_multiindex_alignment():
"""
)
assert result == expected


def test_rendered_links():
# note the majority of testing is done in test_html.py: test_rendered_links
# these test only the alternative latex format is functional
df = DataFrame(["text www.domain.com text"])
result = df.style.format(hyperlinks="latex").to_latex()
assert r"text \href{www.domain.com}{www.domain.com} text" in result