Skip to content

Commit

Permalink
ENH: add render_links for Styler.to_html formatting (#45058)
Browse files Browse the repository at this point in the history
  • Loading branch information
attack68 authored Dec 28, 2021
1 parent 4b77cbe commit 3c19380
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ Additionally there are specific enhancements to the HTML specific rendering:
- :meth:`.Styler.to_html` introduces keyword arguments ``sparse_index``, ``sparse_columns``, ``bold_headers``, ``caption``, ``max_rows`` and ``max_columns`` (:issue:`41946`, :issue:`43149`, :issue:`42972`).
- :meth:`.Styler.to_html` omits CSSStyle rules for hidden table elements as a performance enhancement (:issue:`43619`)
- Custom CSS classes can now be directly specified without string replacement (:issue:`43686`)
- Ability to render hyperlinks automatically via a new ``hyperlinks`` formatting keyword argument (:issue:`45058`)

There are also some LaTeX specific enhancements:

Expand Down
1 change: 1 addition & 0 deletions pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,7 @@ def to_latex(
| \\sisetup{detect-all = true} *(within {document})*
environment \\usepackage{longtable} if arg is "longtable"
| or any other relevant environment package
hyperlinks \\usepackage{hyperref}
===================== ==========================================================
**Cell Styles**
Expand Down
42 changes: 40 additions & 2 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,7 @@ def format(
decimal: str = ".",
thousands: str | None = None,
escape: str | None = None,
hyperlinks: str | None = None,
) -> StylerRenderer:
r"""
Format the text display value of cells.
Expand Down Expand Up @@ -842,6 +843,13 @@ def format(
.. versionadded:: 1.3.0
hyperlinks : {"html", "latex"}, optional
Convert string patterns containing https://, http://, ftp:// or www. to
HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href
commands if "latex".
.. versionadded:: 1.4.0
Returns
-------
self : Styler
Expand Down Expand Up @@ -958,6 +966,7 @@ def format(
thousands is None,
na_rep is None,
escape is None,
hyperlinks is None,
)
):
self._display_funcs.clear()
Expand All @@ -980,6 +989,7 @@ def format(
decimal=decimal,
thousands=thousands,
escape=escape,
hyperlinks=hyperlinks,
)
for ri in ris:
self._display_funcs[(ri, ci)] = format_func
Expand All @@ -996,6 +1006,7 @@ def format_index(
decimal: str = ".",
thousands: str | None = None,
escape: str | None = None,
hyperlinks: str | None = None,
) -> StylerRenderer:
r"""
Format the text display value of index labels or column headers.
Expand Down Expand Up @@ -1027,6 +1038,10 @@ def format_index(
``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
LaTeX-safe sequences.
Escaping is done before ``formatter``.
hyperlinks : {"html", "latex"}, optional
Convert string patterns containing https://, http://, ftp:// or www. to
HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href
commands if "latex".
Returns
-------
Expand Down Expand Up @@ -1128,6 +1143,7 @@ def format_index(
thousands is None,
na_rep is None,
escape is None,
hyperlinks is None,
)
):
display_funcs_.clear()
Expand All @@ -1149,6 +1165,7 @@ def format_index(
decimal=decimal,
thousands=thousands,
escape=escape,
hyperlinks=hyperlinks,
)

for idx in [(i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))]:
Expand Down Expand Up @@ -1391,13 +1408,28 @@ def _str_escape(x, escape):
return x


def _render_href(x, format):
"""uses regex to detect a common URL pattern and converts to href tag in format."""
if isinstance(x, str):
if format == "html":
href = '<a href="{0}" target="_blank">{0}</a>'
elif format == "latex":
href = r"\href{{{0}}}{{{0}}}"
else:
raise ValueError("``hyperlinks`` format can only be 'html' or 'latex'")
pat = r"(https?:\/\/|ftp:\/\/|www.)[\w/\-?=%.]+\.[\w/\-&?=%.]+"
return re.sub(pat, lambda m: href.format(m.group(0)), x)
return x


def _maybe_wrap_formatter(
formatter: BaseFormatter | None = None,
na_rep: str | None = None,
precision: int | None = None,
decimal: str = ".",
thousands: str | None = None,
escape: str | None = None,
hyperlinks: str | None = None,
) -> Callable:
"""
Allows formatters to be expressed as str, callable or None, where None returns
Expand Down Expand Up @@ -1431,11 +1463,17 @@ def _maybe_wrap_formatter(
else:
func_2 = func_1

# Render links
if hyperlinks is not None:
func_3 = lambda x: func_2(_render_href(x, format=hyperlinks))
else:
func_3 = func_2

# Replace missing values if na_rep
if na_rep is None:
return func_2
return func_3
else:
return lambda x: na_rep if isna(x) else func_2(x)
return lambda x: na_rep if isna(x) else func_3(x)


def non_reducing_slice(slice_: Subset):
Expand Down
40 changes: 40 additions & 0 deletions pandas/tests/io/formats/style/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -764,3 +764,43 @@ def test_hiding_index_columns_multiindex_trimming():
)

assert result == expected


@pytest.mark.parametrize("type", ["data", "index"])
@pytest.mark.parametrize(
    "text, exp, found",
    [
        ("no link, just text", False, ""),
        ("subdomain not www: sub.web.com", False, ""),
        ("www subdomain: www.web.com other", True, "www.web.com"),
        ("scheme full structure: http://www.web.com", True, "http://www.web.com"),
        ("scheme no top-level: http://www.web", True, "http://www.web"),
        ("no scheme, no top-level: www.web", False, "www.web"),
        ("https scheme: https://www.web.com", True, "https://www.web.com"),
        ("ftp scheme: ftp://www.web", True, "ftp://www.web"),
        ("subdirectories: www.web.com/directory", True, "www.web.com/directory"),
        ("Multiple domains: www.1.2.3.4", True, "www.1.2.3.4"),
    ],
)
def test_rendered_links(type, text, exp, found):
    """
    Check URL detection for ``hyperlinks="html"`` in data cells and index labels.

    ``exp`` states whether ``found`` should be rendered as an anchor tag; when
    it is, the untouched ``text`` must no longer appear in the output, and
    vice versa.
    """
    if type != "data":
        # the text is placed in the index and formatted via ``format_index``
        frame = DataFrame([0], index=[text])
        styled = frame.style.format_index(hyperlinks="html")
    else:
        frame = DataFrame([text])
        styled = frame.style.format(hyperlinks="html")

    html = styled.to_html()
    anchor = f'<a href="{found}" target="_blank">{found}</a>'

    assert (anchor in html) is exp
    # the raw text survives only when no conversion took place
    assert (text in html) is not exp


def test_multiple_rendered_links():
links = ("www.a.b", "http://a.c", "https://a.d", "ftp://a.e")
df = DataFrame(["text {} {} text {} {}".format(*links)])
result = df.style.format(hyperlinks="html").to_html()
href = '<a href="{0}" target="_blank">{0}</a>'
for link in links:
assert href.format(link) in result
assert href.format("text") not in result
8 changes: 8 additions & 0 deletions pandas/tests/io/formats/style/test_to_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -845,3 +845,11 @@ def test_latex_hiding_index_columns_multiindex_alignment():
"""
)
assert result == expected


def test_rendered_links():
# note the majority of testing is done in test_html.py: test_rendered_links
# these test only the alternative latex format is functional
df = DataFrame(["text www.domain.com text"])
result = df.style.format(hyperlinks="latex").to_latex()
assert r"text \href{www.domain.com}{www.domain.com} text" in result

0 comments on commit 3c19380

Please sign in to comment.