From 5b61597ef4050cae739f0960cfdb083a338be49b Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Fri, 24 Dec 2021 21:39:20 +0100 Subject: [PATCH 01/25] render links --- pandas/io/formats/style_render.py | 35 +++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index dcb1f9a2a70dc..ae01232c4e661 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -798,6 +798,7 @@ def format( decimal: str = ".", thousands: str | None = None, escape: str | None = None, + render_links: bool = False, ) -> StylerRenderer: r""" Format the text display value of cells. @@ -842,6 +843,12 @@ def format( .. versionadded:: 1.3.0 + render_links : bool, optional + Convert string patterns containing https://, http://, ftp:// or www. to + HTML tags as clickable URL hyperlinks. + + .. versionadded:: 1.4.0 + Returns ------- self : Styler @@ -958,6 +965,7 @@ def format( thousands is None, na_rep is None, escape is None, + render_links is False, ) ): self._display_funcs.clear() @@ -980,6 +988,7 @@ def format( decimal=decimal, thousands=thousands, escape=escape, + render_links=render_links, ) for ri in ris: self._display_funcs[(ri, ci)] = format_func @@ -996,6 +1005,7 @@ def format_index( decimal: str = ".", thousands: str | None = None, escape: str | None = None, + render_links: bool = False, ) -> StylerRenderer: r""" Format the text display value of index labels or column headers. @@ -1027,6 +1037,9 @@ def format_index( ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with LaTeX-safe sequences. Escaping is done before ``formatter``. + render_links : bool, optional + Convert string patterns containing https://, http://, ftp:// or www. to + HTML tags as clickable URL hyperlinks. 
Returns ------- @@ -1128,6 +1141,7 @@ def format_index( thousands is None, na_rep is None, escape is None, + render_links is False, ) ): display_funcs_.clear() @@ -1149,6 +1163,7 @@ def format_index( decimal=decimal, thousands=thousands, escape=escape, + render_links=render_links, ) for idx in [(i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))]: @@ -1391,6 +1406,15 @@ def _str_escape(x, escape): return x +def _render_href(x): + """uses regex to detect a common URL pattern and converts to HTML tag""" + if isinstance(x, str): + href = '{0}' + pat = r"(https?:\/\/|ftp:\/\/|www.)[\w/\-?=%.]+\.[\w/\-&?=%.]+" + return re.sub(pat, lambda m: href.format(m.group(0)), x) + return x + + def _maybe_wrap_formatter( formatter: BaseFormatter | None = None, na_rep: str | None = None, @@ -1398,6 +1422,7 @@ def _maybe_wrap_formatter( decimal: str = ".", thousands: str | None = None, escape: str | None = None, + render_links: bool = False, ) -> Callable: """ Allows formatters to be expressed as str, callable or None, where None returns @@ -1431,11 +1456,17 @@ def _maybe_wrap_formatter( else: func_2 = func_1 + # Render links + if render_links: + func_3 = lambda x: func_2(_render_href(x)) + else: + func_3 = func_2 + # Replace missing values if na_rep if na_rep is None: - return func_2 + return func_3 else: - return lambda x: na_rep if isna(x) else func_2(x) + return lambda x: na_rep if isna(x) else func_3(x) def non_reducing_slice(slice_: Subset): From fe8dd37d66d47424b37025035e0e4650bee1c3ee Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Fri, 24 Dec 2021 22:09:47 +0100 Subject: [PATCH 02/25] tests --- pandas/tests/io/formats/style/test_html.py | 30 ++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py index 2143ef40582a5..71f78a88e0b59 100644 --- a/pandas/tests/io/formats/style/test_html.py +++ b/pandas/tests/io/formats/style/test_html.py @@ -764,3 +764,33 @@ 
def test_hiding_index_columns_multiindex_trimming(): ) assert result == expected + + +@pytest.mark.parametrize("type", ["data", "index"]) +@pytest.mark.parametrize( + "text, exp, found", + [ + ("no link, just text", False, ""), + ("subdomain not www: sub.web.com", False, ""), + ("www subdomain: www.web.com other", True, "www.web.com"), + ("scheme full structure: http://www.web.com", True, "http://www.web.com"), + ("scheme no top-level: http://www.web", True, "http://www.web"), + ("no scheme, no top-level: www.web", False, "www.web"), + ("https scheme: https://www.web.com", True, "https://www.web.com"), + ("ftp scheme: ftp://www.web", True, "ftp://www.web"), + ("subdirectories: www.web.com/directory", True, "www.web.com/directory"), + ("Multiple domains: www.1.2.3.4", True, "www.1.2.3.4"), + ], +) +def test_rendered_links(type, text, exp, found): + if type == "data": + df = DataFrame([text]) + styler = df.style.format(render_links=True) + else: + df = DataFrame([0], index=[text]) + styler = df.style.format_index(render_links=True) + + rendered = '{0}'.format(found) + result = styler.to_html() + assert (rendered in result) is exp + assert (text in result) is not exp # test conversion done when expected and not From 11f20eb23faada4b41b9555e04961c8159682241 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Fri, 24 Dec 2021 22:13:52 +0100 Subject: [PATCH 03/25] whats new --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/io/formats/style_render.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 9d788ffcfabe1..92a21108898ea 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -111,6 +111,7 @@ Additionally there are specific enhancements to the HTML specific rendering: - :meth:`.Styler.to_html` introduces keyword arguments ``sparse_index``, ``sparse_columns``, ``bold_headers``, ``caption``, ``max_rows`` and ``max_columns`` (:issue:`41946`, 
:issue:`43149`, :issue:`42972`). - :meth:`.Styler.to_html` omits CSSStyle rules for hidden table elements as a performance enhancement (:issue:`43619`) - Custom CSS classes can now be directly specified without string replacement (:issue:`43686`) + - Ability to render hyperlinks automatically via a new ``render_links`` formatting keyword argument (:issue:`45058`) There are also some LaTeX specific enhancements: diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index ae01232c4e661..d539bc1d95618 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -843,7 +843,7 @@ def format( .. versionadded:: 1.3.0 - render_links : bool, optional + render_links : bool Convert string patterns containing https://, http://, ftp:// or www. to HTML tags as clickable URL hyperlinks. @@ -1037,7 +1037,7 @@ def format_index( ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with LaTeX-safe sequences. Escaping is done before ``formatter``. - render_links : bool, optional + render_links : bool Convert string patterns containing https://, http://, ftp:// or www. to HTML tags as clickable URL hyperlinks. 
From a7b3c59625f46e4419c624a1a92f3192268de59b Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Fri, 24 Dec 2021 22:36:01 +0100 Subject: [PATCH 04/25] multiple links test --- pandas/tests/io/formats/style/test_html.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py index 71f78a88e0b59..ab2c2ff62595b 100644 --- a/pandas/tests/io/formats/style/test_html.py +++ b/pandas/tests/io/formats/style/test_html.py @@ -794,3 +794,13 @@ def test_rendered_links(type, text, exp, found): result = styler.to_html() assert (rendered in result) is exp assert (text in result) is not exp # test conversion done when expected and not + + +def test_multiple_rendered_links(): + links = ("www.a.b", "http://a.c", "https://a.d", "ftp://a.e") + df = DataFrame(["text {} {} text {} {}".format(*links)]) + result = df.style.format(render_links=True).to_html() + href = '{0}' + for link in links: + assert href.format(link) in result + assert href.format("text") not in result From bf9b9c2e12a2dfff432cef0439921d31fdfb2dc9 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 25 Dec 2021 11:10:28 +0100 Subject: [PATCH 05/25] add LaTeX. chg to `hyperlinks` instead of `render_links` --- pandas/io/formats/style_render.py | 39 +++++++++++-------- pandas/tests/io/formats/style/test_html.py | 6 +-- .../tests/io/formats/style/test_to_latex.py | 8 ++++ 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index d539bc1d95618..a6b8913b23d9d 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -798,7 +798,7 @@ def format( decimal: str = ".", thousands: str | None = None, escape: str | None = None, - render_links: bool = False, + hyperlinks: str | None = None, ) -> StylerRenderer: r""" Format the text display value of cells. @@ -843,9 +843,10 @@ def format( .. 
versionadded:: 1.3.0 - render_links : bool + hyperlinks : {"html", "latex"}, optional Convert string patterns containing https://, http://, ftp:// or www. to - HTML tags as clickable URL hyperlinks. + HTML tags as clickable URL hyperlinks if "html", or LaTeX \href + commands if "latex". .. versionadded:: 1.4.0 @@ -965,7 +966,7 @@ def format( thousands is None, na_rep is None, escape is None, - render_links is False, + hyperlinks is None, ) ): self._display_funcs.clear() @@ -988,7 +989,7 @@ def format( decimal=decimal, thousands=thousands, escape=escape, - render_links=render_links, + hyperlinks=hyperlinks, ) for ri in ris: self._display_funcs[(ri, ci)] = format_func @@ -1005,7 +1006,7 @@ def format_index( decimal: str = ".", thousands: str | None = None, escape: str | None = None, - render_links: bool = False, + hyperlinks: str | None = None, ) -> StylerRenderer: r""" Format the text display value of index labels or column headers. @@ -1037,9 +1038,10 @@ def format_index( ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with LaTeX-safe sequences. Escaping is done before ``formatter``. - render_links : bool + hyperlinks : {"html", "latex"}, optional Convert string patterns containing https://, http://, ftp:// or www. to - HTML tags as clickable URL hyperlinks. + HTML tags as clickable URL hyperlinks if "html", or LaTeX \href + commands if "latex". 
Returns ------- @@ -1141,7 +1143,7 @@ def format_index( thousands is None, na_rep is None, escape is None, - render_links is False, + hyperlinks is None, ) ): display_funcs_.clear() @@ -1163,7 +1165,7 @@ def format_index( decimal=decimal, thousands=thousands, escape=escape, - render_links=render_links, + hyperlinks=hyperlinks, ) for idx in [(i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))]: @@ -1406,10 +1408,15 @@ def _str_escape(x, escape): return x -def _render_href(x): - """uses regex to detect a common URL pattern and converts to HTML tag""" +def _render_href(x, format): + """uses regex to detect a common URL pattern and converts to href tag in format.""" if isinstance(x, str): - href = '{0}' + if format == "html": + href = '{0}' + elif format == "latex": + href = r"\href{{{0}}}{{{0}}}" + else: + raise ValueError("``hyperlinks`` format can only be 'html' or 'latex'") pat = r"(https?:\/\/|ftp:\/\/|www.)[\w/\-?=%.]+\.[\w/\-&?=%.]+" return re.sub(pat, lambda m: href.format(m.group(0)), x) return x @@ -1422,7 +1429,7 @@ def _maybe_wrap_formatter( decimal: str = ".", thousands: str | None = None, escape: str | None = None, - render_links: bool = False, + hyperlinks: str | None = None, ) -> Callable: """ Allows formatters to be expressed as str, callable or None, where None returns @@ -1457,8 +1464,8 @@ def _maybe_wrap_formatter( func_2 = func_1 # Render links - if render_links: - func_3 = lambda x: func_2(_render_href(x)) + if hyperlinks is not None: + func_3 = lambda x: func_2(_render_href(x, format=hyperlinks)) else: func_3 = func_2 diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py index ab2c2ff62595b..fad289d5e0d2c 100644 --- a/pandas/tests/io/formats/style/test_html.py +++ b/pandas/tests/io/formats/style/test_html.py @@ -785,10 +785,10 @@ def test_hiding_index_columns_multiindex_trimming(): def test_rendered_links(type, text, exp, found): if type == "data": df = DataFrame([text]) - styler = 
df.style.format(render_links=True) + styler = df.style.format(hyperlinks="html") else: df = DataFrame([0], index=[text]) - styler = df.style.format_index(render_links=True) + styler = df.style.format_index(hyperlinks="html") rendered = '{0}'.format(found) result = styler.to_html() @@ -799,7 +799,7 @@ def test_rendered_links(type, text, exp, found): def test_multiple_rendered_links(): links = ("www.a.b", "http://a.c", "https://a.d", "ftp://a.e") df = DataFrame(["text {} {} text {} {}".format(*links)]) - result = df.style.format(render_links=True).to_html() + result = df.style.format(hyperlinks="html").to_html() href = '{0}' for link in links: assert href.format(link) in result diff --git a/pandas/tests/io/formats/style/test_to_latex.py b/pandas/tests/io/formats/style/test_to_latex.py index 9c2a364b396b8..0ecf6079044e0 100644 --- a/pandas/tests/io/formats/style/test_to_latex.py +++ b/pandas/tests/io/formats/style/test_to_latex.py @@ -845,3 +845,11 @@ def test_latex_hiding_index_columns_multiindex_alignment(): """ ) assert result == expected + + +def test_rendered_links(): + # note the majority of testing is done in test_html.py: test_rendered_links + # these test only the alternative latex format is functional + df = DataFrame(["text www.domain.com text"]) + result = df.style.format(hyperlinks="latex").to_latex() + assert r"text \href{www.domain.com}{www.domain.com} text" in result From ea9c6a684cfc062b3730f79d66e297cdb132243c Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 25 Dec 2021 11:13:03 +0100 Subject: [PATCH 06/25] whats new update --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 92a21108898ea..330d64ea88b8c 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -111,7 +111,7 @@ Additionally there are specific enhancements to the HTML specific rendering: - :meth:`.Styler.to_html` introduces 
keyword arguments ``sparse_index``, ``sparse_columns``, ``bold_headers``, ``caption``, ``max_rows`` and ``max_columns`` (:issue:`41946`, :issue:`43149`, :issue:`42972`). - :meth:`.Styler.to_html` omits CSSStyle rules for hidden table elements as a performance enhancement (:issue:`43619`) - Custom CSS classes can now be directly specified without string replacement (:issue:`43686`) - - Ability to render hyperlinks automatically via a new ``render_links`` formatting keyword argument (:issue:`45058`) + - Ability to render hyperlinks automatically via a new ``hyperlinks`` formatting keyword argument (:issue:`45058`) There are also some LaTeX specific enhancements: From d331a827a859f2692dec49349800662886f25f99 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 25 Dec 2021 11:17:00 +0100 Subject: [PATCH 07/25] extend docs --- pandas/io/formats/style.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index d9550f0940376..a196ec60c4012 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -623,6 +623,7 @@ def to_latex( | \\sisetup{detect-all = true} *(within {document})* environment \\usepackage{longtable} if arg is "longtable" | or any other relevant environment package + hyperlinks \\usepackage{hyperref} ===================== ========================================================== **Cell Styles** From 1abc6d928fc96a2171896b1aa5077163a92a95b3 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 25 Dec 2021 23:45:24 +0100 Subject: [PATCH 08/25] html deprecation implementation. 
--- pandas/core/frame.py | 460 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 431 insertions(+), 29 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7fd30fbbe1b7b..5f935e1a8246e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2867,16 +2867,29 @@ def to_html( justify: str | None = None, max_rows: int | None = None, max_cols: int | None = None, - show_dimensions: bool | str = False, + show_dimensions: bool | str | None = None, decimal: str = ".", - bold_rows: bool = True, + bold_rows: bool | None = None, classes: str | list | tuple | None = None, escape: bool = True, - notebook: bool = False, + notebook: bool | None = None, border: int | None = None, table_id: str | None = None, - render_links: bool = False, + render_links: bool | None = None, encoding: str | None = None, + *, + table_attributes: str | None = None, + sparse_index: bool | None = None, + sparse_columns: bool | None = None, + caption: str | None = None, + max_columns: int | None = None, + doctype_html: bool | None = None, + formatter=None, + precision: int | None = None, + thousands: str | None = None, + hyperlinks: bool | None = None, + bold_headers: bool | None = None, + **kwargs, ): """ Render a DataFrame as an HTML table. @@ -2900,42 +2913,431 @@ def to_html( Set character encoding. .. versionadded:: 1.0 + table_attributes : str, optional + Attributes to assign within the `<table>` HTML element in the format: + +
``<table .. <table_attributes> >`` + + .. versionadded:: 1.4.0 + + sparse_index : bool, optional + Whether to sparsify the display of a hierarchical index. Setting to False + will display each explicit level element in a hierarchical key for each row. + Defaults to ``pandas.options.styler.sparse.index`` value. + + .. versionadded:: 1.4.0 + sparse_columns : bool, optional + Whether to sparsify the display of a hierarchical index. Setting to False + will display each explicit level element in a hierarchical key for each + column. Defaults to ``pandas.options.styler.sparse.columns`` value. + + .. versionadded:: 1.4.0 + caption : str, optional + Set the HTML caption on Styler. + + .. versionadded:: 1.4.0 + max_columns : int, optional + The maximum number of columns that will be rendered. Defaults to + ``pandas.options.styler.render.max_columns``, which is None. + + Rows and columns may be reduced if the number of total elements is + large. This value is set to ``pandas.options.styler.render.max_elements``, + which is 262144 (18 bit browser rendering). + + .. versionadded:: 1.4.0 + doctype_html : bool, default False + Whether to output a fully structured HTML file including all + HTML elements, or just the core ``