Skip to content

Commit

Permalink
Merge pull request #739 from euphorie/htmllaundry-refactor
Browse files Browse the repository at this point in the history
Add euphorie.htmllaundry module
  • Loading branch information
ale-rt committed May 21, 2024
2 parents 0f5639a + f4407a7 commit 3ee4659
Show file tree
Hide file tree
Showing 18 changed files with 651 additions and 12 deletions.
4 changes: 4 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ Changelog
16.1.3 (unreleased)
-------------------

- Add ``euphorie.htmllaundry`` module.
The original ``htmllaundry`` package fails with ``lxml`` 5.2.
[ale-rt, maurits]

- CSV download of similar title details.
Ref: scrum-2198

Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
"decorator",
"py-bcrypt",
"ftw.upgrade",
"htmllaundry",
"lxml",
"markdownify",
"nltk",
Expand Down
4 changes: 2 additions & 2 deletions src/euphorie/client/browser/risk.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from euphorie.content.survey import ISurvey
from euphorie.content.utils import IToolTypesInfo
from euphorie.content.utils import parse_scaled_answers
from htmllaundry import StripMarkup
from euphorie.htmllaundry.utils import strip_markup
from io import BytesIO
from plone import api
from plone.memoize.instance import memoize
Expand Down Expand Up @@ -154,7 +154,7 @@ def solutions_available_for_action_plan(self):
if solution_id not in existing_measure_ids:
solutions.append(
{
"description": StripMarkup(solution.description),
"description": strip_markup(solution.description),
"action": getattr(solution, "action", "") or "",
"requirements": solution.requirements,
"id": solution_id,
Expand Down
4 changes: 2 additions & 2 deletions src/euphorie/client/docx/html.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from euphorie.htmllaundry.utils import strip_markup
from lxml import etree

import docx
import htmllaundry
import lxml.html


Expand Down Expand Up @@ -120,7 +120,7 @@ def __call__(self, markup, doc, style=None, next_style=None):
try:
markup_doc = lxml.html.document_fromstring(markup)
except etree.XMLSyntaxError:
text = htmllaundry.StripMarkup(markup)
text = strip_markup(markup)
text = text.replace("&#13", "\n")
doc.add_paragraph(text)
return doc
Expand Down
2 changes: 1 addition & 1 deletion src/euphorie/content/configure.zcml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
<include package="plone.app.dexterity" />
<include package="plone.app.vocabularies" />
<include package="plone.uuid" />
<include package="htmllaundry" />
<include package="euphorie.htmllaundry" />
<include package="Products.membrane" />

<include file="permissions.zcml" />
Expand Down
2 changes: 1 addition & 1 deletion src/euphorie/content/dependency.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"""

from euphorie.content.user import BaseValidator
from htmllaundry.z3cform import HtmlText
from euphorie.htmllaundry.z3cform import HtmlText
from plonetheme.nuplone.z3cform.directives import Dependency
from z3c.form.interfaces import IForm
from z3c.form.interfaces import IValidator
Expand Down
2 changes: 1 addition & 1 deletion src/euphorie/content/help.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from .. import MessageFactory as _
from euphorie.content.utils import StripMarkup
from htmllaundry.z3cform import HtmlText
from euphorie.htmllaundry.z3cform import HtmlText
from plone.app.z3cform.wysiwyg import WysiwygFieldWidget
from plone.autoform import directives
from plone.indexer import indexer
Expand Down
2 changes: 1 addition & 1 deletion src/euphorie/content/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from Acquisition import aq_chain
from euphorie.content.dependency import ConditionalTextLine
from euphorie.content.utils import ensure_image_size
from htmllaundry.z3cform import HtmlText
from euphorie.htmllaundry.z3cform import HtmlText
from plone.app.dexterity.behaviors.metadata import IBasic
from plone.app.z3cform.wysiwyg import WysiwygFieldWidget
from plone.autoform import directives
Expand Down
2 changes: 1 addition & 1 deletion src/euphorie/content/page.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .. import MessageFactory as _
from euphorie.content.behaviour.richdescription import IRichDescription
from euphorie.content.utils import StripMarkup
from htmllaundry.z3cform import HtmlText
from euphorie.htmllaundry.z3cform import HtmlText
from plone.app.dexterity.behaviors.metadata import IBasic
from plone.app.z3cform.wysiwyg import WysiwygFieldWidget
from plone.autoform import directives
Expand Down
2 changes: 1 addition & 1 deletion src/euphorie/content/risk.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from Acquisition import aq_chain
from Acquisition import aq_inner
from euphorie.content.utils import ensure_image_size
from htmllaundry.z3cform import HtmlText
from euphorie.htmllaundry.z3cform import HtmlText
from plone import api
from plone.app.dexterity.behaviors.metadata import IBasic
from plone.autoform import directives
Expand Down
2 changes: 1 addition & 1 deletion src/euphorie/content/survey.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from euphorie.content.dependency import ConditionalTextLine
from euphorie.content.utils import get_tool_type_default
from euphorie.content.utils import IToolTypesInfo
from htmllaundry.z3cform import HtmlText
from euphorie.htmllaundry.z3cform import HtmlText
from plone.app.dexterity.behaviors.metadata import IBasic
from plone.app.z3cform.wysiwyg import WysiwygFieldWidget
from plone.autoform import directives
Expand Down
9 changes: 9 additions & 0 deletions src/euphorie/htmllaundry/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# htmllaundry was originally a library developed by Wichert Akkerman
#
# See: https://github.com/syslabcom/htmllaundry/tree/master
#
# A subset of the code of that package, which is no longer maintained,
# was copied into euphorie.htmllaundry
#
# The original htmllaundry package was licensed under the BSD license
#
113 changes: 113 additions & 0 deletions src/euphorie/htmllaundry/cleaners.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
try:
from lxml_html_clean.clean import _find_external_links
from lxml_html_clean.clean import Cleaner
except ImportError:
# BBB for lxml 5.1 or earlier, Plone 6.0.10 or earlier
from lxml.html.clean import _find_external_links
from lxml.html.clean import Cleaner


# Sentinel default for ``LaundryCleaner.link_target``. It is only ever
# compared by identity (``is``), meaning "no link target was configured".
# NOTE(review): presumably kept as a list on purpose -- lxml's
# ``Cleaner.__init__`` seems to accept keyword overrides only for attributes
# whose class-level default is None, a bool or a collection. Confirm before
# replacing this with ``object()``.
marker = []


class LaundryCleaner(Cleaner):
    """``Cleaner`` subclass that can also force the ``target`` of links.

    The extra ``link_target`` option (class attribute or constructor
    keyword) controls what happens to the elements returned by
    ``_find_external_links``:

    * ``marker`` (the default): link targets are left untouched;
    * ``None``: an existing ``target`` attribute is removed;
    * a list or tuple: its first item becomes the ``target`` value;
    * anything else: the value itself becomes the ``target`` value.
    """

    link_target = marker

    def __call__(self, doc):
        """Run the inherited cleaner on *doc*, then apply ``link_target``."""
        super().__call__(doc)
        if self.link_target is marker:
            # Nothing was configured: keep whatever targets the links have.
            return
        self.force_link_target(doc, self.link_target)

    def force_link_target(self, doc, target):
        """Set or remove the ``target`` attribute on the links found in *doc*.

        *target* follows the rules described for ``link_target`` on the
        class. The elements are modified in place; nothing is returned.
        """
        for link in _find_external_links(doc):
            if target is None:
                # ``None`` means the link must not carry a target at all.
                if "target" in link.attrib:
                    del link.attrib["target"]
                continue
            # Evaluated per link on purpose: an empty sequence only fails
            # when there is actually a link to update.
            if isinstance(target, (list, tuple)):
                value = target[0]
            else:
                value = target
            link.set("target", value)


# Preconfigured cleaner instance for document-like markup. See the lxml
# ``Cleaner`` documentation for the exact meaning of each option.
DocumentCleaner = LaundryCleaner(
    # Tags passed as ``allow_tags``.
    # NOTE(review): "dft" is not a standard HTML element -- possibly a typo
    # for "dfn". Confirm before changing it.
    allow_tags=[
        "blockquote", "a", "img", "em", "p", "strong",
        "h3", "h4", "h5",
        "ul", "ol", "li",
        "sub", "sup", "abbr", "acronym",
        "dl", "dt", "dd",
        "cite", "dft", "br",
        "table", "tr", "td", "th", "thead", "tbody", "tfoot",
    ],
    remove_unknown_tags=False,
    # Options that are switched on.
    safe_attrs_only=True,
    add_nofollow=True,
    scripts=True,
    javascript=True,
    style=True,
    # Options that are switched off.
    page_structure=False,
    comments=False,
    links=False,
    meta=False,
    processing_instructions=False,
    frames=False,
    annoying_tags=False,
)


# Preconfigured cleaner for single-line fields such as titles: only
# ``em`` and ``strong`` are passed as allowed tags.
LineCleaner = LaundryCleaner(
    allow_tags=["em", "strong"],
    # NOTE(review): the original comment here read "Weird API..";
    # presumably lxml refuses ``allow_tags`` together with a truthy
    # ``remove_unknown_tags``, hence the explicit False -- confirm.
    remove_unknown_tags=False,
    # Options that are switched on.
    safe_attrs_only=True,
    add_nofollow=True,
    scripts=True,
    javascript=True,
    style=True,
    # Options that are switched off.
    page_structure=False,
    comments=False,
    processing_instructions=False,
    frames=False,
    annoying_tags=False,
)

# Preconfigured cleaner for comment-like markup: a small set of allowed
# tags, and links get their ``target`` forced to "_blank".
CommentCleaner = LaundryCleaner(
    allow_tags=["blockquote", "a", "em", "p", "strong"],
    # NOTE(review): the original comment here read "Weird API..";
    # presumably lxml refuses ``allow_tags`` together with a truthy
    # ``remove_unknown_tags``, hence the explicit False -- confirm.
    remove_unknown_tags=False,
    # A list is given; ``force_link_target`` uses its first item.
    link_target=["_blank"],
    # Options that are switched on.
    safe_attrs_only=True,
    add_nofollow=True,
    javascript=True,
    style=True,
    # Options that are switched off.
    # NOTE(review): ``scripts`` is False here but True for DocumentCleaner
    # and LineCleaner -- confirm this difference is intended.
    scripts=False,
    page_structure=False,
    comments=False,
    processing_instructions=False,
    frames=False,
    annoying_tags=False,
)


# Public names of this module: the three preconfigured cleaner instances.
# ``LaundryCleaner`` and ``marker`` are not listed here.
__all__ = ["DocumentCleaner", "LineCleaner", "CommentCleaner"]
5 changes: 5 additions & 0 deletions src/euphorie/htmllaundry/configure.zcml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<configure xmlns="http://namespaces.zope.org/zope">

<adapter factory=".z3cform.HtmlDataConverter" />

</configure>
Empty file.
Loading

0 comments on commit 3ee4659

Please sign in to comment.