Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add euphorie.htmllaundry module #739

Merged
merged 12 commits into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ Changelog
16.1.3 (unreleased)
-------------------

- Add ``euphorie.htmllaundry`` module.
The original ``htmllaundry`` package fails with ``lxml`` 5.2.
[ale-rt, maurits]

- CSV download of similar title details.
Ref: scrum-2198

Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
"decorator",
"py-bcrypt",
"ftw.upgrade",
"htmllaundry",
"lxml",
"markdownify",
"nltk",
Expand Down
4 changes: 2 additions & 2 deletions src/euphorie/client/browser/risk.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from euphorie.content.survey import ISurvey
from euphorie.content.utils import IToolTypesInfo
from euphorie.content.utils import parse_scaled_answers
from htmllaundry import StripMarkup
from euphorie.htmllaundry.utils import strip_markup
from io import BytesIO
from plone import api
from plone.memoize.instance import memoize
Expand Down Expand Up @@ -154,7 +154,7 @@ def solutions_available_for_action_plan(self):
if solution_id not in existing_measure_ids:
solutions.append(
{
"description": StripMarkup(solution.description),
"description": strip_markup(solution.description),
"action": getattr(solution, "action", "") or "",
"requirements": solution.requirements,
"id": solution_id,
Expand Down
4 changes: 2 additions & 2 deletions src/euphorie/client/docx/html.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from euphorie.htmllaundry.utils import strip_markup
from lxml import etree

import docx
import htmllaundry
import lxml.html


Expand Down Expand Up @@ -120,7 +120,7 @@ def __call__(self, markup, doc, style=None, next_style=None):
try:
markup_doc = lxml.html.document_fromstring(markup)
except etree.XMLSyntaxError:
text = htmllaundry.StripMarkup(markup)
text = strip_markup(markup)
text = text.replace("&#13", "\n")
doc.add_paragraph(text)
return doc
Expand Down
2 changes: 1 addition & 1 deletion src/euphorie/content/configure.zcml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
<include package="plone.app.dexterity" />
<include package="plone.app.vocabularies" />
<include package="plone.uuid" />
<include package="htmllaundry" />
<include package="euphorie.htmllaundry" />
<include package="Products.membrane" />

<include file="permissions.zcml" />
Expand Down
2 changes: 1 addition & 1 deletion src/euphorie/content/dependency.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"""

from euphorie.content.user import BaseValidator
from htmllaundry.z3cform import HtmlText
from euphorie.htmllaundry.z3cform import HtmlText
from plonetheme.nuplone.z3cform.directives import Dependency
from z3c.form.interfaces import IForm
from z3c.form.interfaces import IValidator
Expand Down
2 changes: 1 addition & 1 deletion src/euphorie/content/help.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from .. import MessageFactory as _
from euphorie.content.utils import StripMarkup
from htmllaundry.z3cform import HtmlText
from euphorie.htmllaundry.z3cform import HtmlText
from plone.app.z3cform.wysiwyg import WysiwygFieldWidget
from plone.autoform import directives
from plone.indexer import indexer
Expand Down
2 changes: 1 addition & 1 deletion src/euphorie/content/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from Acquisition import aq_chain
from euphorie.content.dependency import ConditionalTextLine
from euphorie.content.utils import ensure_image_size
from htmllaundry.z3cform import HtmlText
from euphorie.htmllaundry.z3cform import HtmlText
from plone.app.dexterity.behaviors.metadata import IBasic
from plone.app.z3cform.wysiwyg import WysiwygFieldWidget
from plone.autoform import directives
Expand Down
2 changes: 1 addition & 1 deletion src/euphorie/content/page.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .. import MessageFactory as _
from euphorie.content.behaviour.richdescription import IRichDescription
from euphorie.content.utils import StripMarkup
from htmllaundry.z3cform import HtmlText
from euphorie.htmllaundry.z3cform import HtmlText
from plone.app.dexterity.behaviors.metadata import IBasic
from plone.app.z3cform.wysiwyg import WysiwygFieldWidget
from plone.autoform import directives
Expand Down
2 changes: 1 addition & 1 deletion src/euphorie/content/risk.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from Acquisition import aq_chain
from Acquisition import aq_inner
from euphorie.content.utils import ensure_image_size
from htmllaundry.z3cform import HtmlText
from euphorie.htmllaundry.z3cform import HtmlText
from plone import api
from plone.app.dexterity.behaviors.metadata import IBasic
from plone.autoform import directives
Expand Down
2 changes: 1 addition & 1 deletion src/euphorie/content/survey.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from euphorie.content.dependency import ConditionalTextLine
from euphorie.content.utils import get_tool_type_default
from euphorie.content.utils import IToolTypesInfo
from htmllaundry.z3cform import HtmlText
from euphorie.htmllaundry.z3cform import HtmlText
from plone.app.dexterity.behaviors.metadata import IBasic
from plone.app.z3cform.wysiwyg import WysiwygFieldWidget
from plone.autoform import directives
Expand Down
9 changes: 9 additions & 0 deletions src/euphorie/htmllaundry/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# htmllaundry was originally a library developed by Wichert Akkerman
#
# See: https://github.com/syslabcom/htmllaundry/tree/master
#
# A subset of the now unmaintained package code was copied and pasted
# into euphorie.htmllaundry
#
# The original htmllaundry package was licensed under the BSD license
#
113 changes: 113 additions & 0 deletions src/euphorie/htmllaundry/cleaners.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
try:
from lxml_html_clean.clean import _find_external_links
from lxml_html_clean.clean import Cleaner
except ImportError:
# BBB for lxml 5.1 or earlier, Plone 6.0.10 or earlier
from lxml.html.clean import _find_external_links
from lxml.html.clean import Cleaner


# Sentinel meaning "no link_target configured". It is compared by identity, so
# None stays available as a real setting (None removes the target attribute).
marker = []


class LaundryCleaner(Cleaner):
    """Cleaner that can additionally rewrite the ``target`` of links.

    ``link_target`` controls the extra step that runs after the inherited
    cleaning:

    * left at the ``marker`` sentinel: links are not touched;
    * ``None``: any ``target`` attribute is removed;
    * a list or tuple: its first item is written to ``target``;
    * anything else: the value itself is written to ``target``.

    Only the elements returned by ``_find_external_links`` are affected.
    """

    link_target = marker

    def __call__(self, doc):
        """Clean ``doc`` with the base class, then apply ``link_target``."""
        super().__call__(doc)
        if self.link_target is marker:
            # Nothing configured: keep whatever the base cleaner produced.
            return
        self.force_link_target(doc, self.link_target)

    def force_link_target(self, doc, target):
        """Set or remove the ``target`` attribute on the links in ``doc``."""
        for link in _find_external_links(doc):
            if target is None:
                link.attrib.pop("target", None)
                continue
            # Resolved per link so a sequence is only indexed when a link matches.
            value = target[0] if isinstance(target, (list, tuple)) else target
            link.set("target", value)


# Cleaner for rich-text documents: only the tags listed in allow_tags are kept.
DocumentCleaner = LaundryCleaner(
    page_structure=False,
    # lxml's Cleaner rejects allow_tags combined with remove_unknown_tags=True.
    remove_unknown_tags=False,
    allow_tags=[
        "blockquote",
        "a",
        "img",
        "em",
        "p",
        "strong",
        "h3",
        "h4",
        "h5",
        "ul",
        "ol",
        "li",
        "sub",
        "sup",
        "abbr",
        "acronym",
        "dl",
        "dt",
        "dd",
        "cite",
        # Previously "dft", which is not an HTML element; the definition
        # element is <dfn>.
        "dfn",
        "br",
        "table",
        "tr",
        "td",
        "th",
        "thead",
        "tbody",
        "tfoot",
    ],
    safe_attrs_only=True,
    add_nofollow=True,
    scripts=True,
    javascript=True,
    comments=False,
    style=True,
    links=False,
    meta=False,
    processing_instructions=False,
    frames=False,
    annoying_tags=False,
)


# Cleaner for single-line fields such as titles: <em> and <strong> are the
# only allowed tags.
LineCleaner = LaundryCleaner(
    # Tag whitelist.
    allow_tags=["em", "strong"],
    remove_unknown_tags=False,  # Weird API..
    # Options enabled.
    safe_attrs_only=True,
    add_nofollow=True,
    scripts=True,
    javascript=True,
    style=True,
    # Options disabled.
    page_structure=False,
    comments=False,
    processing_instructions=False,
    frames=False,
    annoying_tags=False,
)

# Cleaner for user comments: a small set of block and inline tags is allowed,
# and link targets are forced to "_blank".
CommentCleaner = LaundryCleaner(
    # Tag whitelist.
    allow_tags=["blockquote", "a", "em", "p", "strong"],
    remove_unknown_tags=False,  # Weird API..
    # Link handling.
    add_nofollow=True,
    link_target=["_blank"],
    # Options enabled.
    safe_attrs_only=True,
    javascript=True,
    style=True,
    # Options disabled.
    # NOTE(review): scripts is off here but on in the other two cleaners;
    # confirm this difference is intended.
    scripts=False,
    page_structure=False,
    comments=False,
    processing_instructions=False,
    frames=False,
    annoying_tags=False,
)


# Public API: only the three preconfigured cleaner instances are exported;
# LaundryCleaner and marker are left out of the star-import surface.
__all__ = ["DocumentCleaner", "LineCleaner", "CommentCleaner"]
5 changes: 5 additions & 0 deletions src/euphorie/htmllaundry/configure.zcml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<configure xmlns="http://namespaces.zope.org/zope">

<!-- Register HtmlDataConverter from this package's z3cform module as an adapter. -->
<adapter factory=".z3cform.HtmlDataConverter" />

</configure>
Empty file.
Loading
Loading