Skip to content

Commit

Permalink
Update sanitizer for RTF to HTML pasting.
Browse files Browse the repository at this point in the history
  • Loading branch information
jimafisk committed Aug 6, 2024
1 parent 1aa455f commit b836e45
Showing 1 changed file with 132 additions and 43 deletions.
175 changes: 132 additions & 43 deletions defaults/core/cms/html_sanitizer.svelte
Original file line number Diff line number Diff line change
@@ -1,69 +1,158 @@
<script context="module">
//JavaScript HTML Sanitizer v2.0.2, (c) Alexander Yumashev, Jitbit Software.
//homepage https://github.com/jitbit/HtmlSanitizer
//License: MIT https://github.com/jitbit/HtmlSanitizer/blob/master/LICENSE
//Modified to fit project needs
// https://developer.mozilla.org/en-US/docs/Web/API/Element/paste_event
/**
 * Paste-event handler: suppresses the browser's default paste, reads the
 * clipboard's "text" flavor, runs it through HtmlSanitizer.SanitizeHtml and
 * inserts the result at the caret with execCommand("insertHTML").
 * Nothing is inserted when the clipboard text is empty.
 *
 * @param {ClipboardEvent} e - the paste event being handled.
 */
export const plaintextPaste = e => {
    e.preventDefault();
    // window.clipboardData is only consulted when the event carries no clipboardData.
    const clipboard = e.clipboardData || window.clipboardData;
    const pastedText = clipboard.getData("text");
    if (pastedText !== "") {
        const cleaned = HtmlSanitizer.SanitizeHtml(pastedText);
        document.execCommand("insertHTML", false, cleaned);
    }
}
// https://developer.mozilla.org/en-US/docs/Web/API/Element/paste_event
/**
 * Paste-event handler: suppresses the browser's default paste and inserts a
 * safe version of the clipboard content at the caret.
 *
 * - If the clipboard has a "text/html" flavor, it is run through
 *   HtmlSanitizer.SanitizeHtml and inserted with execCommand("insertHTML").
 * - Otherwise the "text/plain" flavor is inserted verbatim with
 *   execCommand("insertText"). Without this fallback, a paste from a
 *   plain-text source would be swallowed: the default action has already
 *   been cancelled and there is no HTML to insert.
 * - If both flavors are empty, nothing is inserted.
 *
 * @param {ClipboardEvent} e - the paste event being handled.
 */
export const plaintextPaste = e => {
    e.preventDefault();
    // window.clipboardData is only consulted when the event carries no clipboardData.
    const clipboard = e.clipboardData || window.clipboardData;

    const rawHtmlPaste = clipboard.getData("text/html");
    if (rawHtmlPaste !== "") {
        const paste = HtmlSanitizer.SanitizeHtml(rawHtmlPaste);
        document.execCommand("insertHTML", false, paste);
        return;
    }

    // No HTML flavor on the clipboard: fall back to plain text. insertText
    // treats the string as text, so no sanitizing or escaping is needed.
    const rawTextPaste = clipboard.getData("text/plain");
    if (rawTextPaste === "") {
        return;
    }
    document.execCommand("insertText", false, rawTextPaste);
}
const HtmlSanitizer = typeof window === "undefined" ? null : new (function () {
const _tagWhitelist = {
'A': true, 'ABBR': true, 'B': true, 'BLOCKQUOTE': true, 'BODY': true, 'BR': true, 'CENTER': true, 'CODE': true, 'DD': true, 'DIV': true, 'DL': true, 'DT': true, 'EM': true, 'FONT': true,
'H1': true, 'H2': true, 'H3': true, 'H4': true, 'H5': true, 'H6': true, 'HR': true, 'I': true, 'IMG': true, 'LABEL': true, 'LI': true, 'OL': true, 'P': true, 'PRE': true,
'SMALL': true, 'SOURCE': true, 'SPAN': true, 'STRONG': true, 'SUB': true, 'SUP': true, 'TABLE': true, 'TBODY': true, 'TR': true, 'TD': true, 'TH': true, 'THEAD': true, 'UL': true, 'U': true, 'VIDEO': true
'A': true,
'ABBR': true,
'B': true,
'BLOCKQUOTE': true,
'BODY': true,
'BR': true,
'CENTER': true,
'CODE': true,
'DD': true,
'DIV': true,
'DL': true,
'DT': true,
'EM': true,
'FONT': true,
'H1': true,
'H2': true,
'H3': true,
'H4': true,
'H5': true,
'H6': true,
'HR': true,
'I': true,
'IMG': true,
'LABEL': true,
'LI': true,
'OL': true,
'P': true,
'PRE': true,
'S': true,
'STRIKE': true,
'SMALL': true,
'SOURCE': true,
'SPAN': true,
'STRONG': true,
'SUB': true,
'SUP': true,
'TABLE': true,
'TBODY': true,
'TR': true,
'TD': true,
'TH': true,
'THEAD': true,
'UL': true,
'U': true,
'VIDEO': true,
};
const _contentTagWhiteList = { 'FORM': true, 'GOOGLE-SHEETS-HTML-ORIGIN': true }; //tags that will be converted to DIVs
const _attributeWhitelist = { 'align': true, 'color': true, 'controls': true, 'height': true, 'href': true, 'id': true, 'src': true, 'style': false, 'target': true, 'title': true, 'type': true, 'width': true };
const _cssWhitelist = { 'background-color': false, 'color': false, 'font-size': false, 'font-weight': false, 'text-align': false, 'text-decoration': false, 'width': false };
const _schemaWhiteList = [ 'http:', 'https:', 'data:', 'm-files:', 'file:', 'ftp:', 'mailto:', 'pw:' ]; //which "protocols" are allowed in "href", "src" etc
const _uriAttributes = { 'href': true, 'action': true };
// Tag renaming table, keyed by node.tagName: an element whose tag appears here
// is rebuilt in the sanitized copy using the mapped tag name instead
// (e.g. STRIKE is recreated as S).
// Note: the source tag must also pass makeSanitizedCopy's "is tag allowed?"
// check (e.g. by being listed in _tagWhitelist); otherwise the element is
// replaced by an empty fragment before this table is consulted.
// _contentTagWhiteList is checked first, so a tag listed in both becomes a DIV.
const _tagConvertList = {
'STRIKE': 'S',
};
// Tags that are accepted by makeSanitizedCopy but recreated as DIV elements
// in the sanitized copy.
// This table is checked before _tagConvertList and before the plain
// _tagWhitelist path, so 'P' (which is also in _tagWhitelist) is emitted as a
// DIV rather than as a P.
const _contentTagWhiteList = {
'FORM': true,
'GOOGLE-SHEETS-HTML-ORIGIN': true,
'P': true,
};
const _attributeWhitelist = {
'align': true,
'color': true,
'controls': true,
'height': true,
'href': true,
'id': true,
'src': true,
'style': false,
'target': true,
'title': true,
'type': true,
'width': true,
};
const _cssWhitelist = {
'background-color': false,
'color': false,
'font-size': false,
'font-weight': false,
'text-align': false,
'text-decoration': false,
'width': false,
};
// which "protocols" are allowed in "href", "src" etc
const _schemaWhiteList = [
'http:',
'https:',
'data:',
'm-files:',
'file:',
'ftp:',
'mailto:',
'pw:',
];
const _uriAttributes = {
'href': true,
'action': true,
};
const _parser = new DOMParser();
this.SanitizeHtml = function (input, extraSelector) {
input = input.trim();
if (input == "") return ""; //to save performance
//firefox "bogus node" workaround for wysiwyg's
if (input == "<br>") return "";
if (input.indexOf("<body")==-1) input = "<body>" + input + "</body>"; //add "body" otherwise some tags are skipped, like <style>
let doc = _parser.parseFromString(input, "text/html");
//DOM clobbering check (damn you firefox)
if (doc.body.tagName !== 'BODY')
doc.body.remove();
if (typeof doc.createElement !== 'function')
doc.createElement.remove();
function makeSanitizedCopy(node) {
let newNode;
if (node.nodeType == Node.TEXT_NODE) {
newNode = node.cloneNode(true);
} else if (node.nodeType == Node.ELEMENT_NODE && (_tagWhitelist[node.tagName] || _contentTagWhiteList[node.tagName] || (extraSelector && node.matches(extraSelector)))) { //is tag allowed?
if (_contentTagWhiteList[node.tagName])
newNode = doc.createElement('DIV'); //convert to DIV
else if (_tagConvertList[node.tagName])
newNode = doc.createElement(_tagConvertList[node.tagName]); //convert to mapped tag
else
newNode = doc.createElement(node.tagName);
for (let i = 0; i < node.attributes.length; i++) {
let attr = node.attributes[i];
if (_attributeWhitelist[attr.name]) {
Expand All @@ -87,26 +176,26 @@
let subCopy = makeSanitizedCopy(node.childNodes[i]);
newNode.appendChild(subCopy, false);
}
//remove useless empty spans (lots of those when pasting from MS Outlook)
if ((newNode.tagName == "SPAN" || newNode.tagName == "B" || newNode.tagName == "I" || newNode.tagName == "U")
&& newNode.innerHTML.trim() == "") {
&& newNode.innerHTML.trim() == "") {
return doc.createDocumentFragment();
}
} else {
newNode = doc.createDocumentFragment();
}
return newNode;
};
let resultElement = makeSanitizedCopy(doc.body);
return resultElement.innerHTML
.replace(/<br[^>]*>(\S)/g, "<br>\n$1")
.replace(/div><div/g, "div>\n<div"); //replace is just for cleaner code
}
function startsWithAny(str, substrings) {
for (let i = 0; i < substrings.length; i++) {
if (str.indexOf(substrings[i]) == 0) {
Expand All @@ -115,10 +204,10 @@
}
return false;
}
this.AllowedTags = _tagWhitelist;
this.AllowedAttributes = _attributeWhitelist;
this.AllowedCssStyles = _cssWhitelist;
this.AllowedSchemas = _schemaWhiteList;
});
</script>
</script>

0 comments on commit b836e45

Please sign in to comment.