From 2a2259a2608ef7e9420d9d3fa59f55530d662ae9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Edenstr=C3=B6m?= <jonathan@edenstrom.io>
Date: Wed, 6 Oct 2021 21:40:33 +0200
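Subject: [PATCH] fix: failing html parsing

Switch the h2m import in parseHtml.ts from a namespace import
(`import * as h2m`) to a default import, and make the `html` parameter
of toMarkdown optional.

The remaining changes are formatting only: imports are reordered, the
method chain in rearrangeWhitespace is reflowed, trailing whitespace is
removed, redundant `: string` annotations on defaulted parameters are
dropped, and the quotes around the h1-h6 keys are removed.
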
---
 libs/api-skolplattformen/lib/parseHtml.ts | 49 ++++++++++++-----------
 1 file changed, 26 insertions(+), 23 deletions(-)
diff --git a/libs/api-skolplattformen/lib/parseHtml.ts b/libs/api-skolplattformen/lib/parseHtml.ts
index a7847ed3f..62592412f 100644
--- a/libs/api-skolplattformen/lib/parseHtml.ts
+++ b/libs/api-skolplattformen/lib/parseHtml.ts
@@ -1,7 +1,7 @@
-import * as h2m from 'h2m'
-import { htmlDecode } from 'js-htmlencode'
+import h2m from 'h2m'
 import { decode } from 'he'
-import { parse, HTMLElement, TextNode } from 'node-html-parser'
+import { htmlDecode } from 'js-htmlencode'
+import { HTMLElement, parse, TextNode } from 'node-html-parser'
 
 const noChildren = ['strong', 'b', 'em', 'i', 'u', 's']
 const trimNodes = [...noChildren, 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'a']
@@ -40,45 +40,48 @@ const deepClean = (node: HTMLElement): HTMLElement => {
   return cleaned
 }
 
-const rearrangeWhitespace = (html: string = ''): string => {
+const rearrangeWhitespace = (html = ''): string => {
   let content = html
-  .replace(/<span[^>]*>/gm, '')
-  .split('</span>').join('')
-  .replace(/<div[^>]*>/gm, '')
-  .split('</div>').join('')
-  .split('&#160;').join('&amp;nbsp;')
-  
+    .replace(/<span[^>]*>/gm, '')
+    .split('</span>')
+    .join('')
+    .replace(/<div[^>]*>/gm, '')
+    .split('</div>')
+    .join('')
+    .split('&#160;')
+    .join('&amp;nbsp;')
+
   // FIXME: Make a loop that doesn't break linting
   trimNodes.forEach((trimNode) => {
     content = content.split(`<${trimNode}> `).join(` <${trimNode}>`)
     content = content.split(` </${trimNode}>`).join(`</${trimNode}> `)
     content = content.split(`<${trimNode}>&amp;nbsp;`).join(` <${trimNode}>`)
-    content = content.split(`&amp;nbsp;</${trimNode}>`).join(`</${trimNode}> `)    
+    content = content.split(`&amp;nbsp;</${trimNode}>`).join(`</${trimNode}> `)
   })
-    
+
   trimNodes.forEach((trimNode) => {
     content = content.split(`<${trimNode}> `).join(` <${trimNode}>`)
     content = content.split(` </${trimNode}>`).join(`</${trimNode}> `)
     content = content.split(`<${trimNode}>&amp;nbsp;`).join(` <${trimNode}>`)
-    content = content.split(`&amp;nbsp;</${trimNode}>`).join(`</${trimNode}> `)    
+    content = content.split(`&amp;nbsp;</${trimNode}>`).join(`</${trimNode}> `)
   })
   trimNodes.forEach((trimNode) => {
     content = content.split(`<${trimNode}> `).join(` <${trimNode}>`)
     content = content.split(` </${trimNode}>`).join(`</${trimNode}> `)
     content = content.split(`<${trimNode}>&amp;nbsp;`).join(` <${trimNode}>`)
-    content = content.split(`&amp;nbsp;</${trimNode}>`).join(`</${trimNode}> `)    
+    content = content.split(`&amp;nbsp;</${trimNode}>`).join(`</${trimNode}> `)
   })
   trimNodes.forEach((trimNode) => {
     content = content.split(`<${trimNode}> `).join(` <${trimNode}>`)
     content = content.split(` </${trimNode}>`).join(`</${trimNode}> `)
     content = content.split(`<${trimNode}>&amp;nbsp;`).join(` <${trimNode}>`)
-    content = content.split(`&amp;nbsp;</${trimNode}>`).join(`</${trimNode}> `)    
+    content = content.split(`&amp;nbsp;</${trimNode}>`).join(`</${trimNode}> `)
   })
 
   return content
 }
 
-export const clean = (html: string = ''): string =>
+export const clean = (html = ''): string =>
   deepClean(parse(decode(html))).outerHTML
 
 interface Node {
@@ -93,15 +96,15 @@ const overides = {
   img: (node: Node) => `![${node.attrs.title || ''}](${node.attrs.src})`,
   i: (node: Node) => `*${node.md}*`,
   b: (node: Node) => `**${node.md}**`,
-  'h1': (node: Node) => `# ${node.md}\n`,
-  'h2': (node: Node) => `## ${node.md}\n`,
-  'h3': (node: Node) => `### ${node.md}\n`,
-  'h4': (node: Node) => `#### ${node.md}\n`,
-  'h5': (node: Node) => `##### ${node.md}\n`,
-  'h6': (node: Node) => `###### ${node.md}\n`,
+  h1: (node: Node) => `# ${node.md}\n`,
+  h2: (node: Node) => `## ${node.md}\n`,
+  h3: (node: Node) => `### ${node.md}\n`,
+  h4: (node: Node) => `#### ${node.md}\n`,
+  h5: (node: Node) => `##### ${node.md}\n`,
+  h6: (node: Node) => `###### ${node.md}\n`,
 }
 
-export const toMarkdown = (html: string): string => {
+export const toMarkdown = (html?: string): string => {
   const rearranged = rearrangeWhitespace(html)
   const trimmed = clean(rearranged)
   const markdown = h2m(trimmed, { overides, converter })