From 051b843816b649e32236670b1bbecc35228e6095 Mon Sep 17 00:00:00 2001
From: Shaun Martin <inhumantsar@protonmail.com>
Date: Sat, 8 Jun 2024 10:25:56 -0500
Subject: [PATCH] fix: strip hash marks from file names (#20)

---
 src/lib/util.ts    | 20 +++++++++++++++++++-
 src/parse.ts       | 20 +-------------------
 test/parse.test.ts | 21 +--------------------
 test/util.test.ts  | 38 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 59 insertions(+), 40 deletions(-)
 create mode 100644 test/util.test.ts
diff --git a/src/lib/util.ts b/src/lib/util.ts
index da0d99a..065ed01 100644
--- a/src/lib/util.ts
+++ b/src/lib/util.ts
@@ -24,10 +24,28 @@ export const cleanTitle = (title: string) => {
         // eg: Bitcoin prices edges lower after "Halving" concludes
         .replace('"', "'")
         // assume that others can simply be nuked
-        .replace(/[\*"\\/<>:\?]/g, '');
+        .replace(/[\*"\\/#<>:\?]/g, '');
 
 };
 
+/** Turns free text into a tag: "&" becomes "and", ":" becomes "/", every other run of characters that
+ *  is not a word character, "-" or "/" becomes one space; the result goes to updateStringCase with tagCase. */
+export const cleanTag = (text: string, tagCase: StringCase): string => {
+    return updateStringCase(
+        text
+            // & is used almost exclusively to mean "and"
+            // wrapping the word with spaces so updateStringCase handles it gracefully later
+            // (regex with the g flag: a string pattern would only replace the first match)
+            .replace(/&/g, ' and ')
+            // : is used mainly for categories. TODO: look for "Categor(y|ies)" and strip it?
+            .replace(/:/g, "/")
+            // use spaces in place of other invalid chars to maintain word separation; each run
+            // collapses to one space, so no separate whitespace-squeezing pass is needed
+            .replace(/[^\w\-\/]+/g, ' ')
+            .trim(),
+        tagCase);
+};
+
 export const updateStringCase = (text: string, targetCase: StringCase) => {
     switch (targetCase) {
         case "PascalCase":
diff --git a/src/parse.ts b/src/parse.ts
index ed58cd8..ee19d0b 100644
--- a/src/parse.ts
+++ b/src/parse.ts
@@ -2,7 +2,7 @@ import { Readability } from "@mozilla/readability";
 import { htmlToMarkdown, requestUrl, sanitizeHTMLToDom } from "obsidian";
 import { logger } from "./lib/logger";
 import type { StringCase } from "./lib/string-case";
-import { isEmpty, updateStringCase } from "./lib/util";
+import { cleanTag, isEmpty } from "./lib/util";
 import type { FormatterArgs, IArticle, IArticleMetadata, IArticleTags, TFrontMatterProps } from "./types";
 
 export const fixRelativeLinks = (html: string, articleUrl: string) => {
@@ -40,24 +40,6 @@ export const parsePage = (doc: Document) => {
     return article;
 };
 
-export const cleanTag = (text: string, tagCase: StringCase): string => {
-    const other = new RegExp(/[^\w\-\/]+/g);
-    const extraWhitespace = new RegExp(/\s{2,}/);
-    return updateStringCase(
-        text
-            // & is used almost exclusively to mean "and"
-            // wrapping the word with spaces so updateStringCase handles it gracefully later
-            .replace('&', ' and ')
-            // : is used mainly for categories. TODO: look for "Categor(y|ies)" and strip it?
-            .replace(':',"/")
-            // use spaces in place of other invalid chars to maintain word separation
-            .replace(other, ' ')
-            // collapse multiple spaces into a single space
-            .replace(extraWhitespace, ' ')
-            .trim(),
-        tagCase);
-};
-
 export const parseMetadataTags = (elements: NodeListOf<HTMLMetaElement>, tagPrefix: string, tagCase: StringCase) => {
     // Tags need to be split and reformatted:
     //   - Must be alphanumeric (not numeric)
diff --git a/test/parse.test.ts b/test/parse.test.ts
index a10497a..e654c21 100644
--- a/test/parse.test.ts
+++ b/test/parse.test.ts
@@ -1,4 +1,4 @@
-import { cleanTag, mergeMetadata } from '../src/parse';
+import { mergeMetadata } from '../src/parse';
 import type { IArticle, IArticleMetadata } from '../src/types';
 
 describe('mergeMetadata', () => {
@@ -30,22 +30,3 @@ describe('mergeMetadata', () => {
         expect(result.tags.length).toEqual(3);
     });
 });
-
-describe('cleanTag', () => {
-    it('should replace invalid characters', () => {
-        const strCase = "iKebab-case";
-        expect(cleanTag("Cheese&Bacon", strCase)).toEqual("Cheese-and-Bacon");
-        expect(cleanTag("C++",strCase)).toEqual("C");
-        expect(cleanTag("Categories:Other", strCase)).toEqual("Categories/Other");
-        expect(cleanTag("#hashtag", strCase)).toEqual("hashtag");
-        expect(cleanTag("Why++would++you++write++it++like++this?", strCase)).toEqual("Why-would-you-write-it-like-this");
-    });
-
-    it('should update string case', () => {
-        expect(cleanTag("Cheese&Bacon", "iKebab-case")).toEqual("Cheese-and-Bacon");
-        expect(cleanTag("Cheese&Bacon", "PascalCase")).toEqual("CheeseAndBacon");
-        expect(cleanTag("Cheese&Bacon", "camelCase")).toEqual("cheeseAndBacon");
-        expect(cleanTag("Cheese&Bacon", "kebab-case")).toEqual("cheese-and-bacon");
-        expect(cleanTag("Cheese&Bacon", "snake_case")).toEqual("cheese_and_bacon");
-    });
-})
\ No newline at end of file
diff --git a/test/util.test.ts b/test/util.test.ts
new file mode 100644
index 0000000..3fdad55
--- /dev/null
+++ b/test/util.test.ts
@@ -0,0 +1,38 @@
+import { cleanTag, cleanTitle, updateStringCase } from "../src/lib/util";
+
+describe('cleanTag', () => {
+    it('should replace invalid characters', () => {
+        const clean = (raw: string) => cleanTag(raw, "iKebab-case"); // one case throughout; only the clean-up varies
+        expect(clean("Cheese&Bacon")).toEqual("Cheese-and-Bacon");
+        expect(clean("C++")).toEqual("C");
+        expect(clean("Categories:Other")).toEqual("Categories/Other");
+        expect(clean("#hashtag")).toEqual("hashtag");
+        expect(clean("Why++would++you++write++it++like++this?")).toEqual("Why-would-you-write-it-like-this");
+    });
+});
+
+describe('cleanTitle', () => {
+    it('should replace pipes and colons with hyphens', () => {
+        expect(cleanTitle("OpenNeRF: Open Set 3D Neural Scene Segmentation"))
+            .toEqual("OpenNeRF - Open Set 3D Neural Scene Segmentation");
+        expect(cleanTitle("Local News | Botched home sale costs man his real estate license"))
+            .toEqual("Local News - Botched home sale costs man his real estate license");
+        expect(cleanTitle("Blog|Some Title")).toEqual("Blog - Some Title");
+    });
+
+    it('should strip hash marks', () => { // own case so a regression is reported under the right name
+        expect(cleanTitle("Some Podcast #323")).toEqual("Some Podcast 323");
+    });
+});
+
+describe('updateStringCase', () => {
+    it('should update string case', () => {
+        const cases = [ // [target case, expected rendering of "Cheese and Bacon"]
+            ["iKebab-case", "Cheese-and-Bacon"], ["PascalCase", "CheeseAndBacon"],
+            ["camelCase", "cheeseAndBacon"], ["kebab-case", "cheese-and-bacon"],
+            ["snake_case", "cheese_and_bacon"],
+        ] as const;
+        for (const [target, want] of cases) expect(updateStringCase("Cheese and Bacon", target)).toEqual(want);
+    });
+});
+