From 051b843816b649e32236670b1bbecc35228e6095 Mon Sep 17 00:00:00 2001 From: Shaun Martin Date: Sat, 8 Jun 2024 10:25:56 -0500 Subject: [PATCH] fix: strip hash marks from file names (#20) --- src/lib/util.ts | 20 +++++++++++++++++++- src/parse.ts | 20 +------------------- test/parse.test.ts | 21 +-------------------- test/util.test.ts | 38 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 40 deletions(-) create mode 100644 test/util.test.ts diff --git a/src/lib/util.ts b/src/lib/util.ts index da0d99a..065ed01 100644 --- a/src/lib/util.ts +++ b/src/lib/util.ts @@ -24,10 +24,28 @@ export const cleanTitle = (title: string) => { // eg: Bitcoin prices edges lower after "Halving" concludes .replace('"', "'") // assume that others can simply be nuked - .replace(/[\*"\\/<>:\?]/g, ''); + .replace(/[\*"\\/#<>:\?]/g, ''); }; +export const cleanTag = (text: string, tagCase: StringCase): string => { + const other = new RegExp(/[^\w\-\/]+/g); + const extraWhitespace = new RegExp(/\s{2,}/); + return updateStringCase( + text + // & is used almost exclusively to mean "and" + // wrapping the word with spaces so updateStringCase handles it gracefully later + .replace('&', ' and ') + // : is used mainly for categories. TODO: look for "Categor(y|ies)" and strip it? + .replace(':',"/") + // use spaces in place of other invalid chars to maintain word separation + .replace(other, ' ') + // collapse multiple spaces into a single space + .replace(extraWhitespace, ' ') + .trim(), + tagCase); +}; + export const updateStringCase = (text: string, targetCase: StringCase) => { switch (targetCase) { case "PascalCase": diff --git a/src/parse.ts b/src/parse.ts index ed58cd8..ee19d0b 100644 --- a/src/parse.ts +++ b/src/parse.ts @@ -2,7 +2,7 @@ import { Readability } from "@mozilla/readability"; import { htmlToMarkdown, requestUrl, sanitizeHTMLToDom } from "obsidian"; import { logger } from "./lib/logger"; import type { StringCase } from "./lib/string-case"; -import { isEmpty, updateStringCase } from "./lib/util"; +import { cleanTag, isEmpty } from "./lib/util"; import type { FormatterArgs, IArticle, IArticleMetadata, IArticleTags, TFrontMatterProps } from "./types"; export const fixRelativeLinks = (html: string, articleUrl: string) => { @@ -40,24 +40,6 @@ export const parsePage = (doc: Document) => { return article; }; -export const cleanTag = (text: string, tagCase: StringCase): string => { - const other = new RegExp(/[^\w\-\/]+/g); - const extraWhitespace = new RegExp(/\s{2,}/); - return updateStringCase( - text - // & is used almost exclusively to mean "and" - // wrapping the word with spaces so updateStringCase handles it gracefully later - .replace('&', ' and ') - // : is used mainly for categories. TODO: look for "Categor(y|ies)" and strip it? - .replace(':',"/") - // use spaces in place of other invalid chars to maintain word separation - .replace(other, ' ') - // collapse multiple spaces into a single space - .replace(extraWhitespace, ' ') - .trim(), - tagCase); -}; - export const parseMetadataTags = (elements: NodeListOf, tagPrefix: string, tagCase: StringCase) => { // Tags need to be split and reformatted: // - Must be alphanumeric (not numeric) diff --git a/test/parse.test.ts b/test/parse.test.ts index a10497a..e654c21 100644 --- a/test/parse.test.ts +++ b/test/parse.test.ts @@ -1,4 +1,4 @@ -import { cleanTag, mergeMetadata } from '../src/parse'; +import { mergeMetadata } from '../src/parse'; import type { IArticle, IArticleMetadata } from '../src/types'; describe('mergeMetadata', () => { @@ -30,22 +30,3 @@ describe('mergeMetadata', () => { expect(result.tags.length).toEqual(3); }); }); - -describe('cleanTag', () => { - it('should replace invalid characters', () => { - const strCase = "iKebab-case"; - expect(cleanTag("Cheese&Bacon", strCase)).toEqual("Cheese-and-Bacon"); - expect(cleanTag("C++",strCase)).toEqual("C"); - expect(cleanTag("Categories:Other", strCase)).toEqual("Categories/Other"); - expect(cleanTag("#hashtag", strCase)).toEqual("hashtag"); - expect(cleanTag("Why++would++you++write++it++like++this?", strCase)).toEqual("Why-would-you-write-it-like-this"); - }); - - it('should update string case', () => { - expect(cleanTag("Cheese&Bacon", "iKebab-case")).toEqual("Cheese-and-Bacon"); - expect(cleanTag("Cheese&Bacon", "PascalCase")).toEqual("CheeseAndBacon"); - expect(cleanTag("Cheese&Bacon", "camelCase")).toEqual("cheeseAndBacon"); - expect(cleanTag("Cheese&Bacon", "kebab-case")).toEqual("cheese-and-bacon"); - expect(cleanTag("Cheese&Bacon", "snake_case")).toEqual("cheese_and_bacon"); - }); -}) \ No newline at end of file diff --git a/test/util.test.ts b/test/util.test.ts new file mode 100644 index 0000000..3fdad55 --- /dev/null +++ b/test/util.test.ts @@ -0,0 +1,38 @@ +import { cleanTag, cleanTitle, updateStringCase } from "../src/lib/util"; + +describe('cleanTag', () => { + it('should replace invalid characters', () => { + const strCase = "iKebab-case"; + expect(cleanTag("Cheese&Bacon", strCase)).toEqual("Cheese-and-Bacon"); + expect(cleanTag("C++", strCase)).toEqual("C"); + expect(cleanTag("Categories:Other", strCase)).toEqual("Categories/Other"); + expect(cleanTag("#hashtag", strCase)).toEqual("hashtag"); + expect(cleanTag("Why++would++you++write++it++like++this?", strCase)).toEqual("Why-would-you-write-it-like-this"); + }); +}); + +describe('cleanTitle', () => { + it('should replace pipes and colons with hyphens', () => { + expect(cleanTitle("OpenNeRF: Open Set 3D Neural Scene Segmentation")) + .toEqual("OpenNeRF - Open Set 3D Neural Scene Segmentation"); + + expect(cleanTitle("Local News | Botched home sale costs man his real estate license")) + .toEqual("Local News - Botched home sale costs man his real estate license"); + + expect(cleanTitle("Blog|Some Title")).toEqual("Blog - Some Title"); + + expect(cleanTitle("Some Podcast #323")).toEqual("Some Podcast 323"); + }) +}); + +describe('updateStringCase', () => { + it('should update string case', () => { + const str = "Cheese and Bacon"; + expect(updateStringCase(str, "iKebab-case")).toEqual("Cheese-and-Bacon"); + expect(updateStringCase(str, "PascalCase")).toEqual("CheeseAndBacon"); + expect(updateStringCase(str, "camelCase")).toEqual("cheeseAndBacon"); + expect(updateStringCase(str, "kebab-case")).toEqual("cheese-and-bacon"); + expect(updateStringCase(str, "snake_case")).toEqual("cheese_and_bacon"); + }); +}); +