Skip to content

Commit

Permalink
fix: strip hash marks from file names (#20)
Browse files Browse the repository at this point in the history
  • Loading branch information
inhumantsar committed Jun 8, 2024
1 parent c17c2f2 commit 051b843
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 40 deletions.
20 changes: 19 additions & 1 deletion src/lib/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,28 @@ export const cleanTitle = (title: string) => {
// eg: Bitcoin prices edges lower after "Halving" concludes
.replace('"', "'")
// assume that others can simply be nuked
.replace(/[\*"\\/<>:\?]/g, '');
.replace(/[\*"\\/#<>:\?]/g, '');

};

/**
 * Normalises raw tag text and applies the requested casing.
 *
 * Steps, in order:
 *  1. every `&` becomes the word " and " (padded with spaces so it stays a separate word);
 *  2. every `:` becomes `/`;
 *  3. each run of characters other than word characters, `-` and `/` becomes one space;
 *  4. leading and trailing whitespace is trimmed;
 *  5. the result is handed to `updateStringCase` together with `tagCase`.
 *
 * @param text - raw tag text
 * @param tagCase - target case, passed through to `updateStringCase`
 * @returns the cleaned tag as returned by `updateStringCase`
 */
export const cleanTag = (text: string, tagCase: StringCase): string => {
  // Global patterns are required here: String.prototype.replace with a plain-string
  // pattern only touches the FIRST occurrence, which previously left any later
  // `&` / `:` to be blanked out by the invalid-character pass below.

  // & is used almost exclusively to mean "and"
  const ampersand = /&/g;
  // : is used mainly for categories. TODO: look for "Categor(y|ies)" and strip it?
  const colon = /:/g;
  // Whitespace is not matched by \w, so this pass also collapses every run of
  // spaces into a single space; no separate whitespace-collapsing step is needed.
  const invalidRun = /[^\w\-\/]+/g;

  const cleaned = text
    // wrapping the word with spaces so updateStringCase handles it gracefully later
    .replace(ampersand, ' and ')
    .replace(colon, '/')
    // use spaces in place of other invalid chars to maintain word separation
    .replace(invalidRun, ' ')
    .trim();

  return updateStringCase(cleaned, tagCase);
};

export const updateStringCase = (text: string, targetCase: StringCase) => {
switch (targetCase) {
case "PascalCase":
Expand Down
20 changes: 1 addition & 19 deletions src/parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { Readability } from "@mozilla/readability";
import { htmlToMarkdown, requestUrl, sanitizeHTMLToDom } from "obsidian";
import { logger } from "./lib/logger";
import type { StringCase } from "./lib/string-case";
import { isEmpty, updateStringCase } from "./lib/util";
import { cleanTag, isEmpty } from "./lib/util";
import type { FormatterArgs, IArticle, IArticleMetadata, IArticleTags, TFrontMatterProps } from "./types";

export const fixRelativeLinks = (html: string, articleUrl: string) => {
Expand Down Expand Up @@ -40,24 +40,6 @@ export const parsePage = (doc: Document) => {
return article;
};

/**
 * Normalises raw tag text and applies the requested casing.
 *
 * Steps, in order:
 *  1. every `&` becomes the word " and " (padded with spaces so it stays a separate word);
 *  2. every `:` becomes `/`;
 *  3. each run of characters other than word characters, `-` and `/` becomes one space;
 *  4. leading and trailing whitespace is trimmed;
 *  5. the result is handed to `updateStringCase` together with `tagCase`.
 *
 * @param text - raw tag text
 * @param tagCase - target case, passed through to `updateStringCase`
 * @returns the cleaned tag as returned by `updateStringCase`
 */
export const cleanTag = (text: string, tagCase: StringCase): string => {
  // Global patterns are required here: String.prototype.replace with a plain-string
  // pattern only touches the FIRST occurrence, which previously left any later
  // `&` / `:` to be blanked out by the invalid-character pass below.

  // & is used almost exclusively to mean "and"
  const ampersand = /&/g;
  // : is used mainly for categories. TODO: look for "Categor(y|ies)" and strip it?
  const colon = /:/g;
  // Whitespace is not matched by \w, so this pass also collapses every run of
  // spaces into a single space; no separate whitespace-collapsing step is needed.
  const invalidRun = /[^\w\-\/]+/g;

  const cleaned = text
    // wrapping the word with spaces so updateStringCase handles it gracefully later
    .replace(ampersand, ' and ')
    .replace(colon, '/')
    // use spaces in place of other invalid chars to maintain word separation
    .replace(invalidRun, ' ')
    .trim();

  return updateStringCase(cleaned, tagCase);
};

export const parseMetadataTags = (elements: NodeListOf<HTMLMetaElement>, tagPrefix: string, tagCase: StringCase) => {
// Tags need to be split and reformatted:
// - Must be alphanumeric (not numeric)
Expand Down
21 changes: 1 addition & 20 deletions test/parse.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { cleanTag, mergeMetadata } from '../src/parse';
import { mergeMetadata } from '../src/parse';
import type { IArticle, IArticleMetadata } from '../src/types';

describe('mergeMetadata', () => {
Expand Down Expand Up @@ -30,22 +30,3 @@ describe('mergeMetadata', () => {
expect(result.tags.length).toEqual(3);
});
});

// Table-driven checks for cleanTag: character sanitising first, then casing.
describe('cleanTag', () => {
  it('should replace invalid characters', () => {
    const strCase = "iKebab-case";
    // [raw tag, expected cleaned tag]
    const samples = [
      ["Cheese&Bacon", "Cheese-and-Bacon"],
      ["C++", "C"],
      ["Categories:Other", "Categories/Other"],
      ["#hashtag", "hashtag"],
      ["Why++would++you++write++it++like++this?", "Why-would-you-write-it-like-this"],
    ] as const;

    for (const [raw, expected] of samples) {
      expect(cleanTag(raw, strCase)).toEqual(expected);
    }
  });

  it('should update string case', () => {
    const raw = "Cheese&Bacon";
    // [target case, expected tag]
    const samples = [
      ["iKebab-case", "Cheese-and-Bacon"],
      ["PascalCase", "CheeseAndBacon"],
      ["camelCase", "cheeseAndBacon"],
      ["kebab-case", "cheese-and-bacon"],
      ["snake_case", "cheese_and_bacon"],
    ] as const;

    for (const [targetCase, expected] of samples) {
      expect(cleanTag(raw, targetCase)).toEqual(expected);
    }
  });
})
38 changes: 38 additions & 0 deletions test/util.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import { cleanTag, cleanTitle, updateStringCase } from "../src/lib/util";

// Table-driven check that cleanTag strips or substitutes characters that are not valid in a tag.
describe('cleanTag', () => {
  it('should replace invalid characters', () => {
    const strCase = "iKebab-case";
    // [raw tag, expected cleaned tag]
    const samples = [
      ["Cheese&Bacon", "Cheese-and-Bacon"],
      ["C++", "C"],
      ["Categories:Other", "Categories/Other"],
      ["#hashtag", "hashtag"],
      ["Why++would++you++write++it++like++this?", "Why-would-you-write-it-like-this"],
    ] as const;

    for (const [raw, expected] of samples) {
      expect(cleanTag(raw, strCase)).toEqual(expected);
    }
  });
});

// cleanTitle expectations, grouped by the behaviour each case exercises so that a
// failing case is reported under a name that matches what actually broke.
describe('cleanTitle', () => {
  it('should replace pipes and colons with hyphens', () => {
    expect(cleanTitle("OpenNeRF: Open Set 3D Neural Scene Segmentation"))
      .toEqual("OpenNeRF - Open Set 3D Neural Scene Segmentation");

    expect(cleanTitle("Local News | Botched home sale costs man his real estate license"))
      .toEqual("Local News - Botched home sale costs man his real estate license");

    // pipe with no surrounding whitespace
    expect(cleanTitle("Blog|Some Title")).toEqual("Blog - Some Title");
  });

  // Kept separate from the hyphen cases: a hash mark is removed outright, not replaced.
  it('should strip hash marks', () => {
    expect(cleanTitle("Some Podcast #323")).toEqual("Some Podcast 323");
  });
});

// Table-driven check of every target case against the same space-separated input.
describe('updateStringCase', () => {
  it('should update string case', () => {
    const str = "Cheese and Bacon";
    // [target case, expected output]
    const samples = [
      ["iKebab-case", "Cheese-and-Bacon"],
      ["PascalCase", "CheeseAndBacon"],
      ["camelCase", "cheeseAndBacon"],
      ["kebab-case", "cheese-and-bacon"],
      ["snake_case", "cheese_and_bacon"],
    ] as const;

    for (const [targetCase, expected] of samples) {
      expect(updateStringCase(str, targetCase)).toEqual(expected);
    }
  });
});

0 comments on commit 051b843

Please sign in to comment.