From f81beea264c0db1b3f68d0ff7ac19b7cab62aa76 Mon Sep 17 00:00:00 2001 From: Benjamin Calderon Date: Sun, 18 Aug 2024 21:57:48 -0400 Subject: [PATCH 1/3] initial --- src/core/config/Categories.json | 3 +- src/core/operations/Ngram.mjs | 52 +++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 src/core/operations/Ngram.mjs diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json index bebdd6a5e..79ff60077 100644 --- a/src/core/config/Categories.json +++ b/src/core/config/Categories.json @@ -320,7 +320,8 @@ "Unescape string", "Pseudo-Random Number Generator", "Sleep", - "File Tree" + "File Tree", + "Ngram" ] }, { diff --git a/src/core/operations/Ngram.mjs b/src/core/operations/Ngram.mjs new file mode 100644 index 000000000..fbcbfce7e --- /dev/null +++ b/src/core/operations/Ngram.mjs @@ -0,0 +1,52 @@ +/** + * @author benjcal [benj.calderon@gmail.com] + * @copyright Crown Copyright 2024 + * @license Apache-2.0 + */ + +import Operation from "../Operation.mjs"; + +/** + * ngram operation + */ +class Ngram extends Operation { + + /** + * Ngram constructor + */ + constructor() { + super(); + + this.name = "Ngram"; + this.module = "Default"; + this.description = "Extracts n-grams from the input text. N-grams are contiguous sequences of n characters from a given text sample."; + this.infoURL = "https://wikipedia.org/wiki/N-gram"; + this.inputType = "string"; + this.outputType = "string"; + this.args = [ + { + name: "N-gram size", + type: "number", + value: 3 + }, + ]; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + const n = args[0]; + const ngrams = []; + for (let i = 0; i <= input.length - n; i++) { + ngrams.push(input.slice(i, i + n)); + } + + return ngrams.join("\n"); + } + +} + +export default Ngram; From 4e6efead61c84d8cf19db8b25bcb8f215608aee4 Mon Sep 17 00:00:00 2001 From: Benjamin Calderon Date: Sun, 18 Aug 2024 22:01:08 -0400 Subject: [PATCH 2/3] rename --- src/core/config/Categories.json | 2 +- src/core/operations/Ngram.mjs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json index 79ff60077..6389414e9 100644 --- a/src/core/config/Categories.json +++ b/src/core/config/Categories.json @@ -321,7 +321,7 @@ "Pseudo-Random Number Generator", "Sleep", "File Tree", - "Ngram" + "N-gram" ] }, { diff --git a/src/core/operations/Ngram.mjs b/src/core/operations/Ngram.mjs index fbcbfce7e..6471e4d89 100644 --- a/src/core/operations/Ngram.mjs +++ b/src/core/operations/Ngram.mjs @@ -17,7 +17,7 @@ class Ngram extends Operation { constructor() { super(); - this.name = "Ngram"; + this.name = "N-gram"; this.module = "Default"; this.description = "Extracts n-grams from the input text. N-grams are contiguous sequences of n characters from a given text sample."; this.infoURL = "https://wikipedia.org/wiki/N-gram"; From 619752373579850591b07c14b29942110e7f4937 Mon Sep 17 00:00:00 2001 From: Benjamin Calderon Date: Sun, 18 Aug 2024 22:07:17 -0400 Subject: [PATCH 3/3] add join delimiter --- src/core/operations/Ngram.mjs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/core/operations/Ngram.mjs b/src/core/operations/Ngram.mjs index 6471e4d89..269c788dc 100644 --- a/src/core/operations/Ngram.mjs +++ b/src/core/operations/Ngram.mjs @@ -5,6 +5,7 @@ */ import Operation from "../Operation.mjs"; +import {JOIN_DELIM_OPTIONS} from "../lib/Delim.mjs"; /** * ngram operation @@ -29,6 +30,11 @@ class Ngram extends Operation { type: "number", value: 3 }, + { + "name": "Join delimiter", + "type": "editableOptionShort", + "value": JOIN_DELIM_OPTIONS + } ]; } @@ -38,13 +44,15 @@ class Ngram extends Operation { * @returns {string} */ run(input, args) { - const n = args[0]; + const nGramSize = args[0], + joinDelim = args[1]; + const ngrams = []; - for (let i = 0; i <= input.length - n; i++) { - ngrams.push(input.slice(i, i + n)); + for (let i = 0; i <= input.length - nGramSize; i++) { + ngrams.push(input.slice(i, i + nGramSize)); } - return ngrams.join("\n"); + return ngrams.join(joinDelim); } }